diff options
Diffstat (limited to 'Documentation')
312 files changed, 19996 insertions, 10064 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 2a39aeba1464..d05737aaa84b 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX | |||
@@ -86,6 +86,8 @@ cachetlb.txt | |||
86 | - describes the cache/TLB flushing interfaces Linux uses. | 86 | - describes the cache/TLB flushing interfaces Linux uses. |
87 | cdrom/ | 87 | cdrom/ |
88 | - directory with information on the CD-ROM drivers that Linux has. | 88 | - directory with information on the CD-ROM drivers that Linux has. |
89 | cgroups/ | ||
90 | - cgroups features, including cpusets and memory controller. | ||
89 | connector/ | 91 | connector/ |
90 | - docs on the netlink based userspace<->kernel space communication mod. | 92 | - docs on the netlink based userspace<->kernel space communication mod. |
91 | console/ | 93 | console/ |
@@ -98,8 +100,6 @@ cpu-load.txt | |||
98 | - document describing how CPU load statistics are collected. | 100 | - document describing how CPU load statistics are collected. |
99 | cpuidle/ | 101 | cpuidle/ |
100 | - info on CPU_IDLE, CPU idle state management subsystem. | 102 | - info on CPU_IDLE, CPU idle state management subsystem. |
101 | cpusets.txt | ||
102 | - documents the cpusets feature; assign CPUs and Mem to a set of tasks. | ||
103 | cputopology.txt | 103 | cputopology.txt |
104 | - documentation on how CPU topology info is exported via sysfs. | 104 | - documentation on how CPU topology info is exported via sysfs. |
105 | cris/ | 105 | cris/ |
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace new file mode 100644 index 000000000000..5e6a92a02d85 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-kmemtrace | |||
@@ -0,0 +1,71 @@ | |||
1 | What: /sys/kernel/debug/kmemtrace/ | ||
2 | Date: July 2008 | ||
3 | Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> | ||
4 | Description: | ||
5 | |||
6 | In kmemtrace-enabled kernels, the following files are created: | ||
7 | |||
8 | /sys/kernel/debug/kmemtrace/ | ||
9 | cpu<n> (0400) Per-CPU tracing data, see below. (binary) | ||
10 | total_overruns (0400) Total number of bytes which were dropped from | ||
11 | cpu<n> files because of full buffer condition, | ||
12 | non-binary. (text) | ||
13 | abi_version (0400) Kernel's kmemtrace ABI version. (text) | ||
14 | |||
15 | Each per-CPU file should be read according to the relay interface. That is, | ||
16 | the reader should set affinity to that specific CPU and, as currently done by | ||
17 | the userspace application (though there are other methods), use poll() with | ||
18 | an infinite timeout before every read(). Otherwise, erroneous data may be | ||
19 | read. The binary data has the following _core_ format: | ||
20 | |||
21 | Event ID (1 byte) Unsigned integer, one of: | ||
22 | 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC) | ||
23 | 1 - represents a freeing of previously allocated memory | ||
24 | (KMEMTRACE_EVENT_FREE) | ||
25 | Type ID (1 byte) Unsigned integer, one of: | ||
26 | 0 - this is a kmalloc() / kfree() | ||
27 | 1 - this is a kmem_cache_alloc() / kmem_cache_free() | ||
28 | 2 - this is a __get_free_pages() et al. | ||
29 | Event size (2 bytes) Unsigned integer representing the | ||
30 | size of this event. Used to extend | ||
31 | kmemtrace. Discard the bytes you | ||
32 | don't know about. | ||
33 | Sequence number (4 bytes) Signed integer used to reorder data | ||
34 | logged on SMP machines. Wraparound | ||
35 | must be taken into account, although | ||
36 | it is unlikely. | ||
37 | Caller address (8 bytes) Return address to the caller. | ||
38 | Pointer to mem (8 bytes) Pointer to target memory area. Can be | ||
39 | NULL, but not all such calls might be | ||
40 | recorded. | ||
41 | |||
42 | In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow: | ||
43 | |||
44 | Requested bytes (8 bytes) Total number of requested bytes, | ||
45 | unsigned, must not be zero. | ||
46 | Allocated bytes (8 bytes) Total number of actually allocated | ||
47 | bytes, unsigned, must not be lower | ||
48 | than requested bytes. | ||
49 | Requested flags (4 bytes) GFP flags supplied by the caller. | ||
50 | Target CPU (4 bytes) Signed integer, valid for event id 1. | ||
51 | If equal to -1, target CPU is the same | ||
52 | as origin CPU, but the reverse might | ||
53 | not be true. | ||
54 | |||
55 | The data is made available in the same endianness the machine has. | ||
56 | |||
57 | Other event ids and type ids may be defined and added. Other fields may be | ||
58 | added by increasing event size, but see below for details. | ||
59 | Every modification to the ABI, including new id definitions, are followed | ||
60 | by bumping the ABI version by one. | ||
61 | |||
62 | Adding new data to the packet (features) is done at the end of the mandatory | ||
63 | data: | ||
64 | Feature size (2 byte) | ||
65 | Feature ID (1 byte) | ||
66 | Feature data (Feature size - 3 bytes) | ||
67 | |||
68 | |||
69 | Users: | ||
70 | kmemtrace-user - git://repo.or.cz/kmemtrace-user.git | ||
71 | |||
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd index bf9c16b64c34..cf11736acb76 100644 --- a/Documentation/ABI/testing/debugfs-pktcdvd +++ b/Documentation/ABI/testing/debugfs-pktcdvd | |||
@@ -1,4 +1,4 @@ | |||
1 | What: /debug/pktcdvd/pktcdvd[0-7] | 1 | What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] |
2 | Date: Oct. 2006 | 2 | Date: Oct. 2006 |
3 | KernelVersion: 2.6.20 | 3 | KernelVersion: 2.6.20 |
4 | Contact: Thomas Maier <balagi@justmail.de> | 4 | Contact: Thomas Maier <balagi@justmail.de> |
@@ -10,10 +10,10 @@ debugfs interface | |||
10 | The pktcdvd module (packet writing driver) creates | 10 | The pktcdvd module (packet writing driver) creates |
11 | these files in debugfs: | 11 | these files in debugfs: |
12 | 12 | ||
13 | /debug/pktcdvd/pktcdvd[0-7]/ | 13 | /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ |
14 | info (0444) Lots of driver statistics and infos. | 14 | info (0444) Lots of driver statistics and infos. |
15 | 15 | ||
16 | Example: | 16 | Example: |
17 | ------- | 17 | ------- |
18 | 18 | ||
19 | cat /debug/pktcdvd/pktcdvd0/info | 19 | cat /sys/kernel/debug/pktcdvd/pktcdvd0/info |
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy new file mode 100644 index 000000000000..6434f0df012e --- /dev/null +++ b/Documentation/ABI/testing/ima_policy | |||
@@ -0,0 +1,61 @@ | |||
1 | What: security/ima/policy | ||
2 | Date: May 2008 | ||
3 | Contact: Mimi Zohar <zohar@us.ibm.com> | ||
4 | Description: | ||
5 | The Trusted Computing Group(TCG) runtime Integrity | ||
6 | Measurement Architecture(IMA) maintains a list of hash | ||
7 | values of executables and other sensitive system files | ||
8 | loaded into the run-time of this system. At runtime, | ||
9 | the policy can be constrained based on LSM specific data. | ||
10 | Policies are loaded into the securityfs file ima/policy | ||
11 | by opening the file, writing the rules one at a time and | ||
12 | then closing the file. The new policy takes effect after | ||
13 | the file ima/policy is closed. | ||
14 | |||
15 | rule format: action [condition ...] | ||
16 | |||
17 | action: measure | dont_measure | ||
18 | condition:= base | lsm | ||
19 | base: [[func=] [mask=] [fsmagic=] [uid=]] | ||
20 | lsm: [[subj_user=] [subj_role=] [subj_type=] | ||
21 | [obj_user=] [obj_role=] [obj_type=]] | ||
22 | |||
23 | base: func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION] | ||
24 | mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] | ||
25 | fsmagic:= hex value | ||
26 | uid:= decimal value | ||
27 | lsm: are LSM specific | ||
28 | |||
29 | default policy: | ||
30 | # PROC_SUPER_MAGIC | ||
31 | dont_measure fsmagic=0x9fa0 | ||
32 | # SYSFS_MAGIC | ||
33 | dont_measure fsmagic=0x62656572 | ||
34 | # DEBUGFS_MAGIC | ||
35 | dont_measure fsmagic=0x64626720 | ||
36 | # TMPFS_MAGIC | ||
37 | dont_measure fsmagic=0x01021994 | ||
38 | # SECURITYFS_MAGIC | ||
39 | dont_measure fsmagic=0x73636673 | ||
40 | |||
41 | measure func=BPRM_CHECK | ||
42 | measure func=FILE_MMAP mask=MAY_EXEC | ||
43 | measure func=INODE_PERM mask=MAY_READ uid=0 | ||
44 | |||
45 | The default policy measures all executables in bprm_check, | ||
46 | all files mmapped executable in file_mmap, and all files | ||
47 | open for read by root in inode_permission. | ||
48 | |||
49 | Examples of LSM specific definitions: | ||
50 | |||
51 | SELinux: | ||
52 | # SELINUX_MAGIC | ||
53 | dont_measure fsmagic=0xF97CFF8C | ||
54 | |||
55 | dont_measure obj_type=var_log_t | ||
56 | dont_measure obj_type=auditd_log_t | ||
57 | measure subj_user=system_u func=INODE_PERM mask=MAY_READ | ||
58 | measure subj_role=system_r func=INODE_PERM mask=MAY_READ | ||
59 | |||
60 | Smack: | ||
61 | measure subj_user=_ func=INODE_PERM mask=MAY_READ | ||
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 44f52a4f5903..cbbd3e069945 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block | |||
@@ -60,3 +60,62 @@ Description: | |||
60 | Indicates whether the block layer should automatically | 60 | Indicates whether the block layer should automatically |
61 | generate checksums for write requests bound for | 61 | generate checksums for write requests bound for |
62 | devices that support receiving integrity metadata. | 62 | devices that support receiving integrity metadata. |
63 | |||
64 | What: /sys/block/<disk>/alignment_offset | ||
65 | Date: April 2009 | ||
66 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
67 | Description: | ||
68 | Storage devices may report a physical block size that is | ||
69 | bigger than the logical block size (for instance a drive | ||
70 | with 4KB physical sectors exposing 512-byte logical | ||
71 | blocks to the operating system). This parameter | ||
72 | indicates how many bytes the beginning of the device is | ||
73 | offset from the disk's natural alignment. | ||
74 | |||
75 | What: /sys/block/<disk>/<partition>/alignment_offset | ||
76 | Date: April 2009 | ||
77 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
78 | Description: | ||
79 | Storage devices may report a physical block size that is | ||
80 | bigger than the logical block size (for instance a drive | ||
81 | with 4KB physical sectors exposing 512-byte logical | ||
82 | blocks to the operating system). This parameter | ||
83 | indicates how many bytes the beginning of the partition | ||
84 | is offset from the disk's natural alignment. | ||
85 | |||
86 | What: /sys/block/<disk>/queue/logical_block_size | ||
87 | Date: May 2009 | ||
88 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
89 | Description: | ||
90 | This is the smallest unit the storage device can | ||
91 | address. It is typically 512 bytes. | ||
92 | |||
93 | What: /sys/block/<disk>/queue/physical_block_size | ||
94 | Date: May 2009 | ||
95 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
96 | Description: | ||
97 | This is the smallest unit the storage device can write | ||
98 | without resorting to read-modify-write operation. It is | ||
99 | usually the same as the logical block size but may be | ||
100 | bigger. One example is SATA drives with 4KB sectors | ||
101 | that expose a 512-byte logical block size to the | ||
102 | operating system. | ||
103 | |||
104 | What: /sys/block/<disk>/queue/minimum_io_size | ||
105 | Date: April 2009 | ||
106 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
107 | Description: | ||
108 | Storage devices may report a preferred minimum I/O size, | ||
109 | which is the smallest request the device can perform | ||
110 | without incurring a read-modify-write penalty. For disk | ||
111 | drives this is often the physical block size. For RAID | ||
112 | arrays it is often the stripe chunk size. | ||
113 | |||
114 | What: /sys/block/<disk>/queue/optimal_io_size | ||
115 | Date: April 2009 | ||
116 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
117 | Description: | ||
118 | Storage devices may report an optimal I/O size, which is | ||
119 | the device's preferred unit of receiving I/O. This is | ||
120 | rarely reported for disk drives. For RAID devices it is | ||
121 | usually the stripe width or the internal block size. | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index e638e15a8895..6bf68053e4b8 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci | |||
@@ -41,6 +41,49 @@ Description: | |||
41 | for the device and attempt to bind to it. For example: | 41 | for the device and attempt to bind to it. For example: |
42 | # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id | 42 | # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id |
43 | 43 | ||
44 | What: /sys/bus/pci/drivers/.../remove_id | ||
45 | Date: February 2009 | ||
46 | Contact: Chris Wright <chrisw@sous-sol.org> | ||
47 | Description: | ||
48 | Writing a device ID to this file will remove an ID | ||
49 | that was dynamically added via the new_id sysfs entry. | ||
50 | The format for the device ID is: | ||
51 | VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device | ||
52 | ID, Subsystem Vendor ID, Subsystem Device ID, Class, | ||
53 | and Class Mask. The Vendor ID and Device ID fields are | ||
54 | required, the rest are optional. After successfully | ||
55 | removing an ID, the driver will no longer support the | ||
56 | device. This is useful to ensure auto probing won't | ||
57 | match the driver to the device. For example: | ||
58 | # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id | ||
59 | |||
60 | What: /sys/bus/pci/rescan | ||
61 | Date: January 2009 | ||
62 | Contact: Linux PCI developers <linux-pci@vger.kernel.org> | ||
63 | Description: | ||
64 | Writing a non-zero value to this attribute will | ||
65 | force a rescan of all PCI buses in the system, and | ||
66 | re-discover previously removed devices. | ||
67 | Depends on CONFIG_HOTPLUG. | ||
68 | |||
69 | What: /sys/bus/pci/devices/.../remove | ||
70 | Date: January 2009 | ||
71 | Contact: Linux PCI developers <linux-pci@vger.kernel.org> | ||
72 | Description: | ||
73 | Writing a non-zero value to this attribute will | ||
74 | hot-remove the PCI device and any of its children. | ||
75 | Depends on CONFIG_HOTPLUG. | ||
76 | |||
77 | What: /sys/bus/pci/devices/.../rescan | ||
78 | Date: January 2009 | ||
79 | Contact: Linux PCI developers <linux-pci@vger.kernel.org> | ||
80 | Description: | ||
81 | Writing a non-zero value to this attribute will | ||
82 | force a rescan of the device's parent bus and all | ||
83 | child buses, and re-discover devices removed earlier | ||
84 | from this part of the device tree. | ||
85 | Depends on CONFIG_HOTPLUG. | ||
86 | |||
44 | What: /sys/bus/pci/devices/.../vpd | 87 | What: /sys/bus/pci/devices/.../vpd |
45 | Date: February 2008 | 88 | Date: February 2008 |
46 | Contact: Ben Hutchings <bhutchings@solarflare.com> | 89 | Contact: Ben Hutchings <bhutchings@solarflare.com> |
@@ -52,3 +95,37 @@ Description: | |||
52 | that some devices may have malformatted data. If the | 95 | that some devices may have malformatted data. If the |
53 | underlying VPD has a writable section then the | 96 | underlying VPD has a writable section then the |
54 | corresponding section of this file will be writable. | 97 | corresponding section of this file will be writable. |
98 | |||
99 | What: /sys/bus/pci/devices/.../virtfnN | ||
100 | Date: March 2009 | ||
101 | Contact: Yu Zhao <yu.zhao@intel.com> | ||
102 | Description: | ||
103 | This symbolic link appears when hardware supports the SR-IOV | ||
104 | capability and the Physical Function driver has enabled it. | ||
105 | The symbolic link points to the PCI device sysfs entry of the | ||
106 | Virtual Function whose index is N (0...MaxVFs-1). | ||
107 | |||
108 | What: /sys/bus/pci/devices/.../dep_link | ||
109 | Date: March 2009 | ||
110 | Contact: Yu Zhao <yu.zhao@intel.com> | ||
111 | Description: | ||
112 | This symbolic link appears when hardware supports the SR-IOV | ||
113 | capability and the Physical Function driver has enabled it, | ||
114 | and this device has vendor specific dependencies with others. | ||
115 | The symbolic link points to the PCI device sysfs entry of | ||
116 | Physical Function this device depends on. | ||
117 | |||
118 | What: /sys/bus/pci/devices/.../physfn | ||
119 | Date: March 2009 | ||
120 | Contact: Yu Zhao <yu.zhao@intel.com> | ||
121 | Description: | ||
122 | This symbolic link appears when a device is a Virtual Function. | ||
123 | The symbolic link points to the PCI device sysfs entry of the | ||
124 | Physical Function this device associates with. | ||
125 | |||
126 | What: /sys/bus/pci/slots/.../module | ||
127 | Date: June 2009 | ||
128 | Contact: linux-pci@vger.kernel.org | ||
129 | Description: | ||
130 | This symbolic link points to the PCI hotplug controller driver | ||
131 | module that manages the hotplug slot. | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss new file mode 100644 index 000000000000..0a92a7c93a62 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss | |||
@@ -0,0 +1,33 @@ | |||
1 | Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model | ||
2 | Date: March 2009 | ||
3 | Kernel Version: 2.6.30 | ||
4 | Contact: iss_storagedev@hp.com | ||
5 | Description: Displays the SCSI INQUIRY page 0 model for logical drive | ||
6 | Y of controller X. | ||
7 | |||
8 | Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev | ||
9 | Date: March 2009 | ||
10 | Kernel Version: 2.6.30 | ||
11 | Contact: iss_storagedev@hp.com | ||
12 | Description: Displays the SCSI INQUIRY page 0 revision for logical | ||
13 | drive Y of controller X. | ||
14 | |||
15 | Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id | ||
16 | Date: March 2009 | ||
17 | Kernel Version: 2.6.30 | ||
18 | Contact: iss_storagedev@hp.com | ||
19 | Description: Displays the SCSI INQUIRY page 83 serial number for logical | ||
20 | drive Y of controller X. | ||
21 | |||
22 | Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor | ||
23 | Date: March 2009 | ||
24 | Kernel Version: 2.6.30 | ||
25 | Contact: iss_storagedev@hp.com | ||
26 | Description: Displays the SCSI INQUIRY page 0 vendor for logical drive | ||
27 | Y of controller X. | ||
28 | |||
29 | Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY | ||
30 | Date: March 2009 | ||
31 | Kernel Version: 2.6.30 | ||
32 | Contact: iss_storagedev@hp.com | ||
33 | Description: A symbolic link to /sys/block/cciss!cXdY | ||
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd new file mode 100644 index 000000000000..4d55a1888981 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-mtd | |||
@@ -0,0 +1,125 @@ | |||
1 | What: /sys/class/mtd/ | ||
2 | Date: April 2009 | ||
3 | KernelVersion: 2.6.29 | ||
4 | Contact: linux-mtd@lists.infradead.org | ||
5 | Description: | ||
6 | The mtd/ class subdirectory belongs to the MTD subsystem | ||
7 | (MTD core). | ||
8 | |||
9 | What: /sys/class/mtd/mtdX/ | ||
10 | Date: April 2009 | ||
11 | KernelVersion: 2.6.29 | ||
12 | Contact: linux-mtd@lists.infradead.org | ||
13 | Description: | ||
14 | The /sys/class/mtd/mtd{0,1,2,3,...} directories correspond | ||
15 | to each /dev/mtdX character device. These may represent | ||
16 | physical/simulated flash devices, partitions on a flash | ||
17 | device, or concatenated flash devices. They exist regardless | ||
18 | of whether CONFIG_MTD_CHAR is actually enabled. | ||
19 | |||
20 | What: /sys/class/mtd/mtdXro/ | ||
21 | Date: April 2009 | ||
22 | KernelVersion: 2.6.29 | ||
23 | Contact: linux-mtd@lists.infradead.org | ||
24 | Description: | ||
25 | These directories provide the corresponding read-only device | ||
26 | nodes for /sys/class/mtd/mtdX/ . They are only created | ||
27 | (for the benefit of udev) if CONFIG_MTD_CHAR is enabled. | ||
28 | |||
29 | What: /sys/class/mtd/mtdX/dev | ||
30 | Date: April 2009 | ||
31 | KernelVersion: 2.6.29 | ||
32 | Contact: linux-mtd@lists.infradead.org | ||
33 | Description: | ||
34 | Major and minor numbers of the character device corresponding | ||
35 | to this MTD device (in <major>:<minor> format). This is the | ||
36 | read-write device so <minor> will be even. | ||
37 | |||
38 | What: /sys/class/mtd/mtdXro/dev | ||
39 | Date: April 2009 | ||
40 | KernelVersion: 2.6.29 | ||
41 | Contact: linux-mtd@lists.infradead.org | ||
42 | Description: | ||
43 | Major and minor numbers of the character device corresponding | ||
44 | to the read-only variant of thie MTD device (in | ||
45 | <major>:<minor> format). In this case <minor> will be odd. | ||
46 | |||
47 | What: /sys/class/mtd/mtdX/erasesize | ||
48 | Date: April 2009 | ||
49 | KernelVersion: 2.6.29 | ||
50 | Contact: linux-mtd@lists.infradead.org | ||
51 | Description: | ||
52 | "Major" erase size for the device. If numeraseregions is | ||
53 | zero, this is the eraseblock size for the entire device. | ||
54 | Otherwise, the MEMGETREGIONCOUNT/MEMGETREGIONINFO ioctls | ||
55 | can be used to determine the actual eraseblock layout. | ||
56 | |||
57 | What: /sys/class/mtd/mtdX/flags | ||
58 | Date: April 2009 | ||
59 | KernelVersion: 2.6.29 | ||
60 | Contact: linux-mtd@lists.infradead.org | ||
61 | Description: | ||
62 | A hexadecimal value representing the device flags, ORed | ||
63 | together: | ||
64 | |||
65 | 0x0400: MTD_WRITEABLE - device is writable | ||
66 | 0x0800: MTD_BIT_WRITEABLE - single bits can be flipped | ||
67 | 0x1000: MTD_NO_ERASE - no erase necessary | ||
68 | 0x2000: MTD_POWERUP_LOCK - always locked after reset | ||
69 | |||
70 | What: /sys/class/mtd/mtdX/name | ||
71 | Date: April 2009 | ||
72 | KernelVersion: 2.6.29 | ||
73 | Contact: linux-mtd@lists.infradead.org | ||
74 | Description: | ||
75 | A human-readable ASCII name for the device or partition. | ||
76 | This will match the name in /proc/mtd . | ||
77 | |||
78 | What: /sys/class/mtd/mtdX/numeraseregions | ||
79 | Date: April 2009 | ||
80 | KernelVersion: 2.6.29 | ||
81 | Contact: linux-mtd@lists.infradead.org | ||
82 | Description: | ||
83 | For devices that have variable eraseblock sizes, this | ||
84 | provides the total number of erase regions. Otherwise, | ||
85 | it will read back as zero. | ||
86 | |||
87 | What: /sys/class/mtd/mtdX/oobsize | ||
88 | Date: April 2009 | ||
89 | KernelVersion: 2.6.29 | ||
90 | Contact: linux-mtd@lists.infradead.org | ||
91 | Description: | ||
92 | Number of OOB bytes per page. | ||
93 | |||
94 | What: /sys/class/mtd/mtdX/size | ||
95 | Date: April 2009 | ||
96 | KernelVersion: 2.6.29 | ||
97 | Contact: linux-mtd@lists.infradead.org | ||
98 | Description: | ||
99 | Total size of the device/partition, in bytes. | ||
100 | |||
101 | What: /sys/class/mtd/mtdX/type | ||
102 | Date: April 2009 | ||
103 | KernelVersion: 2.6.29 | ||
104 | Contact: linux-mtd@lists.infradead.org | ||
105 | Description: | ||
106 | One of the following ASCII strings, representing the device | ||
107 | type: | ||
108 | |||
109 | absent, ram, rom, nor, nand, dataflash, ubi, unknown | ||
110 | |||
111 | What: /sys/class/mtd/mtdX/writesize | ||
112 | Date: April 2009 | ||
113 | KernelVersion: 2.6.29 | ||
114 | Contact: linux-mtd@lists.infradead.org | ||
115 | Description: | ||
116 | Minimal writable flash unit size. This will always be | ||
117 | a positive integer. | ||
118 | |||
119 | In the case of NOR flash it is 1 (even though individual | ||
120 | bits can be cleared). | ||
121 | |||
122 | In the case of NAND flash it is one NAND page (or a | ||
123 | half page, or a quarter page). | ||
124 | |||
125 | In the case of ECC NOR, it is the ECC block size. | ||
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator index 873ef1fc1569..e091fa873792 100644 --- a/Documentation/ABI/testing/sysfs-class-regulator +++ b/Documentation/ABI/testing/sysfs-class-regulator | |||
@@ -4,8 +4,8 @@ KernelVersion: 2.6.26 | |||
4 | Contact: Liam Girdwood <lrg@slimlogic.co.uk> | 4 | Contact: Liam Girdwood <lrg@slimlogic.co.uk> |
5 | Description: | 5 | Description: |
6 | Some regulator directories will contain a field called | 6 | Some regulator directories will contain a field called |
7 | state. This reports the regulator enable status, for | 7 | state. This reports the regulator enable control, for |
8 | regulators which can report that value. | 8 | regulators which can report that input value. |
9 | 9 | ||
10 | This will be one of the following strings: | 10 | This will be one of the following strings: |
11 | 11 | ||
@@ -14,16 +14,54 @@ Description: | |||
14 | 'unknown' | 14 | 'unknown' |
15 | 15 | ||
16 | 'enabled' means the regulator output is ON and is supplying | 16 | 'enabled' means the regulator output is ON and is supplying |
17 | power to the system. | 17 | power to the system (assuming no error prevents it). |
18 | 18 | ||
19 | 'disabled' means the regulator output is OFF and is not | 19 | 'disabled' means the regulator output is OFF and is not |
20 | supplying power to the system.. | 20 | supplying power to the system (unless some non-Linux |
21 | control has enabled it). | ||
21 | 22 | ||
22 | 'unknown' means software cannot determine the state, or | 23 | 'unknown' means software cannot determine the state, or |
23 | the reported state is invalid. | 24 | the reported state is invalid. |
24 | 25 | ||
25 | NOTE: this field can be used in conjunction with microvolts | 26 | NOTE: this field can be used in conjunction with microvolts |
26 | and microamps to determine regulator output levels. | 27 | or microamps to determine configured regulator output levels. |
28 | |||
29 | |||
30 | What: /sys/class/regulator/.../status | ||
31 | Description: | ||
32 | Some regulator directories will contain a field called | ||
33 | "status". This reports the current regulator status, for | ||
34 | regulators which can report that output value. | ||
35 | |||
36 | This will be one of the following strings: | ||
37 | |||
38 | off | ||
39 | on | ||
40 | error | ||
41 | fast | ||
42 | normal | ||
43 | idle | ||
44 | standby | ||
45 | |||
46 | "off" means the regulator is not supplying power to the | ||
47 | system. | ||
48 | |||
49 | "on" means the regulator is supplying power to the system, | ||
50 | and the regulator can't report a detailed operation mode. | ||
51 | |||
52 | "error" indicates an out-of-regulation status such as being | ||
53 | disabled due to thermal shutdown, or voltage being unstable | ||
54 | because of problems with the input power supply. | ||
55 | |||
56 | "fast", "normal", "idle", and "standby" are all detailed | ||
57 | regulator operation modes (described elsewhere). They | ||
58 | imply "on", but provide more detail. | ||
59 | |||
60 | Note that regulator status is a function of many inputs, | ||
61 | not limited to control inputs from Linux. For example, | ||
62 | the actual load presented may trigger "error" status; or | ||
63 | a regulator may be enabled by another user, even though | ||
64 | Linux did not enable it. | ||
27 | 65 | ||
28 | 66 | ||
29 | What: /sys/class/regulator/.../type | 67 | What: /sys/class/regulator/.../type |
@@ -58,7 +96,7 @@ Description: | |||
58 | Some regulator directories will contain a field called | 96 | Some regulator directories will contain a field called |
59 | microvolts. This holds the regulator output voltage setting | 97 | microvolts. This holds the regulator output voltage setting |
60 | measured in microvolts (i.e. E-6 Volts), for regulators | 98 | measured in microvolts (i.e. E-6 Volts), for regulators |
61 | which can report that voltage. | 99 | which can report the control input for voltage. |
62 | 100 | ||
63 | NOTE: This value should not be used to determine the regulator | 101 | NOTE: This value should not be used to determine the regulator |
64 | output voltage level as this value is the same regardless of | 102 | output voltage level as this value is the same regardless of |
@@ -73,7 +111,7 @@ Description: | |||
73 | Some regulator directories will contain a field called | 111 | Some regulator directories will contain a field called |
74 | microamps. This holds the regulator output current limit | 112 | microamps. This holds the regulator output current limit |
75 | setting measured in microamps (i.e. E-6 Amps), for regulators | 113 | setting measured in microamps (i.e. E-6 Amps), for regulators |
76 | which can report that current. | 114 | which can report the control input for a current limit. |
77 | 115 | ||
78 | NOTE: This value should not be used to determine the regulator | 116 | NOTE: This value should not be used to determine the regulator |
79 | output current level as this value is the same regardless of | 117 | output current level as this value is the same regardless of |
@@ -87,7 +125,7 @@ Contact: Liam Girdwood <lrg@slimlogic.co.uk> | |||
87 | Description: | 125 | Description: |
88 | Some regulator directories will contain a field called | 126 | Some regulator directories will contain a field called |
89 | opmode. This holds the current regulator operating mode, | 127 | opmode. This holds the current regulator operating mode, |
90 | for regulators which can report it. | 128 | for regulators which can report that control input value. |
91 | 129 | ||
92 | The opmode value can be one of the following strings: | 130 | The opmode value can be one of the following strings: |
93 | 131 | ||
@@ -101,7 +139,8 @@ Description: | |||
101 | 139 | ||
102 | NOTE: This value should not be used to determine the regulator | 140 | NOTE: This value should not be used to determine the regulator |
103 | output operating mode as this value is the same regardless of | 141 | output operating mode as this value is the same regardless of |
104 | whether the regulator is enabled or disabled. | 142 | whether the regulator is enabled or disabled. A "status" |
143 | attribute may be available to determine the actual mode. | ||
105 | 144 | ||
106 | 145 | ||
107 | What: /sys/class/regulator/.../min_microvolts | 146 | What: /sys/class/regulator/.../min_microvolts |
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable new file mode 100644 index 000000000000..175bb4f70512 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable | |||
@@ -0,0 +1,18 @@ | |||
1 | What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X | ||
2 | Date: August 2008 | ||
3 | KernelVersion: 2.6.27 | ||
4 | Contact: mark.langsdorf@amd.com | ||
5 | Description: These files exist in every cpu's cache index directories. | ||
6 | There are currently 2 cache_disable_# files in each | ||
7 | directory. Reading from these files on a supported | ||
8 | processor will return that cache disable index value | ||
9 | for that processor and node. Writing to one of these | ||
10 | files will cause the specificed cache index to be disabled. | ||
11 | |||
12 | Currently, only AMD Family 10h Processors support cache index | ||
13 | disable, and only for their L3 caches. See the BIOS and | ||
14 | Kernel Developer's Guide at | ||
15 | http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf | ||
16 | for formatting information and other details on the | ||
17 | cache index disable. | ||
18 | Users: joachim.deguara@amd.com | ||
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index e8ffc70ffe12..4f9ba3c2fca7 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi | |||
@@ -69,9 +69,13 @@ Description: | |||
69 | gpe1F: 0 invalid | 69 | gpe1F: 0 invalid |
70 | gpe_all: 1192 | 70 | gpe_all: 1192 |
71 | sci: 1194 | 71 | sci: 1194 |
72 | sci_not: 0 | ||
72 | 73 | ||
73 | sci - The total number of times the ACPI SCI | 74 | sci - The number of times the ACPI SCI |
74 | has claimed an interrupt. | 75 | has been called and claimed an interrupt. |
76 | |||
77 | sci_not - The number of times the ACPI SCI | ||
78 | has been called and NOT claimed an interrupt. | ||
75 | 79 | ||
76 | gpe_all - count of SCI caused by GPEs. | 80 | gpe_all - count of SCI caused by GPEs. |
77 | 81 | ||
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4 new file mode 100644 index 000000000000..5fb709997d96 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-ext4 | |||
@@ -0,0 +1,91 @@ | |||
1 | What: /sys/fs/ext4/<disk>/mb_stats | ||
2 | Date: March 2008 | ||
3 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
4 | Description: | ||
5 | Controls whether the multiblock allocator should | ||
6 | collect statistics, which are shown during the unmount. | ||
7 | 1 means to collect statistics, 0 means not to collect | ||
8 | statistics | ||
9 | |||
10 | What: /sys/fs/ext4/<disk>/mb_group_prealloc | ||
11 | Date: March 2008 | ||
12 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
13 | Description: | ||
14 | The multiblock allocator will round up allocation | ||
15 | requests to a multiple of this tuning parameter if the | ||
16 | stripe size is not set in the ext4 superblock | ||
17 | |||
18 | What: /sys/fs/ext4/<disk>/mb_max_to_scan | ||
19 | Date: March 2008 | ||
20 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
21 | Description: | ||
22 | The maximum number of extents the multiblock allocator | ||
23 | will search to find the best extent | ||
24 | |||
25 | What: /sys/fs/ext4/<disk>/mb_min_to_scan | ||
26 | Date: March 2008 | ||
27 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
28 | Description: | ||
29 | The minimum number of extents the multiblock allocator | ||
30 | will search to find the best extent | ||
31 | |||
32 | What: /sys/fs/ext4/<disk>/mb_order2_req | ||
33 | Date: March 2008 | ||
34 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
35 | Description: | ||
36 | Tuning parameter which controls the minimum size for | ||
37 | requests (as a power of 2) where the buddy cache is | ||
38 | used | ||
39 | |||
40 | What: /sys/fs/ext4/<disk>/mb_stream_req | ||
41 | Date: March 2008 | ||
42 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
43 | Description: | ||
44 | Files which have fewer blocks than this tunable | ||
45 | parameter will have their blocks allocated out of a | ||
46 | block group specific preallocation pool, so that small | ||
47 | files are packed closely together. Each large file | ||
48 | will have its blocks allocated out of its own unique | ||
49 | preallocation pool. | ||
50 | |||
51 | What: /sys/fs/ext4/<disk>/inode_readahead | ||
52 | Date: March 2008 | ||
53 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
54 | Description: | ||
55 | Tuning parameter which controls the maximum number of | ||
56 | inode table blocks that ext4's inode table readahead | ||
57 | algorithm will pre-read into the buffer cache | ||
58 | |||
59 | What: /sys/fs/ext4/<disk>/delayed_allocation_blocks | ||
60 | Date: March 2008 | ||
61 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
62 | Description: | ||
63 | This file is read-only and shows the number of blocks | ||
64 | that are dirty in the page cache, but which do not | ||
65 | have their location in the filesystem allocated yet. | ||
66 | |||
67 | What: /sys/fs/ext4/<disk>/lifetime_write_kbytes | ||
68 | Date: March 2008 | ||
69 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
70 | Description: | ||
71 | This file is read-only and shows the number of kilobytes | ||
72 | of data that have been written to this filesystem since it was | ||
73 | created. | ||
74 | |||
75 | What: /sys/fs/ext4/<disk>/session_write_kbytes | ||
76 | Date: March 2008 | ||
77 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
78 | Description: | ||
79 | This file is read-only and shows the number of | ||
80 | kilobytes of data that have been written to this | ||
81 | filesystem since it was mounted. | ||
82 | |||
83 | What: /sys/fs/ext4/<disk>/inode_goal | ||
84 | Date: June 2008 | ||
85 | Contact: "Theodore Ts'o" <tytso@mit.edu> | ||
86 | Description: | ||
87 | Tuning parameter which (if non-zero) controls the goal | ||
88 | inode used by the inode allocator in p0reference to | ||
89 | all other allocation hueristics. This is intended for | ||
90 | debugging use only, and should be 0 on production | ||
91 | systems. | ||
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab new file mode 100644 index 000000000000..6dcf75e594fb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-slab | |||
@@ -0,0 +1,479 @@ | |||
1 | What: /sys/kernel/slab | ||
2 | Date: May 2007 | ||
3 | KernelVersion: 2.6.22 | ||
4 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
5 | Christoph Lameter <cl@linux-foundation.org> | ||
6 | Description: | ||
7 | The /sys/kernel/slab directory contains a snapshot of the | ||
8 | internal state of the SLUB allocator for each cache. Certain | ||
9 | files may be modified to change the behavior of the cache (and | ||
10 | any cache it aliases, if any). | ||
11 | Users: kernel memory tuning tools | ||
12 | |||
13 | What: /sys/kernel/slab/cache/aliases | ||
14 | Date: May 2007 | ||
15 | KernelVersion: 2.6.22 | ||
16 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
17 | Christoph Lameter <cl@linux-foundation.org> | ||
18 | Description: | ||
19 | The aliases file is read-only and specifies how many caches | ||
20 | have merged into this cache. | ||
21 | |||
22 | What: /sys/kernel/slab/cache/align | ||
23 | Date: May 2007 | ||
24 | KernelVersion: 2.6.22 | ||
25 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
26 | Christoph Lameter <cl@linux-foundation.org> | ||
27 | Description: | ||
28 | The align file is read-only and specifies the cache's object | ||
29 | alignment in bytes. | ||
30 | |||
31 | What: /sys/kernel/slab/cache/alloc_calls | ||
32 | Date: May 2007 | ||
33 | KernelVersion: 2.6.22 | ||
34 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
35 | Christoph Lameter <cl@linux-foundation.org> | ||
36 | Description: | ||
37 | The alloc_calls file is read-only and lists the kernel code | ||
38 | locations from which allocations for this cache were performed. | ||
39 | The alloc_calls file only contains information if debugging is | ||
40 | enabled for that cache (see Documentation/vm/slub.txt). | ||
41 | |||
42 | What: /sys/kernel/slab/cache/alloc_fastpath | ||
43 | Date: February 2008 | ||
44 | KernelVersion: 2.6.25 | ||
45 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
46 | Christoph Lameter <cl@linux-foundation.org> | ||
47 | Description: | ||
48 | The alloc_fastpath file is read-only and specifies how many | ||
49 | objects have been allocated using the fast path. | ||
50 | Available when CONFIG_SLUB_STATS is enabled. | ||
51 | |||
52 | What: /sys/kernel/slab/cache/alloc_from_partial | ||
53 | Date: February 2008 | ||
54 | KernelVersion: 2.6.25 | ||
55 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
56 | Christoph Lameter <cl@linux-foundation.org> | ||
57 | Description: | ||
58 | The alloc_from_partial file is read-only and specifies how | ||
59 | many times a cpu slab has been full and it has been refilled | ||
60 | by using a slab from the list of partially used slabs. | ||
61 | Available when CONFIG_SLUB_STATS is enabled. | ||
62 | |||
63 | What: /sys/kernel/slab/cache/alloc_refill | ||
64 | Date: February 2008 | ||
65 | KernelVersion: 2.6.25 | ||
66 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
67 | Christoph Lameter <cl@linux-foundation.org> | ||
68 | Description: | ||
69 | The alloc_refill file is read-only and specifies how many | ||
70 | times the per-cpu freelist was empty but there were objects | ||
71 | available as the result of remote cpu frees. | ||
72 | Available when CONFIG_SLUB_STATS is enabled. | ||
73 | |||
74 | What: /sys/kernel/slab/cache/alloc_slab | ||
75 | Date: February 2008 | ||
76 | KernelVersion: 2.6.25 | ||
77 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
78 | Christoph Lameter <cl@linux-foundation.org> | ||
79 | Description: | ||
80 | The alloc_slab file is read-only and specifies how many times | ||
81 | a new slab had to be allocated from the page allocator. | ||
82 | Available when CONFIG_SLUB_STATS is enabled. | ||
83 | |||
84 | What: /sys/kernel/slab/cache/alloc_slowpath | ||
85 | Date: February 2008 | ||
86 | KernelVersion: 2.6.25 | ||
87 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
88 | Christoph Lameter <cl@linux-foundation.org> | ||
89 | Description: | ||
90 | The alloc_slowpath file is read-only and specifies how many | ||
91 | objects have been allocated using the slow path because of a | ||
92 | refill or allocation from a partial or new slab. | ||
93 | Available when CONFIG_SLUB_STATS is enabled. | ||
94 | |||
95 | What: /sys/kernel/slab/cache/cache_dma | ||
96 | Date: May 2007 | ||
97 | KernelVersion: 2.6.22 | ||
98 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
99 | Christoph Lameter <cl@linux-foundation.org> | ||
100 | Description: | ||
101 | The cache_dma file is read-only and specifies whether objects | ||
102 | are from ZONE_DMA. | ||
103 | Available when CONFIG_ZONE_DMA is enabled. | ||
104 | |||
105 | What: /sys/kernel/slab/cache/cpu_slabs | ||
106 | Date: May 2007 | ||
107 | KernelVersion: 2.6.22 | ||
108 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
109 | Christoph Lameter <cl@linux-foundation.org> | ||
110 | Description: | ||
111 | The cpu_slabs file is read-only and displays how many cpu slabs | ||
112 | are active and their NUMA locality. | ||
113 | |||
114 | What: /sys/kernel/slab/cache/cpuslab_flush | ||
115 | Date: April 2009 | ||
116 | KernelVersion: 2.6.31 | ||
117 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
118 | Christoph Lameter <cl@linux-foundation.org> | ||
119 | Description: | ||
120 | The file cpuslab_flush is read-only and specifies how many | ||
121 | times a cache's cpu slabs have been flushed as the result of | ||
122 | destroying or shrinking a cache, a cpu going offline, or as | ||
123 | the result of forcing an allocation from a certain node. | ||
124 | Available when CONFIG_SLUB_STATS is enabled. | ||
125 | |||
126 | What: /sys/kernel/slab/cache/ctor | ||
127 | Date: May 2007 | ||
128 | KernelVersion: 2.6.22 | ||
129 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
130 | Christoph Lameter <cl@linux-foundation.org> | ||
131 | Description: | ||
132 | The ctor file is read-only and specifies the cache's object | ||
133 | constructor function, which is invoked for each object when a | ||
134 | new slab is allocated. | ||
135 | |||
136 | What: /sys/kernel/slab/cache/deactivate_empty | ||
137 | Date: February 2008 | ||
138 | KernelVersion: 2.6.25 | ||
139 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
140 | Christoph Lameter <cl@linux-foundation.org> | ||
141 | Description: | ||
142 | The file deactivate_empty is read-only and specifies how many | ||
143 | times an empty cpu slab was deactivated. | ||
144 | Available when CONFIG_SLUB_STATS is enabled. | ||
145 | |||
146 | What: /sys/kernel/slab/cache/deactivate_full | ||
147 | Date: February 2008 | ||
148 | KernelVersion: 2.6.25 | ||
149 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
150 | Christoph Lameter <cl@linux-foundation.org> | ||
151 | Description: | ||
152 | The file deactivate_full is read-only and specifies how many | ||
153 | times a full cpu slab was deactivated. | ||
154 | Available when CONFIG_SLUB_STATS is enabled. | ||
155 | |||
156 | What: /sys/kernel/slab/cache/deactivate_remote_frees | ||
157 | Date: February 2008 | ||
158 | KernelVersion: 2.6.25 | ||
159 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
160 | Christoph Lameter <cl@linux-foundation.org> | ||
161 | Description: | ||
162 | The file deactivate_remote_frees is read-only and specifies how | ||
163 | many times a cpu slab has been deactivated and contained free | ||
164 | objects that were freed remotely. | ||
165 | Available when CONFIG_SLUB_STATS is enabled. | ||
166 | |||
167 | What: /sys/kernel/slab/cache/deactivate_to_head | ||
168 | Date: February 2008 | ||
169 | KernelVersion: 2.6.25 | ||
170 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
171 | Christoph Lameter <cl@linux-foundation.org> | ||
172 | Description: | ||
173 | The file deactivate_to_head is read-only and specifies how | ||
174 | many times a partial cpu slab was deactivated and added to the | ||
175 | head of its node's partial list. | ||
176 | Available when CONFIG_SLUB_STATS is enabled. | ||
177 | |||
178 | What: /sys/kernel/slab/cache/deactivate_to_tail | ||
179 | Date: February 2008 | ||
180 | KernelVersion: 2.6.25 | ||
181 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
182 | Christoph Lameter <cl@linux-foundation.org> | ||
183 | Description: | ||
184 | The file deactivate_to_tail is read-only and specifies how | ||
185 | many times a partial cpu slab was deactivated and added to the | ||
186 | tail of its node's partial list. | ||
187 | Available when CONFIG_SLUB_STATS is enabled. | ||
188 | |||
189 | What: /sys/kernel/slab/cache/destroy_by_rcu | ||
190 | Date: May 2007 | ||
191 | KernelVersion: 2.6.22 | ||
192 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
193 | Christoph Lameter <cl@linux-foundation.org> | ||
194 | Description: | ||
195 | The destroy_by_rcu file is read-only and specifies whether | ||
196 | slabs (not objects) are freed by rcu. | ||
197 | |||
198 | What: /sys/kernel/slab/cache/free_add_partial | ||
199 | Date: February 2008 | ||
200 | KernelVersion: 2.6.25 | ||
201 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
202 | Christoph Lameter <cl@linux-foundation.org> | ||
203 | Description: | ||
204 | The file free_add_partial is read-only and specifies how many | ||
205 | times an object has been freed in a full slab so that it had to | ||
206 | added to its node's partial list. | ||
207 | Available when CONFIG_SLUB_STATS is enabled. | ||
208 | |||
209 | What: /sys/kernel/slab/cache/free_calls | ||
210 | Date: May 2007 | ||
211 | KernelVersion: 2.6.22 | ||
212 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
213 | Christoph Lameter <cl@linux-foundation.org> | ||
214 | Description: | ||
215 | The free_calls file is read-only and lists the locations of | ||
216 | object frees if slab debugging is enabled (see | ||
217 | Documentation/vm/slub.txt). | ||
218 | |||
219 | What: /sys/kernel/slab/cache/free_fastpath | ||
220 | Date: February 2008 | ||
221 | KernelVersion: 2.6.25 | ||
222 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
223 | Christoph Lameter <cl@linux-foundation.org> | ||
224 | Description: | ||
225 | The free_fastpath file is read-only and specifies how many | ||
226 | objects have been freed using the fast path because it was an | ||
227 | object from the cpu slab. | ||
228 | Available when CONFIG_SLUB_STATS is enabled. | ||
229 | |||
230 | What: /sys/kernel/slab/cache/free_frozen | ||
231 | Date: February 2008 | ||
232 | KernelVersion: 2.6.25 | ||
233 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
234 | Christoph Lameter <cl@linux-foundation.org> | ||
235 | Description: | ||
236 | The free_frozen file is read-only and specifies how many | ||
237 | objects have been freed to a frozen slab (i.e. a remote cpu | ||
238 | slab). | ||
239 | Available when CONFIG_SLUB_STATS is enabled. | ||
240 | |||
241 | What: /sys/kernel/slab/cache/free_remove_partial | ||
242 | Date: February 2008 | ||
243 | KernelVersion: 2.6.25 | ||
244 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
245 | Christoph Lameter <cl@linux-foundation.org> | ||
246 | Description: | ||
247 | The file free_remove_partial is read-only and specifies how | ||
248 | many times an object has been freed to a now-empty slab so | ||
249 | that it had to be removed from its node's partial list. | ||
250 | Available when CONFIG_SLUB_STATS is enabled. | ||
251 | |||
252 | What: /sys/kernel/slab/cache/free_slab | ||
253 | Date: February 2008 | ||
254 | KernelVersion: 2.6.25 | ||
255 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
256 | Christoph Lameter <cl@linux-foundation.org> | ||
257 | Description: | ||
258 | The free_slab file is read-only and specifies how many times an | ||
259 | empty slab has been freed back to the page allocator. | ||
260 | Available when CONFIG_SLUB_STATS is enabled. | ||
261 | |||
262 | What: /sys/kernel/slab/cache/free_slowpath | ||
263 | Date: February 2008 | ||
264 | KernelVersion: 2.6.25 | ||
265 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
266 | Christoph Lameter <cl@linux-foundation.org> | ||
267 | Description: | ||
268 | The free_slowpath file is read-only and specifies how many | ||
269 | objects have been freed using the slow path (i.e. to a full or | ||
270 | partial slab). | ||
271 | Available when CONFIG_SLUB_STATS is enabled. | ||
272 | |||
273 | What: /sys/kernel/slab/cache/hwcache_align | ||
274 | Date: May 2007 | ||
275 | KernelVersion: 2.6.22 | ||
276 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
277 | Christoph Lameter <cl@linux-foundation.org> | ||
278 | Description: | ||
279 | The hwcache_align file is read-only and specifies whether | ||
280 | objects are aligned on cachelines. | ||
281 | |||
282 | What: /sys/kernel/slab/cache/min_partial | ||
283 | Date: February 2009 | ||
284 | KernelVersion: 2.6.30 | ||
285 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
286 | David Rientjes <rientjes@google.com> | ||
287 | Description: | ||
288 | The min_partial file specifies how many empty slabs shall | ||
289 | remain on a node's partial list to avoid the overhead of | ||
290 | allocating new slabs. Such slabs may be reclaimed by utilizing | ||
291 | the shrink file. | ||
292 | |||
293 | What: /sys/kernel/slab/cache/object_size | ||
294 | Date: May 2007 | ||
295 | KernelVersion: 2.6.22 | ||
296 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
297 | Christoph Lameter <cl@linux-foundation.org> | ||
298 | Description: | ||
299 | The object_size file is read-only and specifies the cache's | ||
300 | object size. | ||
301 | |||
302 | What: /sys/kernel/slab/cache/objects | ||
303 | Date: May 2007 | ||
304 | KernelVersion: 2.6.22 | ||
305 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
306 | Christoph Lameter <cl@linux-foundation.org> | ||
307 | Description: | ||
308 | The objects file is read-only and displays how many objects are | ||
309 | active and from which nodes they are from. | ||
310 | |||
311 | What: /sys/kernel/slab/cache/objects_partial | ||
312 | Date: April 2008 | ||
313 | KernelVersion: 2.6.26 | ||
314 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
315 | Christoph Lameter <cl@linux-foundation.org> | ||
316 | Description: | ||
317 | The objects_partial file is read-only and displays how many | ||
318 | objects are on partial slabs and from which nodes they are | ||
319 | from. | ||
320 | |||
321 | What: /sys/kernel/slab/cache/objs_per_slab | ||
322 | Date: May 2007 | ||
323 | KernelVersion: 2.6.22 | ||
324 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
325 | Christoph Lameter <cl@linux-foundation.org> | ||
326 | Description: | ||
327 | The file objs_per_slab is read-only and specifies how many | ||
328 | objects may be allocated from a single slab of the order | ||
329 | specified in /sys/kernel/slab/cache/order. | ||
330 | |||
331 | What: /sys/kernel/slab/cache/order | ||
332 | Date: May 2007 | ||
333 | KernelVersion: 2.6.22 | ||
334 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
335 | Christoph Lameter <cl@linux-foundation.org> | ||
336 | Description: | ||
337 | The order file specifies the page order at which new slabs are | ||
338 | allocated. It is writable and can be changed to increase the | ||
339 | number of objects per slab. If a slab cannot be allocated | ||
340 | because of fragmentation, SLUB will retry with the minimum order | ||
341 | possible depending on its characteristics. | ||
342 | |||
343 | What: /sys/kernel/slab/cache/order_fallback | ||
344 | Date: April 2008 | ||
345 | KernelVersion: 2.6.26 | ||
346 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
347 | Christoph Lameter <cl@linux-foundation.org> | ||
348 | Description: | ||
349 | The file order_fallback is read-only and specifies how many | ||
350 | times an allocation of a new slab has not been possible at the | ||
351 | cache's order and instead fallen back to its minimum possible | ||
352 | order. | ||
353 | Available when CONFIG_SLUB_STATS is enabled. | ||
354 | |||
355 | What: /sys/kernel/slab/cache/partial | ||
356 | Date: May 2007 | ||
357 | KernelVersion: 2.6.22 | ||
358 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
359 | Christoph Lameter <cl@linux-foundation.org> | ||
360 | Description: | ||
361 | The partial file is read-only and displays how long many | ||
362 | partial slabs there are and how long each node's list is. | ||
363 | |||
364 | What: /sys/kernel/slab/cache/poison | ||
365 | Date: May 2007 | ||
366 | KernelVersion: 2.6.22 | ||
367 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
368 | Christoph Lameter <cl@linux-foundation.org> | ||
369 | Description: | ||
370 | The poison file specifies whether objects should be poisoned | ||
371 | when a new slab is allocated. | ||
372 | |||
373 | What: /sys/kernel/slab/cache/reclaim_account | ||
374 | Date: May 2007 | ||
375 | KernelVersion: 2.6.22 | ||
376 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
377 | Christoph Lameter <cl@linux-foundation.org> | ||
378 | Description: | ||
379 | The reclaim_account file specifies whether the cache's objects | ||
380 | are reclaimable (and grouped by their mobility). | ||
381 | |||
382 | What: /sys/kernel/slab/cache/red_zone | ||
383 | Date: May 2007 | ||
384 | KernelVersion: 2.6.22 | ||
385 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
386 | Christoph Lameter <cl@linux-foundation.org> | ||
387 | Description: | ||
388 | The red_zone file specifies whether the cache's objects are red | ||
389 | zoned. | ||
390 | |||
391 | What: /sys/kernel/slab/cache/remote_node_defrag_ratio | ||
392 | Date: January 2008 | ||
393 | KernelVersion: 2.6.25 | ||
394 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
395 | Christoph Lameter <cl@linux-foundation.org> | ||
396 | Description: | ||
397 | The file remote_node_defrag_ratio specifies the percentage of | ||
398 | times SLUB will attempt to refill the cpu slab with a partial | ||
399 | slab from a remote node as opposed to allocating a new slab on | ||
400 | the local node. This reduces the amount of wasted memory over | ||
401 | the entire system but can be expensive. | ||
402 | Available when CONFIG_NUMA is enabled. | ||
403 | |||
404 | What: /sys/kernel/slab/cache/sanity_checks | ||
405 | Date: May 2007 | ||
406 | KernelVersion: 2.6.22 | ||
407 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
408 | Christoph Lameter <cl@linux-foundation.org> | ||
409 | Description: | ||
410 | The sanity_checks file specifies whether expensive checks | ||
411 | should be performed on free and, at minimum, enables double free | ||
412 | checks. Caches that enable sanity_checks cannot be merged with | ||
413 | caches that do not. | ||
414 | |||
415 | What: /sys/kernel/slab/cache/shrink | ||
416 | Date: May 2007 | ||
417 | KernelVersion: 2.6.22 | ||
418 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
419 | Christoph Lameter <cl@linux-foundation.org> | ||
420 | Description: | ||
421 | The shrink file is written when memory should be reclaimed from | ||
422 | a cache. Empty partial slabs are freed and the partial list is | ||
423 | sorted so the slabs with the fewest available objects are used | ||
424 | first. | ||
425 | |||
426 | What: /sys/kernel/slab/cache/slab_size | ||
427 | Date: May 2007 | ||
428 | KernelVersion: 2.6.22 | ||
429 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
430 | Christoph Lameter <cl@linux-foundation.org> | ||
431 | Description: | ||
432 | The slab_size file is read-only and specifies the object size | ||
433 | with metadata (debugging information and alignment) in bytes. | ||
434 | |||
435 | What: /sys/kernel/slab/cache/slabs | ||
436 | Date: May 2007 | ||
437 | KernelVersion: 2.6.22 | ||
438 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
439 | Christoph Lameter <cl@linux-foundation.org> | ||
440 | Description: | ||
441 | The slabs file is read-only and displays how long many slabs | ||
442 | there are (both cpu and partial) and from which nodes they are | ||
443 | from. | ||
444 | |||
445 | What: /sys/kernel/slab/cache/store_user | ||
446 | Date: May 2007 | ||
447 | KernelVersion: 2.6.22 | ||
448 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
449 | Christoph Lameter <cl@linux-foundation.org> | ||
450 | Description: | ||
451 | The store_user file specifies whether the location of | ||
452 | allocation or free should be tracked for a cache. | ||
453 | |||
454 | What: /sys/kernel/slab/cache/total_objects | ||
455 | Date: April 2008 | ||
456 | KernelVersion: 2.6.26 | ||
457 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
458 | Christoph Lameter <cl@linux-foundation.org> | ||
459 | Description: | ||
460 | The total_objects file is read-only and displays how many total | ||
461 | objects a cache has and from which nodes they are from. | ||
462 | |||
463 | What: /sys/kernel/slab/cache/trace | ||
464 | Date: May 2007 | ||
465 | KernelVersion: 2.6.22 | ||
466 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
467 | Christoph Lameter <cl@linux-foundation.org> | ||
468 | Description: | ||
469 | The trace file specifies whether object allocations and frees | ||
470 | should be traced. | ||
471 | |||
472 | What: /sys/kernel/slab/cache/validate | ||
473 | Date: May 2007 | ||
474 | KernelVersion: 2.6.22 | ||
475 | Contact: Pekka Enberg <penberg@cs.helsinki.fi>, | ||
476 | Christoph Lameter <cl@linux-foundation.org> | ||
477 | Description: | ||
478 | Writing to the validate file causes SLUB to traverse all of its | ||
479 | cache's objects and check the validity of metadata. | ||
diff --git a/Documentation/ABI/testing/sysfs-pps b/Documentation/ABI/testing/sysfs-pps new file mode 100644 index 000000000000..25028c7bc37d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-pps | |||
@@ -0,0 +1,73 @@ | |||
1 | What: /sys/class/pps/ | ||
2 | Date: February 2008 | ||
3 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
4 | Description: | ||
5 | The /sys/class/pps/ directory will contain files and | ||
6 | directories that will provide a unified interface to | ||
7 | the PPS sources. | ||
8 | |||
9 | What: /sys/class/pps/ppsX/ | ||
10 | Date: February 2008 | ||
11 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
12 | Description: | ||
13 | The /sys/class/pps/ppsX/ directory is related to X-th | ||
14 | PPS source into the system. Each directory will | ||
15 | contain files to manage and control its PPS source. | ||
16 | |||
17 | What: /sys/class/pps/ppsX/assert | ||
18 | Date: February 2008 | ||
19 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
20 | Description: | ||
21 | The /sys/class/pps/ppsX/assert file reports the assert events | ||
22 | and the assert sequence number of the X-th source in the form: | ||
23 | |||
24 | <secs>.<nsec>#<sequence> | ||
25 | |||
26 | If the source has no assert events the content of this file | ||
27 | is empty. | ||
28 | |||
29 | What: /sys/class/pps/ppsX/clear | ||
30 | Date: February 2008 | ||
31 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
32 | Description: | ||
33 | The /sys/class/pps/ppsX/clear file reports the clear events | ||
34 | and the clear sequence number of the X-th source in the form: | ||
35 | |||
36 | <secs>.<nsec>#<sequence> | ||
37 | |||
38 | If the source has no clear events the content of this file | ||
39 | is empty. | ||
40 | |||
41 | What: /sys/class/pps/ppsX/mode | ||
42 | Date: February 2008 | ||
43 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
44 | Description: | ||
45 | The /sys/class/pps/ppsX/mode file reports the functioning | ||
46 | mode of the X-th source in hexadecimal encoding. | ||
47 | |||
48 | Please, refer to linux/include/linux/pps.h for further | ||
49 | info. | ||
50 | |||
51 | What: /sys/class/pps/ppsX/echo | ||
52 | Date: February 2008 | ||
53 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
54 | Description: | ||
55 | The /sys/class/pps/ppsX/echo file reports if the X-th does | ||
56 | or does not support an "echo" function. | ||
57 | |||
58 | What: /sys/class/pps/ppsX/name | ||
59 | Date: February 2008 | ||
60 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
61 | Description: | ||
62 | The /sys/class/pps/ppsX/name file reports the name of the | ||
63 | X-th source. | ||
64 | |||
65 | What: /sys/class/pps/ppsX/path | ||
66 | Date: February 2008 | ||
67 | Contact: Rodolfo Giometti <giometti@linux.it> | ||
68 | Description: | ||
69 | The /sys/class/pps/ppsX/path file reports the path name of | ||
70 | the device connected with the X-th source. | ||
71 | |||
72 | If the source is not connected with any device the content | ||
73 | of this file is empty. | ||
diff --git a/Documentation/Changes b/Documentation/Changes index b95082be4d5e..6d0f1efc5bf6 100644 --- a/Documentation/Changes +++ b/Documentation/Changes | |||
@@ -29,7 +29,7 @@ hardware, for example, you probably needn't concern yourself with | |||
29 | isdn4k-utils. | 29 | isdn4k-utils. |
30 | 30 | ||
31 | o Gnu C 3.2 # gcc --version | 31 | o Gnu C 3.2 # gcc --version |
32 | o Gnu make 3.79.1 # make --version | 32 | o Gnu make 3.80 # make --version |
33 | o binutils 2.12 # ld -v | 33 | o binutils 2.12 # ld -v |
34 | o util-linux 2.10o # fdformat --version | 34 | o util-linux 2.10o # fdformat --version |
35 | o module-init-tools 0.9.10 # depmod -V | 35 | o module-init-tools 0.9.10 # depmod -V |
@@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version | |||
48 | o oprofile 0.9 # oprofiled --version | 48 | o oprofile 0.9 # oprofiled --version |
49 | o udev 081 # udevinfo -V | 49 | o udev 081 # udevinfo -V |
50 | o grub 0.93 # grub --version | 50 | o grub 0.93 # grub --version |
51 | o mcelog 0.6 | ||
51 | 52 | ||
52 | Kernel compilation | 53 | Kernel compilation |
53 | ================== | 54 | ================== |
@@ -61,7 +62,7 @@ computer. | |||
61 | Make | 62 | Make |
62 | ---- | 63 | ---- |
63 | 64 | ||
64 | You will need Gnu make 3.79.1 or later to build the kernel. | 65 | You will need Gnu make 3.80 or later to build the kernel. |
65 | 66 | ||
66 | Binutils | 67 | Binutils |
67 | -------- | 68 | -------- |
@@ -71,6 +72,13 @@ assembling the 16-bit boot code, removing the need for as86 to compile | |||
71 | your kernel. This change does, however, mean that you need a recent | 72 | your kernel. This change does, however, mean that you need a recent |
72 | release of binutils. | 73 | release of binutils. |
73 | 74 | ||
75 | Perl | ||
76 | ---- | ||
77 | |||
78 | You will need perl 5 and the following modules: Getopt::Long, Getopt::Std, | ||
79 | File::Basename, and File::Find to build the kernel. | ||
80 | |||
81 | |||
74 | System utilities | 82 | System utilities |
75 | ================ | 83 | ================ |
76 | 84 | ||
@@ -276,6 +284,16 @@ before running exportfs or mountd. It is recommended that all NFS | |||
276 | services be protected from the internet-at-large by a firewall where | 284 | services be protected from the internet-at-large by a firewall where |
277 | that is possible. | 285 | that is possible. |
278 | 286 | ||
287 | mcelog | ||
288 | ------ | ||
289 | |||
290 | In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility | ||
291 | as a regular cronjob similar to the x86-64 kernel to process and log | ||
292 | machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check | ||
293 | events are errors reported by the CPU. Processing them is strongly encouraged. | ||
294 | All x86-64 kernels since 2.6.4 require the mcelog utility to | ||
295 | process machine checks. | ||
296 | |||
279 | Getting updated software | 297 | Getting updated software |
280 | ======================== | 298 | ======================== |
281 | 299 | ||
@@ -365,6 +383,10 @@ FUSE | |||
365 | ---- | 383 | ---- |
366 | o <http://sourceforge.net/projects/fuse> | 384 | o <http://sourceforge.net/projects/fuse> |
367 | 385 | ||
386 | mcelog | ||
387 | ------ | ||
388 | o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/> | ||
389 | |||
368 | Networking | 390 | Networking |
369 | ********** | 391 | ********** |
370 | 392 | ||
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 72968cd5eaf3..8bb37237ebd2 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle | |||
@@ -698,8 +698,8 @@ very often is not. Abundant use of the inline keyword leads to a much bigger | |||
698 | kernel, which in turn slows the system as a whole down, due to a bigger | 698 | kernel, which in turn slows the system as a whole down, due to a bigger |
699 | icache footprint for the CPU and simply because there is less memory | 699 | icache footprint for the CPU and simply because there is less memory |
700 | available for the pagecache. Just think about it; a pagecache miss causes a | 700 | available for the pagecache. Just think about it; a pagecache miss causes a |
701 | disk seek, which easily takes 5 miliseconds. There are a LOT of cpu cycles | 701 | disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles |
702 | that can go into these 5 miliseconds. | 702 | that can go into these 5 milliseconds. |
703 | 703 | ||
704 | A reasonable rule of thumb is to not put inline at functions that have more | 704 | A reasonable rule of thumb is to not put inline at functions that have more |
705 | than 3 lines of code in them. An exception to this rule are the cases where | 705 | than 3 lines of code in them. An exception to this rule are the cases where |
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 2a3fcc55e981..5aceb88b3f8b 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt | |||
@@ -609,3 +609,121 @@ size is the size (and should be a page-sized multiple). | |||
609 | The return value will be either a pointer to the processor virtual | 609 | The return value will be either a pointer to the processor virtual |
610 | address of the memory, or an error (via PTR_ERR()) if any part of the | 610 | address of the memory, or an error (via PTR_ERR()) if any part of the |
611 | region is occupied. | 611 | region is occupied. |
612 | |||
613 | Part III - Debug drivers use of the DMA-API | ||
614 | ------------------------------------------- | ||
615 | |||
616 | The DMA-API as described above as some constraints. DMA addresses must be | ||
617 | released with the corresponding function with the same size for example. With | ||
618 | the advent of hardware IOMMUs it becomes more and more important that drivers | ||
619 | do not violate those constraints. In the worst case such a violation can | ||
620 | result in data corruption up to destroyed filesystems. | ||
621 | |||
622 | To debug drivers and find bugs in the usage of the DMA-API checking code can | ||
623 | be compiled into the kernel which will tell the developer about those | ||
624 | violations. If your architecture supports it you can select the "Enable | ||
625 | debugging of DMA-API usage" option in your kernel configuration. Enabling this | ||
626 | option has a performance impact. Do not enable it in production kernels. | ||
627 | |||
628 | If you boot the resulting kernel will contain code which does some bookkeeping | ||
629 | about what DMA memory was allocated for which device. If this code detects an | ||
630 | error it prints a warning message with some details into your kernel log. An | ||
631 | example warning message may look like this: | ||
632 | |||
633 | ------------[ cut here ]------------ | ||
634 | WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448 | ||
635 | check_unmap+0x203/0x490() | ||
636 | Hardware name: | ||
637 | forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong | ||
638 | function [device address=0x00000000640444be] [size=66 bytes] [mapped as | ||
639 | single] [unmapped as page] | ||
640 | Modules linked in: nfsd exportfs bridge stp llc r8169 | ||
641 | Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1 | ||
642 | Call Trace: | ||
643 | <IRQ> [<ffffffff80240b22>] warn_slowpath+0xf2/0x130 | ||
644 | [<ffffffff80647b70>] _spin_unlock+0x10/0x30 | ||
645 | [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0 | ||
646 | [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40 | ||
647 | [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0 | ||
648 | [<ffffffff80252f96>] queue_work+0x56/0x60 | ||
649 | [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50 | ||
650 | [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0 | ||
651 | [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40 | ||
652 | [<ffffffff80235177>] find_busiest_group+0x207/0x8a0 | ||
653 | [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50 | ||
654 | [<ffffffff803c7ea3>] check_unmap+0x203/0x490 | ||
655 | [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50 | ||
656 | [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0 | ||
657 | [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0 | ||
658 | [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70 | ||
659 | [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150 | ||
660 | [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0 | ||
661 | [<ffffffff8020c093>] ret_from_intr+0x0/0xa | ||
662 | <EOI> <4>---[ end trace f6435a98e2a38c0e ]--- | ||
663 | |||
664 | The driver developer can find the driver and the device including a stacktrace | ||
665 | of the DMA-API call which caused this warning. | ||
666 | |||
667 | Per default only the first error will result in a warning message. All other | ||
668 | errors will only silently counted. This limitation exist to prevent the code | ||
669 | from flooding your kernel log. To support debugging a device driver this can | ||
670 | be disabled via debugfs. See the debugfs interface documentation below for | ||
671 | details. | ||
672 | |||
673 | The debugfs directory for the DMA-API debugging code is called dma-api/. In | ||
674 | this directory the following files can currently be found: | ||
675 | |||
676 | dma-api/all_errors This file contains a numeric value. If this | ||
677 | value is not equal to zero the debugging code | ||
678 | will print a warning for every error it finds | ||
679 | into the kernel log. Be careful with this | ||
680 | option, as it can easily flood your logs. | ||
681 | |||
682 | dma-api/disabled This read-only file contains the character 'Y' | ||
683 | if the debugging code is disabled. This can | ||
684 | happen when it runs out of memory or if it was | ||
685 | disabled at boot time | ||
686 | |||
687 | dma-api/error_count This file is read-only and shows the total | ||
688 | numbers of errors found. | ||
689 | |||
690 | dma-api/num_errors The number in this file shows how many | ||
691 | warnings will be printed to the kernel log | ||
692 | before it stops. This number is initialized to | ||
693 | one at system boot and be set by writing into | ||
694 | this file | ||
695 | |||
696 | dma-api/min_free_entries | ||
697 | This read-only file can be read to get the | ||
698 | minimum number of free dma_debug_entries the | ||
699 | allocator has ever seen. If this value goes | ||
700 | down to zero the code will disable itself | ||
701 | because it is not longer reliable. | ||
702 | |||
703 | dma-api/num_free_entries | ||
704 | The current number of free dma_debug_entries | ||
705 | in the allocator. | ||
706 | |||
707 | dma-api/driver-filter | ||
708 | You can write a name of a driver into this file | ||
709 | to limit the debug output to requests from that | ||
710 | particular driver. Write an empty string to | ||
711 | that file to disable the filter and see | ||
712 | all errors again. | ||
713 | |||
714 | If you have this code compiled into your kernel it will be enabled by default. | ||
715 | If you want to boot without the bookkeeping anyway you can provide | ||
716 | 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging. | ||
717 | Notice that you can not enable it again at runtime. You have to reboot to do | ||
718 | so. | ||
719 | |||
720 | If you want to see debug messages only for a special device driver you can | ||
721 | specify the dma_debug_driver=<drivername> parameter. This will enable the | ||
722 | driver filter at boot time. The debug code will only print errors for that | ||
723 | driver afterwards. This filter can be disabled or changed later using debugfs. | ||
724 | |||
725 | When the code disables itself at runtime this is most likely because it ran | ||
726 | out of dma_debug_entries. These entries are preallocated at boot. The number | ||
727 | of preallocated entries is defined per architecture. If it is too low for you | ||
728 | boot with 'dma_debug_entries=<your_desired_number>' to overwrite the | ||
729 | architectural default. | ||
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index b2a4d6d244d9..01f24e94bdb6 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt | |||
@@ -136,7 +136,7 @@ exactly why. | |||
136 | The standard 32-bit addressing PCI device would do something like | 136 | The standard 32-bit addressing PCI device would do something like |
137 | this: | 137 | this: |
138 | 138 | ||
139 | if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { | 139 | if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { |
140 | printk(KERN_WARNING | 140 | printk(KERN_WARNING |
141 | "mydev: No suitable DMA available.\n"); | 141 | "mydev: No suitable DMA available.\n"); |
142 | goto ignore_this_device; | 142 | goto ignore_this_device; |
@@ -155,9 +155,9 @@ all 64-bits when accessing streaming DMA: | |||
155 | 155 | ||
156 | int using_dac; | 156 | int using_dac; |
157 | 157 | ||
158 | if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { | 158 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { |
159 | using_dac = 1; | 159 | using_dac = 1; |
160 | } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { | 160 | } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { |
161 | using_dac = 0; | 161 | using_dac = 0; |
162 | } else { | 162 | } else { |
163 | printk(KERN_WARNING | 163 | printk(KERN_WARNING |
@@ -170,14 +170,14 @@ the case would look like this: | |||
170 | 170 | ||
171 | int using_dac, consistent_using_dac; | 171 | int using_dac, consistent_using_dac; |
172 | 172 | ||
173 | if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { | 173 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { |
174 | using_dac = 1; | 174 | using_dac = 1; |
175 | consistent_using_dac = 1; | 175 | consistent_using_dac = 1; |
176 | pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); | 176 | pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); |
177 | } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { | 177 | } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { |
178 | using_dac = 0; | 178 | using_dac = 0; |
179 | consistent_using_dac = 0; | 179 | consistent_using_dac = 0; |
180 | pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); | 180 | pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); |
181 | } else { | 181 | } else { |
182 | printk(KERN_WARNING | 182 | printk(KERN_WARNING |
183 | "mydev: No suitable DMA available.\n"); | 183 | "mydev: No suitable DMA available.\n"); |
@@ -192,7 +192,7 @@ check the return value from pci_set_consistent_dma_mask(). | |||
192 | Finally, if your device can only drive the low 24-bits of | 192 | Finally, if your device can only drive the low 24-bits of |
193 | address during PCI bus mastering you might do something like: | 193 | address during PCI bus mastering you might do something like: |
194 | 194 | ||
195 | if (pci_set_dma_mask(pdev, DMA_24BIT_MASK)) { | 195 | if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) { |
196 | printk(KERN_WARNING | 196 | printk(KERN_WARNING |
197 | "mydev: 24-bit DMA addressing not available.\n"); | 197 | "mydev: 24-bit DMA addressing not available.\n"); |
198 | goto ignore_this_device; | 198 | goto ignore_this_device; |
@@ -213,7 +213,7 @@ most specific mask. | |||
213 | 213 | ||
214 | Here is pseudo-code showing how this might be done: | 214 | Here is pseudo-code showing how this might be done: |
215 | 215 | ||
216 | #define PLAYBACK_ADDRESS_BITS DMA_32BIT_MASK | 216 | #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32) |
217 | #define RECORD_ADDRESS_BITS 0x00ffffff | 217 | #define RECORD_ADDRESS_BITS 0x00ffffff |
218 | 218 | ||
219 | struct my_sound_card *card; | 219 | struct my_sound_card *card; |
diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore index c102c02ecf89..c6def352fe39 100644 --- a/Documentation/DocBook/.gitignore +++ b/Documentation/DocBook/.gitignore | |||
@@ -4,3 +4,7 @@ | |||
4 | *.html | 4 | *.html |
5 | *.9.gz | 5 | *.9.gz |
6 | *.9 | 6 | *.9 |
7 | *.aux | ||
8 | *.dvi | ||
9 | *.log | ||
10 | *.out | ||
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 1462ed86d40a..9632444f6c62 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile | |||
@@ -12,7 +12,9 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \ | |||
12 | kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ | 12 | kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ |
13 | gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ | 13 | gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ |
14 | genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ | 14 | genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ |
15 | mac80211.xml debugobjects.xml sh.xml regulator.xml | 15 | mac80211.xml debugobjects.xml sh.xml regulator.xml \ |
16 | alsa-driver-api.xml writing-an-alsa-driver.xml \ | ||
17 | tracepoint.xml | ||
16 | 18 | ||
17 | ### | 19 | ### |
18 | # The build process is as follows (targets): | 20 | # The build process is as follows (targets): |
@@ -30,7 +32,7 @@ PS_METHOD = $(prefer-db2x) | |||
30 | 32 | ||
31 | ### | 33 | ### |
32 | # The targets that may be used. | 34 | # The targets that may be used. |
33 | PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs | 35 | PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs cleandocs |
34 | 36 | ||
35 | BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) | 37 | BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) |
36 | xmldocs: $(BOOKS) | 38 | xmldocs: $(BOOKS) |
@@ -142,7 +144,8 @@ quiet_cmd_db2pdf = PDF $@ | |||
142 | $(call cmd,db2pdf) | 144 | $(call cmd,db2pdf) |
143 | 145 | ||
144 | 146 | ||
145 | main_idx = Documentation/DocBook/index.html | 147 | index = index.html |
148 | main_idx = Documentation/DocBook/$(index) | ||
146 | build_main_index = rm -rf $(main_idx) && \ | 149 | build_main_index = rm -rf $(main_idx) && \ |
147 | echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \ | 150 | echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \ |
148 | echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \ | 151 | echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \ |
@@ -212,11 +215,12 @@ silent_gen_xml = : | |||
212 | dochelp: | 215 | dochelp: |
213 | @echo ' Linux kernel internal documentation in different formats:' | 216 | @echo ' Linux kernel internal documentation in different formats:' |
214 | @echo ' htmldocs - HTML' | 217 | @echo ' htmldocs - HTML' |
215 | @echo ' installmandocs - install man pages generated by mandocs' | ||
216 | @echo ' mandocs - man pages' | ||
217 | @echo ' pdfdocs - PDF' | 218 | @echo ' pdfdocs - PDF' |
218 | @echo ' psdocs - Postscript' | 219 | @echo ' psdocs - Postscript' |
219 | @echo ' xmldocs - XML DocBook' | 220 | @echo ' xmldocs - XML DocBook' |
221 | @echo ' mandocs - man pages' | ||
222 | @echo ' installmandocs - install man pages generated by mandocs' | ||
223 | @echo ' cleandocs - clean all generated DocBook files' | ||
220 | 224 | ||
221 | ### | 225 | ### |
222 | # Temporary files left by various tools | 226 | # Temporary files left by various tools |
@@ -230,10 +234,14 @@ clean-files := $(DOCBOOKS) \ | |||
230 | $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ | 234 | $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ |
231 | $(patsubst %.xml, %.html, $(DOCBOOKS)) \ | 235 | $(patsubst %.xml, %.html, $(DOCBOOKS)) \ |
232 | $(patsubst %.xml, %.9, $(DOCBOOKS)) \ | 236 | $(patsubst %.xml, %.9, $(DOCBOOKS)) \ |
233 | $(C-procfs-example) | 237 | $(C-procfs-example) $(index) |
234 | 238 | ||
235 | clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man | 239 | clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man |
236 | 240 | ||
241 | cleandocs: | ||
242 | $(Q)rm -f $(call objectify, $(clean-files)) | ||
243 | $(Q)rm -rf $(call objectify, $(clean-dirs)) | ||
244 | |||
237 | # Declare the contents of the .PHONY variable as phony. We keep that | 245 | # Declare the contents of the .PHONY variable as phony. We keep that |
238 | # information in a variable se we can use it in if_changed and friends. | 246 | # information in a variable se we can use it in if_changed and friends. |
239 | 247 | ||
diff --git a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl b/Documentation/DocBook/alsa-driver-api.tmpl index 9d644f7e241e..0230a96f0564 100644 --- a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl +++ b/Documentation/DocBook/alsa-driver-api.tmpl | |||
@@ -1,11 +1,11 @@ | |||
1 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN"> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | 2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | |
3 | <book> | 3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> |
4 | <?dbhtml filename="index.html"> | ||
5 | 4 | ||
6 | <!-- ****************************************************** --> | 5 | <!-- ****************************************************** --> |
7 | <!-- Header --> | 6 | <!-- Header --> |
8 | <!-- ****************************************************** --> | 7 | <!-- ****************************************************** --> |
8 | <book id="ALSA-Driver-API"> | ||
9 | <bookinfo> | 9 | <bookinfo> |
10 | <title>The ALSA Driver API</title> | 10 | <title>The ALSA Driver API</title> |
11 | 11 | ||
@@ -35,6 +35,8 @@ | |||
35 | 35 | ||
36 | </bookinfo> | 36 | </bookinfo> |
37 | 37 | ||
38 | <toc></toc> | ||
39 | |||
38 | <chapter><title>Management of Cards and Devices</title> | 40 | <chapter><title>Management of Cards and Devices</title> |
39 | <sect1><title>Card Management</title> | 41 | <sect1><title>Card Management</title> |
40 | !Esound/core/init.c | 42 | !Esound/core/init.c |
@@ -71,6 +73,10 @@ | |||
71 | !Esound/pci/ac97/ac97_codec.c | 73 | !Esound/pci/ac97/ac97_codec.c |
72 | !Esound/pci/ac97/ac97_pcm.c | 74 | !Esound/pci/ac97/ac97_pcm.c |
73 | </sect1> | 75 | </sect1> |
76 | <sect1><title>Virtual Master Control API</title> | ||
77 | !Esound/core/vmaster.c | ||
78 | !Iinclude/sound/control.h | ||
79 | </sect1> | ||
74 | </chapter> | 80 | </chapter> |
75 | <chapter><title>MIDI API</title> | 81 | <chapter><title>MIDI API</title> |
76 | <sect1><title>Raw MIDI API</title> | 82 | <sect1><title>Raw MIDI API</title> |
@@ -89,6 +95,9 @@ | |||
89 | <sect1><title>Hardware-Dependent Devices API</title> | 95 | <sect1><title>Hardware-Dependent Devices API</title> |
90 | !Esound/core/hwdep.c | 96 | !Esound/core/hwdep.c |
91 | </sect1> | 97 | </sect1> |
98 | <sect1><title>Jack Abstraction Layer API</title> | ||
99 | !Esound/core/jack.c | ||
100 | </sect1> | ||
92 | <sect1><title>ISA DMA Helpers</title> | 101 | <sect1><title>ISA DMA Helpers</title> |
93 | !Esound/core/isadma.c | 102 | !Esound/core/isadma.c |
94 | </sect1> | 103 | </sect1> |
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl index 7f5f218015fe..08ff908aa7a2 100644 --- a/Documentation/DocBook/debugobjects.tmpl +++ b/Documentation/DocBook/debugobjects.tmpl | |||
@@ -106,7 +106,7 @@ | |||
106 | number of errors are printk'ed including a full stack trace. | 106 | number of errors are printk'ed including a full stack trace. |
107 | </para> | 107 | </para> |
108 | <para> | 108 | <para> |
109 | The statistics are available via debugfs/debug_objects/stats. | 109 | The statistics are available via /sys/kernel/debug/debug_objects/stats. |
110 | They provide information about the number of warnings and the | 110 | They provide information about the number of warnings and the |
111 | number of successful fixups along with information about the | 111 | number of successful fixups along with information about the |
112 | usage of the internal tracking objects and the state of the | 112 | usage of the internal tracking objects and the state of the |
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index 3a882d9a90a9..c671a0168096 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl | |||
@@ -440,6 +440,7 @@ desc->chip->end(); | |||
440 | used in the generic IRQ layer. | 440 | used in the generic IRQ layer. |
441 | </para> | 441 | </para> |
442 | !Iinclude/linux/irq.h | 442 | !Iinclude/linux/irq.h |
443 | !Iinclude/linux/interrupt.h | ||
443 | </chapter> | 444 | </chapter> |
444 | 445 | ||
445 | <chapter id="pubfunctions"> | 446 | <chapter id="pubfunctions"> |
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index bc962cda6504..44b3def961a2 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -190,15 +190,20 @@ X!Ekernel/module.c | |||
190 | !Edrivers/pci/pci.c | 190 | !Edrivers/pci/pci.c |
191 | !Edrivers/pci/pci-driver.c | 191 | !Edrivers/pci/pci-driver.c |
192 | !Edrivers/pci/remove.c | 192 | !Edrivers/pci/remove.c |
193 | !Edrivers/pci/pci-acpi.c | ||
194 | !Edrivers/pci/search.c | 193 | !Edrivers/pci/search.c |
195 | !Edrivers/pci/msi.c | 194 | !Edrivers/pci/msi.c |
196 | !Edrivers/pci/bus.c | 195 | !Edrivers/pci/bus.c |
196 | !Edrivers/pci/access.c | ||
197 | !Edrivers/pci/irq.c | ||
198 | !Edrivers/pci/htirq.c | ||
197 | <!-- FIXME: Removed for now since no structured comments in source | 199 | <!-- FIXME: Removed for now since no structured comments in source |
198 | X!Edrivers/pci/hotplug.c | 200 | X!Edrivers/pci/hotplug.c |
199 | --> | 201 | --> |
200 | !Edrivers/pci/probe.c | 202 | !Edrivers/pci/probe.c |
203 | !Edrivers/pci/slot.c | ||
201 | !Edrivers/pci/rom.c | 204 | !Edrivers/pci/rom.c |
205 | !Edrivers/pci/iov.c | ||
206 | !Idrivers/pci/pci-sysfs.c | ||
202 | </sect1> | 207 | </sect1> |
203 | <sect1><title>PCI Hotplug Support Library</title> | 208 | <sect1><title>PCI Hotplug Support Library</title> |
204 | !Edrivers/pci/hotplug/pci_hotplug_core.c | 209 | !Edrivers/pci/hotplug/pci_hotplug_core.c |
@@ -258,7 +263,7 @@ X!Earch/x86/kernel/mca_32.c | |||
258 | !Eblock/blk-tag.c | 263 | !Eblock/blk-tag.c |
259 | !Iblock/blk-tag.c | 264 | !Iblock/blk-tag.c |
260 | !Eblock/blk-integrity.c | 265 | !Eblock/blk-integrity.c |
261 | !Iblock/blktrace.c | 266 | !Ikernel/trace/blktrace.c |
262 | !Iblock/genhd.c | 267 | !Iblock/genhd.c |
263 | !Eblock/genhd.c | 268 | !Eblock/genhd.c |
264 | </chapter> | 269 | </chapter> |
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl index 372dec20c8da..5cff41a5fa7c 100644 --- a/Documentation/DocBook/kgdb.tmpl +++ b/Documentation/DocBook/kgdb.tmpl | |||
@@ -281,7 +281,7 @@ | |||
281 | seriously wrong while debugging, it will most often be the case | 281 | seriously wrong while debugging, it will most often be the case |
282 | that you want to enable gdb to be verbose about its target | 282 | that you want to enable gdb to be verbose about its target |
283 | communications. You do this prior to issuing the <constant>target | 283 | communications. You do this prior to issuing the <constant>target |
284 | remote</constant> command by typing in: <constant>set remote debug 1</constant> | 284 | remote</constant> command by typing in: <constant>set debug remote 1</constant> |
285 | </para> | 285 | </para> |
286 | </chapter> | 286 | </chapter> |
287 | <chapter id="KGDBTestSuite"> | 287 | <chapter id="KGDBTestSuite"> |
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index 77c3c202991b..e36986663570 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl | |||
@@ -17,8 +17,7 @@ | |||
17 | </authorgroup> | 17 | </authorgroup> |
18 | 18 | ||
19 | <copyright> | 19 | <copyright> |
20 | <year>2007</year> | 20 | <year>2007-2009</year> |
21 | <year>2008</year> | ||
22 | <holder>Johannes Berg</holder> | 21 | <holder>Johannes Berg</holder> |
23 | </copyright> | 22 | </copyright> |
24 | 23 | ||
@@ -146,7 +145,6 @@ usage should require reading the full document. | |||
146 | interface in STA mode at first! | 145 | interface in STA mode at first! |
147 | </para> | 146 | </para> |
148 | !Finclude/net/mac80211.h ieee80211_if_init_conf | 147 | !Finclude/net/mac80211.h ieee80211_if_init_conf |
149 | !Finclude/net/mac80211.h ieee80211_if_conf | ||
150 | </chapter> | 148 | </chapter> |
151 | 149 | ||
152 | <chapter id="rx-tx"> | 150 | <chapter id="rx-tx"> |
@@ -165,8 +163,8 @@ usage should require reading the full document. | |||
165 | !Pinclude/net/mac80211.h Frame format | 163 | !Pinclude/net/mac80211.h Frame format |
166 | </sect1> | 164 | </sect1> |
167 | <sect1> | 165 | <sect1> |
168 | <title>Alignment issues</title> | 166 | <title>Packet alignment</title> |
169 | <para>TBD</para> | 167 | !Pnet/mac80211/rx.c Packet alignment |
170 | </sect1> | 168 | </sect1> |
171 | <sect1> | 169 | <sect1> |
172 | <title>Calling into mac80211 from interrupts</title> | 170 | <title>Calling into mac80211 from interrupts</title> |
@@ -223,6 +221,17 @@ usage should require reading the full document. | |||
223 | !Finclude/net/mac80211.h ieee80211_key_flags | 221 | !Finclude/net/mac80211.h ieee80211_key_flags |
224 | </chapter> | 222 | </chapter> |
225 | 223 | ||
224 | <chapter id="powersave"> | ||
225 | <title>Powersave support</title> | ||
226 | !Pinclude/net/mac80211.h Powersave support | ||
227 | </chapter> | ||
228 | |||
229 | <chapter id="beacon-filter"> | ||
230 | <title>Beacon filter support</title> | ||
231 | !Pinclude/net/mac80211.h Beacon filter support | ||
232 | !Finclude/net/mac80211.h ieee80211_beacon_loss | ||
233 | </chapter> | ||
234 | |||
226 | <chapter id="qos"> | 235 | <chapter id="qos"> |
227 | <title>Multiple queues and QoS support</title> | 236 | <title>Multiple queues and QoS support</title> |
228 | <para>TBD</para> | 237 | <para>TBD</para> |
diff --git a/Documentation/DocBook/procfs_example.c b/Documentation/DocBook/procfs_example.c index 8c6396e4bf31..a5b11793b1e0 100644 --- a/Documentation/DocBook/procfs_example.c +++ b/Documentation/DocBook/procfs_example.c | |||
@@ -117,9 +117,6 @@ static int __init init_procfs_example(void) | |||
117 | rv = -ENOMEM; | 117 | rv = -ENOMEM; |
118 | goto out; | 118 | goto out; |
119 | } | 119 | } |
120 | |||
121 | example_dir->owner = THIS_MODULE; | ||
122 | |||
123 | /* create jiffies using convenience function */ | 120 | /* create jiffies using convenience function */ |
124 | jiffies_file = create_proc_read_entry("jiffies", | 121 | jiffies_file = create_proc_read_entry("jiffies", |
125 | 0444, example_dir, | 122 | 0444, example_dir, |
@@ -130,8 +127,6 @@ static int __init init_procfs_example(void) | |||
130 | goto no_jiffies; | 127 | goto no_jiffies; |
131 | } | 128 | } |
132 | 129 | ||
133 | jiffies_file->owner = THIS_MODULE; | ||
134 | |||
135 | /* create foo and bar files using same callback | 130 | /* create foo and bar files using same callback |
136 | * functions | 131 | * functions |
137 | */ | 132 | */ |
@@ -146,7 +141,6 @@ static int __init init_procfs_example(void) | |||
146 | foo_file->data = &foo_data; | 141 | foo_file->data = &foo_data; |
147 | foo_file->read_proc = proc_read_foobar; | 142 | foo_file->read_proc = proc_read_foobar; |
148 | foo_file->write_proc = proc_write_foobar; | 143 | foo_file->write_proc = proc_write_foobar; |
149 | foo_file->owner = THIS_MODULE; | ||
150 | 144 | ||
151 | bar_file = create_proc_entry("bar", 0644, example_dir); | 145 | bar_file = create_proc_entry("bar", 0644, example_dir); |
152 | if(bar_file == NULL) { | 146 | if(bar_file == NULL) { |
@@ -159,7 +153,6 @@ static int __init init_procfs_example(void) | |||
159 | bar_file->data = &bar_data; | 153 | bar_file->data = &bar_data; |
160 | bar_file->read_proc = proc_read_foobar; | 154 | bar_file->read_proc = proc_read_foobar; |
161 | bar_file->write_proc = proc_write_foobar; | 155 | bar_file->write_proc = proc_write_foobar; |
162 | bar_file->owner = THIS_MODULE; | ||
163 | 156 | ||
164 | /* create symlink */ | 157 | /* create symlink */ |
165 | symlink = proc_symlink("jiffies_too", example_dir, | 158 | symlink = proc_symlink("jiffies_too", example_dir, |
@@ -169,8 +162,6 @@ static int __init init_procfs_example(void) | |||
169 | goto no_symlink; | 162 | goto no_symlink; |
170 | } | 163 | } |
171 | 164 | ||
172 | symlink->owner = THIS_MODULE; | ||
173 | |||
174 | /* everything OK */ | 165 | /* everything OK */ |
175 | printk(KERN_INFO "%s %s initialised\n", | 166 | printk(KERN_INFO "%s %s initialised\n", |
176 | MODULE_NAME, MODULE_VERS); | 167 | MODULE_NAME, MODULE_VERS); |
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl new file mode 100644 index 000000000000..b0756d0fd579 --- /dev/null +++ b/Documentation/DocBook/tracepoint.tmpl | |||
@@ -0,0 +1,89 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="Tracepoints"> | ||
6 | <bookinfo> | ||
7 | <title>The Linux Kernel Tracepoint API</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Jason</firstname> | ||
12 | <surname>Baron</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>jbaron@redhat.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <legalnotice> | ||
22 | <para> | ||
23 | This documentation is free software; you can redistribute | ||
24 | it and/or modify it under the terms of the GNU General Public | ||
25 | License as published by the Free Software Foundation; either | ||
26 | version 2 of the License, or (at your option) any later | ||
27 | version. | ||
28 | </para> | ||
29 | |||
30 | <para> | ||
31 | This program is distributed in the hope that it will be | ||
32 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
33 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
34 | See the GNU General Public License for more details. | ||
35 | </para> | ||
36 | |||
37 | <para> | ||
38 | You should have received a copy of the GNU General Public | ||
39 | License along with this program; if not, write to the Free | ||
40 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
41 | MA 02111-1307 USA | ||
42 | </para> | ||
43 | |||
44 | <para> | ||
45 | For more details see the file COPYING in the source | ||
46 | distribution of Linux. | ||
47 | </para> | ||
48 | </legalnotice> | ||
49 | </bookinfo> | ||
50 | |||
51 | <toc></toc> | ||
52 | <chapter id="intro"> | ||
53 | <title>Introduction</title> | ||
54 | <para> | ||
55 | Tracepoints are static probe points that are located in strategic points | ||
56 | throughout the kernel. 'Probes' register/unregister with tracepoints | ||
57 | via a callback mechanism. The 'probes' are strictly typed functions that | ||
58 | are passed a unique set of parameters defined by each tracepoint. | ||
59 | </para> | ||
60 | |||
61 | <para> | ||
62 | From this simple callback mechanism, 'probes' can be used to profile, debug, | ||
63 | and understand kernel behavior. There are a number of tools that provide a | ||
64 | framework for using 'probes'. These tools include Systemtap, ftrace, and | ||
65 | LTTng. | ||
66 | </para> | ||
67 | |||
68 | <para> | ||
69 | Tracepoints are defined in a number of header files via various macros. Thus, | ||
70 | the purpose of this document is to provide a clear accounting of the available | ||
71 | tracepoints. The intention is to understand not only what tracepoints are | ||
72 | available but also to understand where future tracepoints might be added. | ||
73 | </para> | ||
74 | |||
75 | <para> | ||
76 | The API presented has functions of the form: | ||
77 | <function>trace_tracepointname(function parameters)</function>. These are the | ||
78 | tracepoints callbacks that are found throughout the code. Registering and | ||
79 | unregistering probes with these callback sites is covered in the | ||
80 | <filename>Documentation/trace/*</filename> directory. | ||
81 | </para> | ||
82 | </chapter> | ||
83 | |||
84 | <chapter id="irq"> | ||
85 | <title>IRQ</title> | ||
86 | !Iinclude/trace/events/irq.h | ||
87 | </chapter> | ||
88 | |||
89 | </book> | ||
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index 52e1b79ce0e6..8f6e3b2403c7 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl | |||
@@ -42,6 +42,13 @@ GPL version 2. | |||
42 | 42 | ||
43 | <revhistory> | 43 | <revhistory> |
44 | <revision> | 44 | <revision> |
45 | <revnumber>0.8</revnumber> | ||
46 | <date>2008-12-24</date> | ||
47 | <authorinitials>hjk</authorinitials> | ||
48 | <revremark>Added name attributes in mem and portio sysfs directories. | ||
49 | </revremark> | ||
50 | </revision> | ||
51 | <revision> | ||
45 | <revnumber>0.7</revnumber> | 52 | <revnumber>0.7</revnumber> |
46 | <date>2008-12-23</date> | 53 | <date>2008-12-23</date> |
47 | <authorinitials>hjk</authorinitials> | 54 | <authorinitials>hjk</authorinitials> |
@@ -303,12 +310,19 @@ interested in translating it, please email me | |||
303 | appear if the size of the mapping is not 0. | 310 | appear if the size of the mapping is not 0. |
304 | </para> | 311 | </para> |
305 | <para> | 312 | <para> |
306 | Each <filename>mapX/</filename> directory contains two read-only files | 313 | Each <filename>mapX/</filename> directory contains four read-only files |
307 | that show start address and size of the memory: | 314 | that show attributes of the memory: |
308 | </para> | 315 | </para> |
309 | <itemizedlist> | 316 | <itemizedlist> |
310 | <listitem> | 317 | <listitem> |
311 | <para> | 318 | <para> |
319 | <filename>name</filename>: A string identifier for this mapping. This | ||
320 | is optional, the string can be empty. Drivers can set this to make it | ||
321 | easier for userspace to find the correct mapping. | ||
322 | </para> | ||
323 | </listitem> | ||
324 | <listitem> | ||
325 | <para> | ||
312 | <filename>addr</filename>: The address of memory that can be mapped. | 326 | <filename>addr</filename>: The address of memory that can be mapped. |
313 | </para> | 327 | </para> |
314 | </listitem> | 328 | </listitem> |
@@ -366,12 +380,19 @@ offset = N * getpagesize(); | |||
366 | <filename>/sys/class/uio/uioX/portio/</filename>. | 380 | <filename>/sys/class/uio/uioX/portio/</filename>. |
367 | </para> | 381 | </para> |
368 | <para> | 382 | <para> |
369 | Each <filename>portX/</filename> directory contains three read-only | 383 | Each <filename>portX/</filename> directory contains four read-only |
370 | files that show start, size, and type of the port region: | 384 | files that show name, start, size, and type of the port region: |
371 | </para> | 385 | </para> |
372 | <itemizedlist> | 386 | <itemizedlist> |
373 | <listitem> | 387 | <listitem> |
374 | <para> | 388 | <para> |
389 | <filename>name</filename>: A string identifier for this port region. | ||
390 | The string is optional and can be empty. Drivers can set it to make it | ||
391 | easier for userspace to find a certain port region. | ||
392 | </para> | ||
393 | </listitem> | ||
394 | <listitem> | ||
395 | <para> | ||
375 | <filename>start</filename>: The first port of this region. | 396 | <filename>start</filename>: The first port of this region. |
376 | </para> | 397 | </para> |
377 | </listitem> | 398 | </listitem> |
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl index 87a7c07ab658..7a2e0e98986a 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl | |||
@@ -1,11 +1,11 @@ | |||
1 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN"> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | 2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | |
3 | <book> | 3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> |
4 | <?dbhtml filename="index.html"> | ||
5 | 4 | ||
6 | <!-- ****************************************************** --> | 5 | <!-- ****************************************************** --> |
7 | <!-- Header --> | 6 | <!-- Header --> |
8 | <!-- ****************************************************** --> | 7 | <!-- ****************************************************** --> |
8 | <book id="Writing-an-ALSA-Driver"> | ||
9 | <bookinfo> | 9 | <bookinfo> |
10 | <title>Writing an ALSA Driver</title> | 10 | <title>Writing an ALSA Driver</title> |
11 | <author> | 11 | <author> |
@@ -492,9 +492,9 @@ | |||
492 | } | 492 | } |
493 | 493 | ||
494 | /* (2) */ | 494 | /* (2) */ |
495 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); | 495 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); |
496 | if (card == NULL) | 496 | if (err < 0) |
497 | return -ENOMEM; | 497 | return err; |
498 | 498 | ||
499 | /* (3) */ | 499 | /* (3) */ |
500 | err = snd_mychip_create(card, pci, &chip); | 500 | err = snd_mychip_create(card, pci, &chip); |
@@ -590,8 +590,9 @@ | |||
590 | <programlisting> | 590 | <programlisting> |
591 | <![CDATA[ | 591 | <![CDATA[ |
592 | struct snd_card *card; | 592 | struct snd_card *card; |
593 | int err; | ||
593 | .... | 594 | .... |
594 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); | 595 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); |
595 | ]]> | 596 | ]]> |
596 | </programlisting> | 597 | </programlisting> |
597 | </informalexample> | 598 | </informalexample> |
@@ -809,26 +810,28 @@ | |||
809 | 810 | ||
810 | <para> | 811 | <para> |
811 | As mentioned above, to create a card instance, call | 812 | As mentioned above, to create a card instance, call |
812 | <function>snd_card_new()</function>. | 813 | <function>snd_card_create()</function>. |
813 | 814 | ||
814 | <informalexample> | 815 | <informalexample> |
815 | <programlisting> | 816 | <programlisting> |
816 | <![CDATA[ | 817 | <![CDATA[ |
817 | struct snd_card *card; | 818 | struct snd_card *card; |
818 | card = snd_card_new(index, id, module, extra_size); | 819 | int err; |
820 | err = snd_card_create(index, id, module, extra_size, &card); | ||
819 | ]]> | 821 | ]]> |
820 | </programlisting> | 822 | </programlisting> |
821 | </informalexample> | 823 | </informalexample> |
822 | </para> | 824 | </para> |
823 | 825 | ||
824 | <para> | 826 | <para> |
825 | The function takes four arguments, the card-index number, the | 827 | The function takes five arguments, the card-index number, the |
826 | id string, the module pointer (usually | 828 | id string, the module pointer (usually |
827 | <constant>THIS_MODULE</constant>), | 829 | <constant>THIS_MODULE</constant>), |
828 | and the size of extra-data space. The last argument is used to | 830 | the size of extra-data space, and the pointer to return the |
831 | card instance. The extra_size argument is used to | ||
829 | allocate card->private_data for the | 832 | allocate card->private_data for the |
830 | chip-specific data. Note that these data | 833 | chip-specific data. Note that these data |
831 | are allocated by <function>snd_card_new()</function>. | 834 | are allocated by <function>snd_card_create()</function>. |
832 | </para> | 835 | </para> |
833 | </section> | 836 | </section> |
834 | 837 | ||
@@ -915,15 +918,16 @@ | |||
915 | </para> | 918 | </para> |
916 | 919 | ||
917 | <section id="card-management-chip-specific-snd-card-new"> | 920 | <section id="card-management-chip-specific-snd-card-new"> |
918 | <title>1. Allocating via <function>snd_card_new()</function>.</title> | 921 | <title>1. Allocating via <function>snd_card_create()</function>.</title> |
919 | <para> | 922 | <para> |
920 | As mentioned above, you can pass the extra-data-length | 923 | As mentioned above, you can pass the extra-data-length |
921 | to the 4th argument of <function>snd_card_new()</function>, i.e. | 924 | to the 4th argument of <function>snd_card_create()</function>, i.e. |
922 | 925 | ||
923 | <informalexample> | 926 | <informalexample> |
924 | <programlisting> | 927 | <programlisting> |
925 | <![CDATA[ | 928 | <![CDATA[ |
926 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, sizeof(struct mychip)); | 929 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, |
930 | sizeof(struct mychip), &card); | ||
927 | ]]> | 931 | ]]> |
928 | </programlisting> | 932 | </programlisting> |
929 | </informalexample> | 933 | </informalexample> |
@@ -952,8 +956,8 @@ | |||
952 | 956 | ||
953 | <para> | 957 | <para> |
954 | After allocating a card instance via | 958 | After allocating a card instance via |
955 | <function>snd_card_new()</function> (with | 959 | <function>snd_card_create()</function> (with |
956 | <constant>NULL</constant> on the 4th arg), call | 960 | <constant>0</constant> on the 4th arg), call |
957 | <function>kzalloc()</function>. | 961 | <function>kzalloc()</function>. |
958 | 962 | ||
959 | <informalexample> | 963 | <informalexample> |
@@ -961,7 +965,7 @@ | |||
961 | <![CDATA[ | 965 | <![CDATA[ |
962 | struct snd_card *card; | 966 | struct snd_card *card; |
963 | struct mychip *chip; | 967 | struct mychip *chip; |
964 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL); | 968 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); |
965 | ..... | 969 | ..... |
966 | chip = kzalloc(sizeof(*chip), GFP_KERNEL); | 970 | chip = kzalloc(sizeof(*chip), GFP_KERNEL); |
967 | ]]> | 971 | ]]> |
@@ -1133,8 +1137,8 @@ | |||
1133 | if (err < 0) | 1137 | if (err < 0) |
1134 | return err; | 1138 | return err; |
1135 | /* check PCI availability (28bit DMA) */ | 1139 | /* check PCI availability (28bit DMA) */ |
1136 | if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 || | 1140 | if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 || |
1137 | pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) { | 1141 | pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) { |
1138 | printk(KERN_ERR "error to set 28bit mask DMA\n"); | 1142 | printk(KERN_ERR "error to set 28bit mask DMA\n"); |
1139 | pci_disable_device(pci); | 1143 | pci_disable_device(pci); |
1140 | return -ENXIO; | 1144 | return -ENXIO; |
@@ -1248,8 +1252,8 @@ | |||
1248 | err = pci_enable_device(pci); | 1252 | err = pci_enable_device(pci); |
1249 | if (err < 0) | 1253 | if (err < 0) |
1250 | return err; | 1254 | return err; |
1251 | if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 || | 1255 | if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 || |
1252 | pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) { | 1256 | pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) { |
1253 | printk(KERN_ERR "error to set 28bit mask DMA\n"); | 1257 | printk(KERN_ERR "error to set 28bit mask DMA\n"); |
1254 | pci_disable_device(pci); | 1258 | pci_disable_device(pci); |
1255 | return -ENXIO; | 1259 | return -ENXIO; |
@@ -5750,8 +5754,9 @@ struct _snd_pcm_runtime { | |||
5750 | .... | 5754 | .... |
5751 | struct snd_card *card; | 5755 | struct snd_card *card; |
5752 | struct mychip *chip; | 5756 | struct mychip *chip; |
5757 | int err; | ||
5753 | .... | 5758 | .... |
5754 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL); | 5759 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); |
5755 | .... | 5760 | .... |
5756 | chip = kzalloc(sizeof(*chip), GFP_KERNEL); | 5761 | chip = kzalloc(sizeof(*chip), GFP_KERNEL); |
5757 | .... | 5762 | .... |
@@ -5763,7 +5768,7 @@ struct _snd_pcm_runtime { | |||
5763 | </informalexample> | 5768 | </informalexample> |
5764 | 5769 | ||
5765 | When you created the chip data with | 5770 | When you created the chip data with |
5766 | <function>snd_card_new()</function>, it's anyway accessible | 5771 | <function>snd_card_create()</function>, it's anyway accessible |
5767 | via <structfield>private_data</structfield> field. | 5772 | via <structfield>private_data</structfield> field. |
5768 | 5773 | ||
5769 | <informalexample> | 5774 | <informalexample> |
@@ -5775,9 +5780,10 @@ struct _snd_pcm_runtime { | |||
5775 | .... | 5780 | .... |
5776 | struct snd_card *card; | 5781 | struct snd_card *card; |
5777 | struct mychip *chip; | 5782 | struct mychip *chip; |
5783 | int err; | ||
5778 | .... | 5784 | .... |
5779 | card = snd_card_new(index[dev], id[dev], THIS_MODULE, | 5785 | err = snd_card_create(index[dev], id[dev], THIS_MODULE, |
5780 | sizeof(struct mychip)); | 5786 | sizeof(struct mychip), &card); |
5781 | .... | 5787 | .... |
5782 | chip = card->private_data; | 5788 | chip = card->private_data; |
5783 | .... | 5789 | .... |
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 256defd7e174..dcf7acc720e1 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt | |||
@@ -4,506 +4,356 @@ | |||
4 | Revised Feb 12, 2004 by Martine Silbermann | 4 | Revised Feb 12, 2004 by Martine Silbermann |
5 | email: Martine.Silbermann@hp.com | 5 | email: Martine.Silbermann@hp.com |
6 | Revised Jun 25, 2004 by Tom L Nguyen | 6 | Revised Jun 25, 2004 by Tom L Nguyen |
7 | Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com> | ||
8 | Copyright 2003, 2008 Intel Corporation | ||
7 | 9 | ||
8 | 1. About this guide | 10 | 1. About this guide |
9 | 11 | ||
10 | This guide describes the basics of Message Signaled Interrupts (MSI), | 12 | This guide describes the basics of Message Signaled Interrupts (MSIs), |
11 | the advantages of using MSI over traditional interrupt mechanisms, | 13 | the advantages of using MSI over traditional interrupt mechanisms, how |
12 | and how to enable your driver to use MSI or MSI-X. Also included is | 14 | to change your driver to use MSI or MSI-X and some basic diagnostics to |
13 | a Frequently Asked Questions (FAQ) section. | 15 | try if a device doesn't support MSIs. |
14 | |||
15 | 1.1 Terminology | ||
16 | |||
17 | PCI devices can be single-function or multi-function. In either case, | ||
18 | when this text talks about enabling or disabling MSI on a "device | ||
19 | function," it is referring to one specific PCI device and function and | ||
20 | not to all functions on a PCI device (unless the PCI device has only | ||
21 | one function). | ||
22 | |||
23 | 2. Copyright 2003 Intel Corporation | ||
24 | |||
25 | 3. What is MSI/MSI-X? | ||
26 | |||
27 | Message Signaled Interrupt (MSI), as described in the PCI Local Bus | ||
28 | Specification Revision 2.3 or later, is an optional feature, and a | ||
29 | required feature for PCI Express devices. MSI enables a device function | ||
30 | to request service by sending an Inbound Memory Write on its PCI bus to | ||
31 | the FSB as a Message Signal Interrupt transaction. Because MSI is | ||
32 | generated in the form of a Memory Write, all transaction conditions, | ||
33 | such as a Retry, Master-Abort, Target-Abort or normal completion, are | ||
34 | supported. | ||
35 | |||
36 | A PCI device that supports MSI must also support pin IRQ assertion | ||
37 | interrupt mechanism to provide backward compatibility for systems that | ||
38 | do not support MSI. In systems which support MSI, the bus driver is | ||
39 | responsible for initializing the message address and message data of | ||
40 | the device function's MSI/MSI-X capability structure during device | ||
41 | initial configuration. | ||
42 | |||
43 | An MSI capable device function indicates MSI support by implementing | ||
44 | the MSI/MSI-X capability structure in its PCI capability list. The | ||
45 | device function may implement both the MSI capability structure and | ||
46 | the MSI-X capability structure; however, the bus driver should not | ||
47 | enable both. | ||
48 | |||
49 | The MSI capability structure contains Message Control register, | ||
50 | Message Address register and Message Data register. These registers | ||
51 | provide the bus driver control over MSI. The Message Control register | ||
52 | indicates the MSI capability supported by the device. The Message | ||
53 | Address register specifies the target address and the Message Data | ||
54 | register specifies the characteristics of the message. To request | ||
55 | service, the device function writes the content of the Message Data | ||
56 | register to the target address. The device and its software driver | ||
57 | are prohibited from writing to these registers. | ||
58 | |||
59 | The MSI-X capability structure is an optional extension to MSI. It | ||
60 | uses an independent and separate capability structure. There are | ||
61 | some key advantages to implementing the MSI-X capability structure | ||
62 | over the MSI capability structure as described below. | ||
63 | |||
64 | - Support a larger maximum number of vectors per function. | ||
65 | |||
66 | - Provide the ability for system software to configure | ||
67 | each vector with an independent message address and message | ||
68 | data, specified by a table that resides in Memory Space. | ||
69 | |||
70 | - MSI and MSI-X both support per-vector masking. Per-vector | ||
71 | masking is an optional extension of MSI but a required | ||
72 | feature for MSI-X. Per-vector masking provides the kernel the | ||
73 | ability to mask/unmask a single MSI while running its | ||
74 | interrupt service routine. If per-vector masking is | ||
75 | not supported, then the device driver should provide the | ||
76 | hardware/software synchronization to ensure that the device | ||
77 | generates MSI when the driver wants it to do so. | ||
78 | |||
79 | 4. Why use MSI? | ||
80 | |||
81 | As a benefit to the simplification of board design, MSI allows board | ||
82 | designers to remove out-of-band interrupt routing. MSI is another | ||
83 | step towards a legacy-free environment. | ||
84 | |||
85 | Due to increasing pressure on chipset and processor packages to | ||
86 | reduce pin count, the need for interrupt pins is expected to | ||
87 | diminish over time. Devices, due to pin constraints, may implement | ||
88 | messages to increase performance. | ||
89 | |||
90 | PCI Express endpoints uses INTx emulation (in-band messages) instead | ||
91 | of IRQ pin assertion. Using INTx emulation requires interrupt | ||
92 | sharing among devices connected to the same node (PCI bridge) while | ||
93 | MSI is unique (non-shared) and does not require BIOS configuration | ||
94 | support. As a result, the PCI Express technology requires MSI | ||
95 | support for better interrupt performance. | ||
96 | |||
97 | Using MSI enables the device functions to support two or more | ||
98 | vectors, which can be configured to target different CPUs to | ||
99 | increase scalability. | ||
100 | |||
101 | 5. Configuring a driver to use MSI/MSI-X | ||
102 | |||
103 | By default, the kernel will not enable MSI/MSI-X on all devices that | ||
104 | support this capability. The CONFIG_PCI_MSI kernel option | ||
105 | must be selected to enable MSI/MSI-X support. | ||
106 | |||
107 | 5.1 Including MSI/MSI-X support into the kernel | ||
108 | |||
109 | To allow MSI/MSI-X capable device drivers to selectively enable | ||
110 | MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described | ||
111 | below), the VECTOR based scheme needs to be enabled by setting | ||
112 | CONFIG_PCI_MSI during kernel config. | ||
113 | |||
114 | Since the target of the inbound message is the local APIC, providing | ||
115 | CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI. | ||
116 | |||
117 | 5.2 Configuring for MSI support | ||
118 | |||
119 | Due to the non-contiguous fashion in vector assignment of the | ||
120 | existing Linux kernel, this version does not support multiple | ||
121 | messages regardless of a device function is capable of supporting | ||
122 | more than one vector. To enable MSI on a device function's MSI | ||
123 | capability structure requires a device driver to call the function | ||
124 | pci_enable_msi() explicitly. | ||
125 | |||
126 | 5.2.1 API pci_enable_msi | ||
127 | 16 | ||
128 | int pci_enable_msi(struct pci_dev *dev) | ||
129 | 17 | ||
130 | With this new API, a device driver that wants to have MSI | 18 | 2. What are MSIs? |
131 | enabled on its device function must call this API to enable MSI. | ||
132 | A successful call will initialize the MSI capability structure | ||
133 | with ONE vector, regardless of whether a device function is | ||
134 | capable of supporting multiple messages. This vector replaces the | ||
135 | pre-assigned dev->irq with a new MSI vector. To avoid a conflict | ||
136 | of the new assigned vector with existing pre-assigned vector requires | ||
137 | a device driver to call this API before calling request_irq(). | ||
138 | 19 | ||
139 | 5.2.2 API pci_disable_msi | 20 | A Message Signaled Interrupt is a write from the device to a special |
21 | address which causes an interrupt to be received by the CPU. | ||
140 | 22 | ||
141 | void pci_disable_msi(struct pci_dev *dev) | 23 | The MSI capability was first specified in PCI 2.2 and was later enhanced |
24 | in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X | ||
25 | capability was also introduced with PCI 3.0. It supports more interrupts | ||
26 | per device than MSI and allows interrupts to be independently configured. | ||
142 | 27 | ||
143 | This API should always be used to undo the effect of pci_enable_msi() | 28 | Devices may support both MSI and MSI-X, but only one can be enabled at |
144 | when a device driver is unloading. This API restores dev->irq with | 29 | a time. |
145 | the pre-assigned IOAPIC vector and switches a device's interrupt | ||
146 | mode to PCI pin-irq assertion/INTx emulation mode. | ||
147 | |||
148 | Note that a device driver should always call free_irq() on the MSI vector | ||
149 | that it has done request_irq() on before calling this API. Failure to do | ||
150 | so results in a BUG_ON() and a device will be left with MSI enabled and | ||
151 | leaks its vector. | ||
152 | |||
153 | 5.2.3 MSI mode vs. legacy mode diagram | ||
154 | |||
155 | The below diagram shows the events which switch the interrupt | ||
156 | mode on the MSI-capable device function between MSI mode and | ||
157 | PIN-IRQ assertion mode. | ||
158 | |||
159 | ------------ pci_enable_msi ------------------------ | ||
160 | | | <=============== | | | ||
161 | | MSI MODE | | PIN-IRQ ASSERTION MODE | | ||
162 | | | ===============> | | | ||
163 | ------------ pci_disable_msi ------------------------ | ||
164 | |||
165 | |||
166 | Figure 1. MSI Mode vs. Legacy Mode | ||
167 | |||
168 | In Figure 1, a device operates by default in legacy mode. Legacy | ||
169 | in this context means PCI pin-irq assertion or PCI-Express INTx | ||
170 | emulation. A successful MSI request (using pci_enable_msi()) switches | ||
171 | a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector | ||
172 | stored in dev->irq will be saved by the PCI subsystem and a new | ||
173 | assigned MSI vector will replace dev->irq. | ||
174 | |||
175 | To return back to its default mode, a device driver should always call | ||
176 | pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a | ||
177 | device driver should always call free_irq() on the MSI vector it has | ||
178 | done request_irq() on before calling pci_disable_msi(). Failure to do | ||
179 | so results in a BUG_ON() and a device will be left with MSI enabled and | ||
180 | leaks its vector. Otherwise, the PCI subsystem restores a device's | ||
181 | dev->irq with a pre-assigned IOAPIC vector and marks the released | ||
182 | MSI vector as unused. | ||
183 | |||
184 | Once being marked as unused, there is no guarantee that the PCI | ||
185 | subsystem will reserve this MSI vector for a device. Depending on | ||
186 | the availability of current PCI vector resources and the number of | ||
187 | MSI/MSI-X requests from other drivers, this MSI may be re-assigned. | ||
188 | |||
189 | For the case where the PCI subsystem re-assigns this MSI vector to | ||
190 | another driver, a request to switch back to MSI mode may result | ||
191 | in being assigned a different MSI vector or a failure if no more | ||
192 | vectors are available. | ||
193 | |||
194 | 5.3 Configuring for MSI-X support | ||
195 | |||
196 | Due to the ability of the system software to configure each vector of | ||
197 | the MSI-X capability structure with an independent message address | ||
198 | and message data, the non-contiguous fashion in vector assignment of | ||
199 | the existing Linux kernel has no impact on supporting multiple | ||
200 | messages on an MSI-X capable device functions. To enable MSI-X on | ||
201 | a device function's MSI-X capability structure requires its device | ||
202 | driver to call the function pci_enable_msix() explicitly. | ||
203 | |||
204 | The function pci_enable_msix(), once invoked, enables either | ||
205 | all or nothing, depending on the current availability of PCI vector | ||
206 | resources. If the PCI vector resources are available for the number | ||
207 | of vectors requested by a device driver, this function will configure | ||
208 | the MSI-X table of the MSI-X capability structure of a device with | ||
209 | requested messages. To emphasize this reason, for example, a device | ||
210 | may be capable for supporting the maximum of 32 vectors while its | ||
211 | software driver usually may request 4 vectors. It is recommended | ||
212 | that the device driver should call this function once during the | ||
213 | initialization phase of the device driver. | ||
214 | |||
215 | Unlike the function pci_enable_msi(), the function pci_enable_msix() | ||
216 | does not replace the pre-assigned IOAPIC dev->irq with a new MSI | ||
217 | vector because the PCI subsystem writes the 1:1 vector-to-entry mapping | ||
218 | into the field vector of each element contained in a second argument. | ||
219 | Note that the pre-assigned IOAPIC dev->irq is valid only if the device | ||
220 | operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at | ||
221 | using dev->irq by the device driver to request for interrupt service | ||
222 | may result in unpredictable behavior. | ||
223 | |||
224 | For each MSI-X vector granted, a device driver is responsible for calling | ||
225 | other functions like request_irq(), enable_irq(), etc. to enable | ||
226 | this vector with its corresponding interrupt service handler. It is | ||
227 | a device driver's choice to assign all vectors with the same | ||
228 | interrupt service handler or each vector with a unique interrupt | ||
229 | service handler. | ||
230 | |||
231 | 5.3.1 Handling MMIO address space of MSI-X Table | ||
232 | |||
233 | The PCI 3.0 specification has implementation notes that MMIO address | ||
234 | space for a device's MSI-X structure should be isolated so that the | ||
235 | software system can set different pages for controlling accesses to the | ||
236 | MSI-X structure. The implementation of MSI support requires the PCI | ||
237 | subsystem, not a device driver, to maintain full control of the MSI-X | ||
238 | table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X | ||
239 | table/MSI-X PBA. A device driver should not access the MMIO address | ||
240 | space of the MSI-X table/MSI-X PBA. | ||
241 | |||
242 | 5.3.2 API pci_enable_msix | ||
243 | 30 | ||
244 | int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) | ||
245 | 31 | ||
246 | This API enables a device driver to request the PCI subsystem | 32 | 3. Why use MSIs? |
247 | to enable MSI-X messages on its hardware device. Depending on | 33 | |
248 | the availability of PCI vectors resources, the PCI subsystem enables | 34 | There are three reasons why using MSIs can give an advantage over |
249 | either all or none of the requested vectors. | 35 | traditional pin-based interrupts. |
36 | |||
37 | Pin-based PCI interrupts are often shared amongst several devices. | ||
38 | To support this, the kernel must call each interrupt handler associated | ||
39 | with an interrupt, which leads to reduced performance for the system as | ||
40 | a whole. MSIs are never shared, so this problem cannot arise. | ||
41 | |||
42 | When a device writes data to memory, then raises a pin-based interrupt, | ||
43 | it is possible that the interrupt may arrive before all the data has | ||
44 | arrived in memory (this becomes more likely with devices behind PCI-PCI | ||
45 | bridges). In order to ensure that all the data has arrived in memory, | ||
46 | the interrupt handler must read a register on the device which raised | ||
47 | the interrupt. PCI transaction ordering rules require that all the data | ||
48 | arrives in memory before the value can be returned from the register. | ||
49 | Using MSIs avoids this problem as the interrupt-generating write cannot | ||
50 | pass the data writes, so by the time the interrupt is raised, the driver | ||
51 | knows that all the data has arrived in memory. | ||
52 | |||
53 | PCI devices can only support a single pin-based interrupt per function. | ||
54 | Often drivers have to query the device to find out what event has | ||
55 | occurred, slowing down interrupt handling for the common case. With | ||
56 | MSIs, a device can support more interrupts, allowing each interrupt | ||
57 | to be specialised to a different purpose. One possible design gives | ||
58 | infrequent conditions (such as errors) their own interrupt which allows | ||
59 | the driver to handle the normal interrupt handling path more efficiently. | ||
60 | Other possible designs include giving one interrupt to each packet queue | ||
61 | in a network card or each port in a storage controller. | ||
62 | |||
63 | |||
64 | 4. How to use MSIs | ||
65 | |||
66 | PCI devices are initialised to use pin-based interrupts. The device | ||
67 | driver has to set up the device to use MSI or MSI-X. Not all machines | ||
68 | support MSIs correctly, and for those machines, the APIs described below | ||
69 | will simply fail and the device will continue to use pin-based interrupts. | ||
70 | |||
71 | 4.1 Include kernel support for MSIs | ||
72 | |||
73 | To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI | ||
74 | option enabled. This option is only available on some architectures, | ||
75 | and it may depend on some other options also being set. For example, | ||
76 | on x86, you must also enable X86_UP_APIC or SMP in order to see the | ||
77 | CONFIG_PCI_MSI option. | ||
78 | |||
79 | 4.2 Using MSI | ||
80 | |||
81 | Most of the hard work is done for the driver in the PCI layer. It simply | ||
82 | has to request that the PCI layer set up the MSI capability for this | ||
83 | device. | ||
84 | |||
85 | 4.2.1 pci_enable_msi | ||
86 | |||
87 | int pci_enable_msi(struct pci_dev *dev) | ||
88 | |||
89 | A successful call will allocate ONE interrupt to the device, regardless | ||
90 | of how many MSIs the device supports. The device will be switched from | ||
91 | pin-based interrupt mode to MSI mode. The dev->irq number is changed | ||
92 | to a new number which represents the message signaled interrupt. | ||
93 | This function should be called before the driver calls request_irq() | ||
94 | since enabling MSIs disables the pin-based IRQ and the driver will not | ||
95 | receive interrupts on the old interrupt. | ||
96 | |||
97 | 4.2.2 pci_enable_msi_block | ||
98 | |||
99 | int pci_enable_msi_block(struct pci_dev *dev, int count) | ||
100 | |||
101 | This variation on the above call allows a device driver to request multiple | ||
102 | MSIs. The MSI specification only allows interrupts to be allocated in | ||
103 | powers of two, up to a maximum of 2^5 (32). | ||
104 | |||
105 | If this function returns 0, it has succeeded in allocating at least as many | ||
106 | interrupts as the driver requested (it may have allocated more in order | ||
107 | to satisfy the power-of-two requirement). In this case, the function | ||
108 | enables MSI on this device and updates dev->irq to be the lowest of | ||
109 | the new interrupts assigned to it. The other interrupts assigned to | ||
110 | the device are in the range dev->irq to dev->irq + count - 1. | ||
111 | |||
112 | If this function returns a negative number, it indicates an error and | ||
113 | the driver should not attempt to request any more MSI interrupts for | ||
114 | this device. If this function returns a positive number, it will be | ||
115 | less than 'count' and indicate the number of interrupts that could have | ||
116 | been allocated. In neither case will the irq value have been | ||
117 | updated, nor will the device have been switched into MSI mode. | ||
118 | |||
119 | The device driver must decide what action to take if | ||
120 | pci_enable_msi_block() returns a value less than the number asked for. | ||
121 | Some devices can make use of fewer interrupts than the maximum they | ||
122 | request; in this case the driver should call pci_enable_msi_block() | ||
123 | again. Note that it is not guaranteed to succeed, even when the | ||
124 | 'count' has been reduced to the value returned from a previous call to | ||
125 | pci_enable_msi_block(). This is because there are multiple constraints | ||
126 | on the number of vectors that can be allocated; pci_enable_msi_block() | ||
127 | will return as soon as it finds any constraint that doesn't allow the | ||
128 | call to succeed. | ||
129 | |||
130 | 4.2.3 pci_disable_msi | ||
131 | |||
132 | void pci_disable_msi(struct pci_dev *dev) | ||
250 | 133 | ||
251 | Argument 'dev' points to the device (pci_dev) structure. | 134 | This function should be used to undo the effect of pci_enable_msi() or |
135 | pci_enable_msi_block(). Calling it restores dev->irq to the pin-based | ||
136 | interrupt number and frees the previously allocated message signaled | ||
137 | interrupt(s). The interrupt may subsequently be assigned to another | ||
138 | device, so drivers should not cache the value of dev->irq. | ||
252 | 139 | ||
253 | Argument 'entries' is a pointer to an array of msix_entry structs. | 140 | A device driver must always call free_irq() on the interrupt(s) |
254 | The number of entries is indicated in argument 'nvec'. | 141 | for which it has called request_irq() before calling this function. |
255 | struct msix_entry is defined in /driver/pci/msi.h: | 142 | Failure to do so will result in a BUG_ON(), the device will be left with |
143 | MSI enabled and will leak its vector. | ||
144 | |||
145 | 4.3 Using MSI-X | ||
146 | |||
147 | The MSI-X capability is much more flexible than the MSI capability. | ||
148 | It supports up to 2048 interrupts, each of which can be controlled | ||
149 | independently. To support this flexibility, drivers must use an array of | ||
150 | `struct msix_entry': | ||
256 | 151 | ||
257 | struct msix_entry { | 152 | struct msix_entry { |
258 | u16 vector; /* kernel uses to write alloc vector */ | 153 | u16 vector; /* kernel uses to write alloc vector */ |
259 | u16 entry; /* driver uses to specify entry */ | 154 | u16 entry; /* driver uses to specify entry */ |
260 | }; | 155 | }; |
261 | 156 | ||
262 | A device driver is responsible for initializing the field 'entry' of | 157 | This allows for the device to use these interrupts in a sparse fashion; |
263 | each element with a unique entry supported by MSI-X table. Otherwise, | 158 | for example it could use interrupts 3 and 1027 and allocate only a |
264 | -EINVAL will be returned as a result. A successful return of zero | 159 | two-element array. The driver is expected to fill in the 'entry' value |
265 | indicates the PCI subsystem completed initializing each of the requested | 160 | in each element of the array to indicate which entries it wants the kernel |
266 | entries of the MSI-X table with message address and message data. | 161 | to assign interrupts for. It is invalid to fill in two entries with the |
267 | Last but not least, the PCI subsystem will write the 1:1 | 162 | same number. |
268 | vector-to-entry mapping into the field 'vector' of each element. A | 163 | |
269 | device driver is responsible for keeping track of allocated MSI-X | 164 | 4.3.1 pci_enable_msix |
270 | vectors in its internal data structure. | 165 | |
271 | 166 | int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) | |
272 | A return of zero indicates that the number of MSI-X vectors was | 167 | |
273 | successfully allocated. A return of greater than zero indicates | 168 | Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. |
274 | MSI-X vector shortage. Or a return of less than zero indicates | 169 | The 'entries' argument is a pointer to an array of msix_entry structs |
275 | a failure. This failure may be a result of duplicate entries | 170 | which should be at least 'nvec' entries in size. On success, the |
276 | specified in second argument, or a result of no available vector, | 171 | function will return 0 and the device will have been switched into |
277 | or a result of failing to initialize MSI-X table entries. | 172 | MSI-X interrupt mode. The 'vector' elements in each entry will have |
278 | 173 | been filled in with the interrupt number. The driver should then call | |
279 | 5.3.3 API pci_disable_msix | 174 | request_irq() for each 'vector' that it decides to use. |
175 | |||
176 | If this function returns a negative number, it indicates an error and | ||
177 | the driver should not attempt to allocate any more MSI-X interrupts for | ||
178 | this device. If it returns a positive number, it indicates the maximum | ||
179 | number of interrupt vectors that could have been allocated. See example | ||
180 | below. | ||
181 | |||
182 | This function, in contrast with pci_enable_msi(), does not adjust | ||
183 | dev->irq. The device will not generate interrupts for this interrupt | ||
184 | number once MSI-X is enabled. The device driver is responsible for | ||
185 | keeping track of the interrupts assigned to the MSI-X vectors so it can | ||
186 | free them again later. | ||
187 | |||
188 | Device drivers should normally call this function once per device | ||
189 | during the initialization phase. | ||
190 | |||
191 | It is ideal if drivers can cope with a variable number of MSI-X interrupts, | ||
192 | there are many reasons why the platform may not be able to provide the | ||
193 | exact number a driver asks for. | ||
194 | |||
195 | A request loop to achieve that might look like: | ||
196 | |||
197 | static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) | ||
198 | { | ||
199 | while (nvec >= FOO_DRIVER_MINIMUM_NVEC) { | ||
200 | rc = pci_enable_msix(adapter->pdev, | ||
201 | adapter->msix_entries, nvec); | ||
202 | if (rc > 0) | ||
203 | nvec = rc; | ||
204 | else | ||
205 | return rc; | ||
206 | } | ||
207 | |||
208 | return -ENOSPC; | ||
209 | } | ||
210 | |||
211 | 4.3.2 pci_disable_msix | ||
280 | 212 | ||
281 | void pci_disable_msix(struct pci_dev *dev) | 213 | void pci_disable_msix(struct pci_dev *dev) |
282 | 214 | ||
283 | This API should always be used to undo the effect of pci_enable_msix() | 215 | This API should be used to undo the effect of pci_enable_msix(). It frees |
284 | when a device driver is unloading. Note that a device driver should | 216 | the previously allocated message signaled interrupts. The interrupts may |
285 | always call free_irq() on all MSI-X vectors it has done request_irq() | 217 | subsequently be assigned to another device, so drivers should not cache |
286 | on before calling this API. Failure to do so results in a BUG_ON() and | 218 | the value of the 'vector' elements over a call to pci_disable_msix(). |
287 | a device will be left with MSI-X enabled and leaks its vectors. | 219 | |
288 | 220 | A device driver must always call free_irq() on the interrupt(s) | |
289 | 5.3.4 MSI-X mode vs. legacy mode diagram | 221 | for which it has called request_irq() before calling this function. |
290 | 222 | Failure to do so will result in a BUG_ON(), the device will be left with | |
291 | The below diagram shows the events which switch the interrupt | 223 | MSI enabled and will leak its vector. |
292 | mode on the MSI-X capable device function between MSI-X mode and | 224 | |
293 | PIN-IRQ assertion mode (legacy). | 225 | 4.3.3 The MSI-X Table |
294 | 226 | ||
295 | ------------ pci_enable_msix(,,n) ------------------------ | 227 | The MSI-X capability specifies a BAR and offset within that BAR for the |
296 | | | <=============== | | | 228 | MSI-X Table. This address is mapped by the PCI subsystem, and should not |
297 | | MSI-X MODE | | PIN-IRQ ASSERTION MODE | | 229 | be accessed directly by the device driver. If the driver wishes to |
298 | | | ===============> | | | 230 | mask or unmask an interrupt, it should call disable_irq() / enable_irq(). |
299 | ------------ pci_disable_msix ------------------------ | 231 | |
300 | 232 | 4.4 Handling devices implementing both MSI and MSI-X capabilities | |
301 | Figure 2. MSI-X Mode vs. Legacy Mode | 233 | |
302 | 234 | If a device implements both MSI and MSI-X capabilities, it can | |
303 | In Figure 2, a device operates by default in legacy mode. A | 235 | run in either MSI mode or MSI-X mode but not both simultaneously. |
304 | successful MSI-X request (using pci_enable_msix()) switches a | 236 | This is a requirement of the PCI spec, and it is enforced by the |
305 | device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector | 237 | PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or |
306 | stored in dev->irq will be saved by the PCI subsystem; however, | 238 | pci_enable_msix() when MSI is already enabled will result in an error. |
307 | unlike MSI mode, the PCI subsystem will not replace dev->irq with | 239 | If a device driver wishes to switch between MSI and MSI-X at runtime, |
308 | assigned MSI-X vector because the PCI subsystem already writes the 1:1 | 240 | it must first quiesce the device, then switch it back to pin-interrupt |
309 | vector-to-entry mapping into the field 'vector' of each element | 241 | mode, before calling pci_enable_msi() or pci_enable_msix() and resuming |
310 | specified in second argument. | 242 | operation. This is not expected to be a common operation but may be |
311 | 243 | useful for debugging or testing during development. | |
312 | To return back to its default mode, a device driver should always call | 244 | |
313 | pci_disable_msix() to undo the effect of pci_enable_msix(). Note that | 245 | 4.5 Considerations when using MSIs |
314 | a device driver should always call free_irq() on all MSI-X vectors it | 246 | |
315 | has done request_irq() on before calling pci_disable_msix(). Failure | 247 | 4.5.1 Choosing between MSI-X and MSI |
316 | to do so results in a BUG_ON() and a device will be left with MSI-X | 248 | |
317 | enabled and leaks its vectors. Otherwise, the PCI subsystem switches a | 249 | If your device supports both MSI-X and MSI capabilities, you should use |
318 | device function's interrupt mode from MSI-X mode to legacy mode and | 250 | the MSI-X facilities in preference to the MSI facilities. As mentioned |
319 | marks all allocated MSI-X vectors as unused. | 251 | above, MSI-X supports any number of interrupts between 1 and 2048. |
320 | 252 | In constrast, MSI is restricted to a maximum of 32 interrupts (and | |
321 | Once being marked as unused, there is no guarantee that the PCI | 253 | must be a power of two). In addition, the MSI interrupt vectors must |
322 | subsystem will reserve these MSI-X vectors for a device. Depending on | 254 | be allocated consecutively, so the system may not be able to allocate |
323 | the availability of current PCI vector resources and the number of | 255 | as many vectors for MSI as it could for MSI-X. On some platforms, MSI |
324 | MSI/MSI-X requests from other drivers, these MSI-X vectors may be | 256 | interrupts must all be targetted at the same set of CPUs whereas MSI-X |
325 | re-assigned. | 257 | interrupts can all be targetted at different CPUs. |
326 | 258 | ||
327 | For the case where the PCI subsystem re-assigned these MSI-X vectors | 259 | 4.5.2 Spinlocks |
328 | to other drivers, a request to switch back to MSI-X mode may result | 260 | |
329 | being assigned with another set of MSI-X vectors or a failure if no | 261 | Most device drivers have a per-device spinlock which is taken in the |
330 | more vectors are available. | 262 | interrupt handler. With pin-based interrupts or a single MSI, it is not |
331 | 263 | necessary to disable interrupts (Linux guarantees the same interrupt will | |
332 | 5.4 Handling function implementing both MSI and MSI-X capabilities | 264 | not be re-entered). If a device uses multiple interrupts, the driver |
333 | 265 | must disable interrupts while the lock is held. If the device sends | |
334 | For the case where a function implements both MSI and MSI-X | 266 | a different interrupt, the driver will deadlock trying to recursively |
335 | capabilities, the PCI subsystem enables a device to run either in MSI | 267 | acquire the spinlock. |
336 | mode or MSI-X mode but not both. A device driver determines whether it | 268 | |
337 | wants MSI or MSI-X enabled on its hardware device. Once a device | 269 | There are two solutions. The first is to take the lock with |
338 | driver requests for MSI, for example, it is prohibited from requesting | 270 | spin_lock_irqsave() or spin_lock_irq() (see |
339 | MSI-X; in other words, a device driver is not permitted to ping-pong | 271 | Documentation/DocBook/kernel-locking). The second is to specify |
340 | between MSI mod MSI-X mode during a run-time. | 272 | IRQF_DISABLED to request_irq() so that the kernel runs the entire |
341 | 273 | interrupt routine with interrupts disabled. | |
342 | 5.5 Hardware requirements for MSI/MSI-X support | 274 | |
343 | 275 | If your MSI interrupt routine does not hold the lock for the whole time | |
344 | MSI/MSI-X support requires support from both system hardware and | 276 | it is running, the first solution may be best. The second solution is |
345 | individual hardware device functions. | 277 | normally preferred as it avoids making two transitions from interrupt |
346 | 278 | disabled to enabled and back again. | |
347 | 5.5.1 Required x86 hardware support | 279 | |
348 | 280 | 4.6 How to tell whether MSI/MSI-X is enabled on a device | |
349 | Since the target of MSI address is the local APIC CPU, enabling | 281 | |
350 | MSI/MSI-X support in the Linux kernel is dependent on whether existing | 282 | Using 'lspci -v' (as root) may show some devices with "MSI", "Message |
351 | system hardware supports local APIC. Users should verify that their | 283 | Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities |
352 | system supports local APIC operation by testing that it runs when | 284 | has an 'Enable' flag which will be followed with either "+" (enabled) |
353 | CONFIG_X86_LOCAL_APIC=y. | 285 | or "-" (disabled). |
354 | 286 | ||
355 | In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set; | 287 | |
356 | however, in UP environment, users must manually set | 288 | 5. MSI quirks |
357 | CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting | 289 | |
358 | CONFIG_PCI_MSI enables the VECTOR based scheme and the option for | 290 | Several PCI chipsets or devices are known not to support MSIs. |
359 | MSI-capable device drivers to selectively enable MSI/MSI-X. | 291 | The PCI stack provides three ways to disable MSIs: |
360 | 292 | ||
361 | Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X | 293 | 1. globally |
362 | vector is allocated new during runtime and MSI/MSI-X support does not | 294 | 2. on all devices behind a specific bridge |
363 | depend on BIOS support. This key independency enables MSI/MSI-X | 295 | 3. on a single device |
364 | support on future IOxAPIC free platforms. | 296 | |
365 | 297 | 5.1. Disabling MSIs globally | |
366 | 5.5.2 Device hardware support | 298 | |
367 | 299 | Some host chipsets simply don't support MSIs properly. If we're | |
368 | The hardware device function supports MSI by indicating the | 300 | lucky, the manufacturer knows this and has indicated it in the ACPI |
369 | MSI/MSI-X capability structure on its PCI capability list. By | 301 | FADT table. In this case, Linux will automatically disable MSIs. |
370 | default, this capability structure will not be initialized by | 302 | Some boards don't include this information in the table and so we have |
371 | the kernel to enable MSI during the system boot. In other words, | 303 | to detect them ourselves. The complete list of these is found near the |
372 | the device function is running on its default pin assertion mode. | 304 | quirk_disable_all_msi() function in drivers/pci/quirks.c. |
373 | Note that in many cases the hardware supporting MSI have bugs, | 305 | |
374 | which may result in system hangs. The software driver of specific | 306 | If you have a board which has problems with MSIs, you can pass pci=nomsi |
375 | MSI-capable hardware is responsible for deciding whether to call | 307 | on the kernel command line to disable MSIs on all devices. It would be |
376 | pci_enable_msi or not. A return of zero indicates the kernel | 308 | in your best interests to report the problem to linux-pci@vger.kernel.org |
377 | successfully initialized the MSI/MSI-X capability structure of the | 309 | including a full 'lspci -v' so we can add the quirks to the kernel. |
378 | device function. The device function is now running on MSI/MSI-X mode. | 310 | |
379 | 311 | 5.2. Disabling MSIs below a bridge | |
380 | 5.6 How to tell whether MSI/MSI-X is enabled on device function | 312 | |
381 | 313 | Some PCI bridges are not able to route MSIs between busses properly. | |
382 | At the driver level, a return of zero from the function call of | 314 | In this case, MSIs must be disabled on all devices behind the bridge. |
383 | pci_enable_msi()/pci_enable_msix() indicates to a device driver that | 315 | |
384 | its device function is initialized successfully and ready to run in | 316 | Some bridges allow you to enable MSIs by changing some bits in their |
385 | MSI/MSI-X mode. | 317 | PCI configuration space (especially the Hypertransport chipsets such |
386 | 318 | as the nVidia nForce and Serverworks HT2000). As with host chipsets, | |
387 | At the user level, users can use the command 'cat /proc/interrupts' | 319 | Linux mostly knows about them and automatically enables MSIs if it can. |
388 | to display the vectors allocated for devices and their interrupt | 320 | If you have a bridge which Linux doesn't yet know about, you can enable |
389 | MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is | 321 | MSIs in configuration space using whatever method you know works, then |
390 | enabled on a SCSI Adaptec 39320D Ultra320 controller. | 322 | enable MSIs on that bridge by doing: |
391 | 323 | ||
392 | CPU0 CPU1 | 324 | echo 1 > /sys/bus/pci/devices/$bridge/msi_bus |
393 | 0: 324639 0 IO-APIC-edge timer | 325 | |
394 | 1: 1186 0 IO-APIC-edge i8042 | 326 | where $bridge is the PCI address of the bridge you've enabled (eg |
395 | 2: 0 0 XT-PIC cascade | 327 | 0000:00:0e.0). |
396 | 12: 2797 0 IO-APIC-edge i8042 | 328 | |
397 | 14: 6543 0 IO-APIC-edge ide0 | 329 | To disable MSIs, echo 0 instead of 1. Changing this value should be |
398 | 15: 1 0 IO-APIC-edge ide1 | 330 | done with caution as it can break interrupt handling for all devices |
399 | 169: 0 0 IO-APIC-level uhci-hcd | 331 | below this bridge. |
400 | 185: 0 0 IO-APIC-level uhci-hcd | 332 | |
401 | 193: 138 10 PCI-MSI aic79xx | 333 | Again, please notify linux-pci@vger.kernel.org of any bridges that need |
402 | 201: 30 0 PCI-MSI aic79xx | 334 | special handling. |
403 | 225: 30 0 IO-APIC-level aic7xxx | 335 | |
404 | 233: 30 0 IO-APIC-level aic7xxx | 336 | 5.3. Disabling MSIs on a single device |
405 | NMI: 0 0 | 337 | |
406 | LOC: 324553 325068 | 338 | Some devices are known to have faulty MSI implementations. Usually this |
407 | ERR: 0 | 339 | is handled in the individual device driver but occasionally it's necessary |
408 | MIS: 0 | 340 | to handle this with a quirk. Some drivers have an option to disable use |
409 | 341 | of MSI. While this is a convenient workaround for the driver author, | |
410 | 6. MSI quirks | 342 | it is not good practise, and should not be emulated. |
411 | 343 | ||
412 | Several PCI chipsets or devices are known to not support MSI. | 344 | 5.4. Finding why MSIs are disabled on a device |
413 | The PCI stack provides 3 possible levels of MSI disabling: | 345 | |
414 | * on a single device | 346 | From the above three sections, you can see that there are many reasons |
415 | * on all devices behind a specific bridge | 347 | why MSIs may not be enabled for a given device. Your first step should |
416 | * globally | 348 | be to examine your dmesg carefully to determine whether MSIs are enabled |
417 | 349 | for your machine. You should also check your .config to be sure you | |
418 | 6.1. Disabling MSI on a single device | 350 | have enabled CONFIG_PCI_MSI. |
419 | 351 | ||
420 | Under some circumstances it might be required to disable MSI on a | 352 | Then, 'lspci -t' gives the list of bridges above a device. Reading |
421 | single device. This may be achieved by either not calling pci_enable_msi() | 353 | /sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) |
422 | or all, or setting the pci_dev->no_msi flag before (most of the time | 354 | or disabled (0). If 0 is found in any of the msi_bus files belonging |
423 | in a quirk). | 355 | to bridges between the PCI root and the device, MSIs are disabled. |
424 | 356 | ||
425 | 6.2. Disabling MSI below a bridge | 357 | It is also worth checking the device driver to see whether it supports MSIs. |
426 | 358 | For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or | |
427 | The vast majority of MSI quirks are required by PCI bridges not | 359 | pci_enable_msi_block(). |
428 | being able to route MSI between busses. In this case, MSI have to be | ||
429 | disabled on all devices behind this bridge. It is achieves by setting | ||
430 | the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge | ||
431 | subordinate bus. There is no need to set the same flag on bridges that | ||
432 | are below the broken bridge. When pci_enable_msi() is called to enable | ||
433 | MSI on a device, pci_msi_supported() takes care of checking the NO_MSI | ||
434 | flag in all parent busses of the device. | ||
435 | |||
436 | Some bridges actually support dynamic MSI support enabling/disabling | ||
437 | by changing some bits in their PCI configuration space (especially | ||
438 | the Hypertransport chipsets such as the nVidia nForce and Serverworks | ||
439 | HT2000). It may then be required to update the NO_MSI flag on the | ||
440 | corresponding devices in the sysfs hierarchy. To enable MSI support | ||
441 | on device "0000:00:0e", do: | ||
442 | |||
443 | echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus | ||
444 | |||
445 | To disable MSI support, echo 0 instead of 1. Note that it should be | ||
446 | used with caution since changing this value might break interrupts. | ||
447 | |||
448 | 6.3. Disabling MSI globally | ||
449 | |||
450 | Some extreme cases may require to disable MSI globally on the system. | ||
451 | For now, the only known case is a Serverworks PCI-X chipsets (MSI are | ||
452 | not supported on several busses that are not all connected to the | ||
453 | chipset in the Linux PCI hierarchy). In the vast majority of other | ||
454 | cases, disabling only behind a specific bridge is enough. | ||
455 | |||
456 | For debugging purpose, the user may also pass pci=nomsi on the kernel | ||
457 | command-line to explicitly disable MSI globally. But, once the appro- | ||
458 | priate quirks are added to the kernel, this option should not be | ||
459 | required anymore. | ||
460 | |||
461 | 6.4. Finding why MSI cannot be enabled on a device | ||
462 | |||
463 | Assuming that MSI are not enabled on a device, you should look at | ||
464 | dmesg to find messages that quirks may output when disabling MSI | ||
465 | on some devices, some bridges or even globally. | ||
466 | Then, lspci -t gives the list of bridges above a device. Reading | ||
467 | /sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI | ||
468 | are enabled (1) or disabled (0). In 0 is found in a single bridge | ||
469 | msi_bus file above the device, MSI cannot be enabled. | ||
470 | |||
471 | 7. FAQ | ||
472 | |||
473 | Q1. Are there any limitations on using the MSI? | ||
474 | |||
475 | A1. If the PCI device supports MSI and conforms to the | ||
476 | specification and the platform supports the APIC local bus, | ||
477 | then using MSI should work. | ||
478 | |||
479 | Q2. Will it work on all the Pentium processors (P3, P4, Xeon, | ||
480 | AMD processors)? In P3 IPI's are transmitted on the APIC local | ||
481 | bus and in P4 and Xeon they are transmitted on the system | ||
482 | bus. Are there any implications with this? | ||
483 | |||
484 | A2. MSI support enables a PCI device sending an inbound | ||
485 | memory write (0xfeexxxxx as target address) on its PCI bus | ||
486 | directly to the FSB. Since the message address has a | ||
487 | redirection hint bit cleared, it should work. | ||
488 | |||
489 | Q3. The target address 0xfeexxxxx will be translated by the | ||
490 | Host Bridge into an interrupt message. Are there any | ||
491 | limitations on the chipsets such as Intel 8xx, Intel e7xxx, | ||
492 | or VIA? | ||
493 | |||
494 | A3. If these chipsets support an inbound memory write with | ||
495 | target address set as 0xfeexxxxx, as conformed to PCI | ||
496 | specification 2.3 or latest, then it should work. | ||
497 | |||
498 | Q4. From the driver point of view, if the MSI is lost because | ||
499 | of errors occurring during inbound memory write, then it may | ||
500 | wait forever. Is there a mechanism for it to recover? | ||
501 | |||
502 | A4. Since the target of the transaction is an inbound memory | ||
503 | write, all transaction termination conditions (Retry, | ||
504 | Master-Abort, Target-Abort, or normal completion) are | ||
505 | supported. A device sending an MSI must abide by all the PCI | ||
506 | rules and conditions regarding that inbound memory write. So, | ||
507 | if a retry is signaled it must retry, etc... We believe that | ||
508 | the recommendation for Abort is also a retry (refer to PCI | ||
509 | specification 2.3 or latest). | ||
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt new file mode 100644 index 000000000000..fc73ef5d65b8 --- /dev/null +++ b/Documentation/PCI/pci-iov-howto.txt | |||
@@ -0,0 +1,99 @@ | |||
1 | PCI Express I/O Virtualization Howto | ||
2 | Copyright (C) 2009 Intel Corporation | ||
3 | Yu Zhao <yu.zhao@intel.com> | ||
4 | |||
5 | |||
6 | 1. Overview | ||
7 | |||
8 | 1.1 What is SR-IOV | ||
9 | |||
10 | Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended | ||
11 | capability which makes one physical device appear as multiple virtual | ||
12 | devices. The physical device is referred to as Physical Function (PF) | ||
13 | while the virtual devices are referred to as Virtual Functions (VF). | ||
14 | Allocation of the VF can be dynamically controlled by the PF via | ||
15 | registers encapsulated in the capability. By default, this feature is | ||
16 | not enabled and the PF behaves as traditional PCIe device. Once it's | ||
17 | turned on, each VF's PCI configuration space can be accessed by its own | ||
18 | Bus, Device and Function Number (Routing ID). And each VF also has PCI | ||
19 | Memory Space, which is used to map its register set. VF device driver | ||
20 | operates on the register set so it can be functional and appear as a | ||
21 | real existing PCI device. | ||
22 | |||
23 | 2. User Guide | ||
24 | |||
25 | 2.1 How can I enable SR-IOV capability | ||
26 | |||
27 | The device driver (PF driver) will control the enabling and disabling | ||
28 | of the capability via API provided by SR-IOV core. If the hardware | ||
29 | has SR-IOV capability, loading its PF driver would enable it and all | ||
30 | VFs associated with the PF. | ||
31 | |||
32 | 2.2 How can I use the Virtual Functions | ||
33 | |||
34 | The VF is treated as hot-plugged PCI devices in the kernel, so they | ||
35 | should be able to work in the same way as real PCI devices. The VF | ||
36 | requires device driver that is same as a normal PCI device's. | ||
37 | |||
38 | 3. Developer Guide | ||
39 | |||
40 | 3.1 SR-IOV API | ||
41 | |||
42 | To enable SR-IOV capability: | ||
43 | int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); | ||
44 | 'nr_virtfn' is number of VFs to be enabled. | ||
45 | |||
46 | To disable SR-IOV capability: | ||
47 | void pci_disable_sriov(struct pci_dev *dev); | ||
48 | |||
49 | To notify SR-IOV core of Virtual Function Migration: | ||
50 | irqreturn_t pci_sriov_migration(struct pci_dev *dev); | ||
51 | |||
52 | 3.2 Usage example | ||
53 | |||
54 | Following piece of code illustrates the usage of the SR-IOV API. | ||
55 | |||
56 | static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id) | ||
57 | { | ||
58 | pci_enable_sriov(dev, NR_VIRTFN); | ||
59 | |||
60 | ... | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static void __devexit dev_remove(struct pci_dev *dev) | ||
66 | { | ||
67 | pci_disable_sriov(dev); | ||
68 | |||
69 | ... | ||
70 | } | ||
71 | |||
72 | static int dev_suspend(struct pci_dev *dev, pm_message_t state) | ||
73 | { | ||
74 | ... | ||
75 | |||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int dev_resume(struct pci_dev *dev) | ||
80 | { | ||
81 | ... | ||
82 | |||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static void dev_shutdown(struct pci_dev *dev) | ||
87 | { | ||
88 | ... | ||
89 | } | ||
90 | |||
91 | static struct pci_driver dev_driver = { | ||
92 | .name = "SR-IOV Physical Function driver", | ||
93 | .id_table = dev_id_table, | ||
94 | .probe = dev_probe, | ||
95 | .remove = __devexit_p(dev_remove), | ||
96 | .suspend = dev_suspend, | ||
97 | .resume = dev_resume, | ||
98 | .shutdown = dev_shutdown, | ||
99 | }; | ||
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index ddeb14beacc8..be21001ab144 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt | |||
@@ -61,6 +61,10 @@ be initiated although firmwares have no _OSC support. To enable the | |||
61 | walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line | 61 | walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line |
62 | when booting kernel. Note that forceload=n by default. | 62 | when booting kernel. Note that forceload=n by default. |
63 | 63 | ||
64 | nosourceid, another parameter of type bool, can be used when broken | ||
65 | hardware (mostly chipsets) has root ports that cannot obtain the reporting | ||
66 | source ID. nosourceid=n by default. | ||
67 | |||
64 | 2.3 AER error output | 68 | 2.3 AER error output |
65 | When a PCI-E AER error is captured, an error message will be outputed to | 69 | When a PCI-E AER error is captured, an error message will be outputed to |
66 | console. If it's a correctable error, it is outputed as a warning. | 70 | console. If it's a correctable error, it is outputed as a warning. |
@@ -246,3 +250,24 @@ with the PCI Express AER Root driver? | |||
246 | A: It could call the helper functions to enable AER in devices and | 250 | A: It could call the helper functions to enable AER in devices and |
247 | cleanup uncorrectable status register. Pls. refer to section 3.3. | 251 | cleanup uncorrectable status register. Pls. refer to section 3.3. |
248 | 252 | ||
253 | |||
254 | 4. Software error injection | ||
255 | |||
256 | Debugging PCIE AER error recovery code is quite difficult because it | ||
257 | is hard to trigger real hardware errors. Software based error | ||
258 | injection can be used to fake various kinds of PCIE errors. | ||
259 | |||
260 | First you should enable PCIE AER software error injection in kernel | ||
261 | configuration, that is, following item should be in your .config. | ||
262 | |||
263 | CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m | ||
264 | |||
265 | After reboot with new kernel or insert the module, a device file named | ||
266 | /dev/aer_inject should be created. | ||
267 | |||
268 | Then, you need a user space tool named aer-inject, which can be gotten | ||
269 | from: | ||
270 | http://www.kernel.org/pub/linux/utils/pci/aer-inject/ | ||
271 | |||
272 | More information about aer-inject can be found in the document comes | ||
273 | with its source code. | ||
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt index 1fd175368a87..4349c1487e91 100644 --- a/Documentation/RCU/listRCU.txt +++ b/Documentation/RCU/listRCU.txt | |||
@@ -118,7 +118,7 @@ Following are the RCU equivalents for these two functions: | |||
118 | list_for_each_entry(e, list, list) { | 118 | list_for_each_entry(e, list, list) { |
119 | if (!audit_compare_rule(rule, &e->rule)) { | 119 | if (!audit_compare_rule(rule, &e->rule)) { |
120 | list_del_rcu(&e->list); | 120 | list_del_rcu(&e->list); |
121 | call_rcu(&e->rcu, audit_free_rule, e); | 121 | call_rcu(&e->rcu, audit_free_rule); |
122 | return 0; | 122 | return 0; |
123 | } | 123 | } |
124 | } | 124 | } |
@@ -206,7 +206,7 @@ RCU ("read-copy update") its name. The RCU code is as follows: | |||
206 | ne->rule.action = newaction; | 206 | ne->rule.action = newaction; |
207 | ne->rule.file_count = newfield_count; | 207 | ne->rule.file_count = newfield_count; |
208 | list_replace_rcu(e, ne); | 208 | list_replace_rcu(e, ne); |
209 | call_rcu(&e->rcu, audit_free_rule, e); | 209 | call_rcu(&e->rcu, audit_free_rule); |
210 | return 0; | 210 | return 0; |
211 | } | 211 | } |
212 | } | 212 | } |
@@ -283,7 +283,7 @@ flag under the spinlock as follows: | |||
283 | list_del_rcu(&e->list); | 283 | list_del_rcu(&e->list); |
284 | e->deleted = 1; | 284 | e->deleted = 1; |
285 | spin_unlock(&e->lock); | 285 | spin_unlock(&e->lock); |
286 | call_rcu(&e->rcu, audit_free_rule, e); | 286 | call_rcu(&e->rcu, audit_free_rule); |
287 | return 0; | 287 | return 0; |
288 | } | 288 | } |
289 | } | 289 | } |
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 95821a29ae41..7aa2002ade77 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -81,7 +81,7 @@ o I hear that RCU needs work in order to support realtime kernels? | |||
81 | This work is largely completed. Realtime-friendly RCU can be | 81 | This work is largely completed. Realtime-friendly RCU can be |
82 | enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. | 82 | enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. |
83 | However, work is in progress for enabling priority boosting of | 83 | However, work is in progress for enabling priority boosting of |
84 | preempted RCU read-side critical sections.This is needed if you | 84 | preempted RCU read-side critical sections. This is needed if you |
85 | have CPU-bound realtime threads. | 85 | have CPU-bound realtime threads. |
86 | 86 | ||
87 | o Where can I find more information on RCU? | 87 | o Where can I find more information on RCU? |
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 239f542d48ba..93cb28d05dcd 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt | |||
@@ -21,7 +21,7 @@ if (obj) { | |||
21 | /* | 21 | /* |
22 | * Because a writer could delete object, and a writer could | 22 | * Because a writer could delete object, and a writer could |
23 | * reuse these object before the RCU grace period, we | 23 | * reuse these object before the RCU grace period, we |
24 | * must check key after geting the reference on object | 24 | * must check key after getting the reference on object |
25 | */ | 25 | */ |
26 | if (obj->key != key) { // not the object we expected | 26 | if (obj->key != key) { // not the object we expected |
27 | put_ref(obj); | 27 | put_ref(obj); |
@@ -117,8 +117,8 @@ a race (some writer did a delete and/or a move of an object | |||
117 | to another chain) checking the final 'nulls' value if | 117 | to another chain) checking the final 'nulls' value if |
118 | the lookup met the end of chain. If final 'nulls' value | 118 | the lookup met the end of chain. If final 'nulls' value |
119 | is not the slot number, then we must restart the lookup at | 119 | is not the slot number, then we must restart the lookup at |
120 | the begining. If the object was moved to same chain, | 120 | the beginning. If the object was moved to the same chain, |
121 | then the reader doesnt care : It might eventually | 121 | then the reader doesn't care : It might eventually |
122 | scan the list again without harm. | 122 | scan the list again without harm. |
123 | 123 | ||
124 | 124 | ||
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 068848240a8b..02cced183b2d 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). | |||
192 | The output of "cat rcu/rcudata" looks as follows: | 192 | The output of "cat rcu/rcudata" looks as follows: |
193 | 193 | ||
194 | rcu: | 194 | rcu: |
195 | 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 | 195 | rcu: |
196 | 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 | 196 | 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 |
197 | 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 | 197 | 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 |
198 | 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 | 198 | 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 |
199 | 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 | 199 | 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10 |
200 | 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 | 200 | 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10 |
201 | 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 | 201 | 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10 |
202 | 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 | 202 | 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10 |
203 | 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10 | ||
203 | rcu_bh: | 204 | rcu_bh: |
204 | 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 | 205 | 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10 |
205 | 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 | 206 | 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10 |
206 | 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 | 207 | 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
207 | 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 | 208 | 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10 |
208 | 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 | 209 | 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
209 | 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 | 210 | 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
210 | 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 | 211 | 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
211 | 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 | 212 | 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
212 | 213 | ||
213 | The first section lists the rcu_data structures for rcu, the second for | 214 | The first section lists the rcu_data structures for rcu, the second for |
214 | rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. | 215 | rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. |
@@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent | |||
253 | o "qp" indicates that RCU still expects a quiescent state from | 254 | o "qp" indicates that RCU still expects a quiescent state from |
254 | this CPU. | 255 | this CPU. |
255 | 256 | ||
256 | o "rpfq" is the number of rcu_pending() calls on this CPU required | ||
257 | to induce this CPU to invoke force_quiescent_state(). | ||
258 | |||
259 | o "rp" is low-order four hex digits of the count of how many times | ||
260 | rcu_pending() has been invoked on this CPU. | ||
261 | |||
262 | o "dt" is the current value of the dyntick counter that is incremented | 257 | o "dt" is the current value of the dyntick counter that is incremented |
263 | when entering or leaving dynticks idle state, either by the | 258 | when entering or leaving dynticks idle state, either by the |
264 | scheduler or by irq. The number after the "/" is the interrupt | 259 | scheduler or by irq. The number after the "/" is the interrupt |
@@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number | |||
305 | of RCU callbacks is ready to invoke, then the remainder will | 300 | of RCU callbacks is ready to invoke, then the remainder will |
306 | be deferred. | 301 | be deferred. |
307 | 302 | ||
303 | There is also an rcu/rcudata.csv file with the same information in | ||
304 | comma-separated-variable spreadsheet format. | ||
305 | |||
308 | 306 | ||
309 | The output of "cat rcu/rcugp" looks as follows: | 307 | The output of "cat rcu/rcugp" looks as follows: |
310 | 308 | ||
@@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
411 | For example, the first entry at the lowest level shows | 409 | For example, the first entry at the lowest level shows |
412 | "^0", indicating that it corresponds to bit zero in | 410 | "^0", indicating that it corresponds to bit zero in |
413 | the first entry at the middle level. | 411 | the first entry at the middle level. |
412 | |||
413 | |||
414 | The output of "cat rcu/rcu_pending" looks as follows: | ||
415 | |||
416 | rcu: | ||
417 | 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 | ||
418 | 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 | ||
419 | 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 | ||
420 | 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 | ||
421 | 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 | ||
422 | 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 | ||
423 | 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 | ||
424 | 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 | ||
425 | rcu_bh: | ||
426 | 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 | ||
427 | 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 | ||
428 | 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 | ||
429 | 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 | ||
430 | 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 | ||
431 | 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 | ||
432 | 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 | ||
433 | 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 | ||
434 | |||
435 | As always, this is once again split into "rcu" and "rcu_bh" portions. | ||
436 | The fields are as follows: | ||
437 | |||
438 | o "np" is the number of times that __rcu_pending() has been invoked | ||
439 | for the corresponding flavor of RCU. | ||
440 | |||
441 | o "qsp" is the number of times that the RCU was waiting for a | ||
442 | quiescent state from this CPU. | ||
443 | |||
444 | o "cbr" is the number of times that this CPU had RCU callbacks | ||
445 | that had passed through a grace period, and were thus ready | ||
446 | to be invoked. | ||
447 | |||
448 | o "cng" is the number of times that this CPU needed another | ||
449 | grace period while RCU was idle. | ||
450 | |||
451 | o "gpc" is the number of times that an old grace period had | ||
452 | completed, but this CPU was not yet aware of it. | ||
453 | |||
454 | o "gps" is the number of times that a new grace period had started, | ||
455 | but this CPU was not yet aware of it. | ||
456 | |||
457 | o "nf" is the number of times that this CPU suspected that the | ||
458 | current grace period had run for too long, and thus needed to | ||
459 | be forced. | ||
460 | |||
461 | Please note that "forcing" consists of sending resched IPIs | ||
462 | to holdout CPUs. If that CPU really still is in an old RCU | ||
463 | read-side critical section, then we really do have to wait for it. | ||
464 | The assumption behing "forcing" is that the CPU is not still in | ||
465 | an old RCU read-side critical section, but has not yet responded | ||
466 | for some other reason. | ||
467 | |||
468 | o "nn" is the number of times that this CPU needed nothing. Alert | ||
469 | readers will note that the rcu "nn" number for a given CPU very | ||
470 | closely matches the rcu_bh "np" number for that same CPU. This | ||
471 | is due to short-circuit evaluation in rcu_pending(). | ||
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt index 6fc656035925..561826f82093 100644 --- a/Documentation/SM501.txt +++ b/Documentation/SM501.txt | |||
@@ -5,7 +5,7 @@ Copyright 2006, 2007 Simtec Electronics | |||
5 | 5 | ||
6 | The Silicon Motion SM501 multimedia companion chip is a multifunction device | 6 | The Silicon Motion SM501 multimedia companion chip is a multifunction device |
7 | which may provide numerous interfaces including USB host controller USB gadget, | 7 | which may provide numerous interfaces including USB host controller USB gadget, |
8 | Asyncronous Serial ports, Audio functions and a dual display video interface. | 8 | asynchronous serial ports, audio functions, and a dual display video interface. |
9 | The device may be connected by PCI or local bus with varying functions enabled. | 9 | The device may be connected by PCI or local bus with varying functions enabled. |
10 | 10 | ||
11 | Core | 11 | Core |
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt index 989c2fcd8111..34614b4c708e 100644 --- a/Documentation/Smack.txt +++ b/Documentation/Smack.txt | |||
@@ -184,14 +184,17 @@ length. Single character labels using special characters, that being anything | |||
184 | other than a letter or digit, are reserved for use by the Smack development | 184 | other than a letter or digit, are reserved for use by the Smack development |
185 | team. Smack labels are unstructured, case sensitive, and the only operation | 185 | team. Smack labels are unstructured, case sensitive, and the only operation |
186 | ever performed on them is comparison for equality. Smack labels cannot | 186 | ever performed on them is comparison for equality. Smack labels cannot |
187 | contain unprintable characters or the "/" (slash) character. | 187 | contain unprintable characters, the "/" (slash), the "\" (backslash), the "'" |
188 | (quote) and '"' (double-quote) characters. | ||
189 | Smack labels cannot begin with a '-', which is reserved for special options. | ||
188 | 190 | ||
189 | There are some predefined labels: | 191 | There are some predefined labels: |
190 | 192 | ||
191 | _ Pronounced "floor", a single underscore character. | 193 | _ Pronounced "floor", a single underscore character. |
192 | ^ Pronounced "hat", a single circumflex character. | 194 | ^ Pronounced "hat", a single circumflex character. |
193 | * Pronounced "star", a single asterisk character. | 195 | * Pronounced "star", a single asterisk character. |
194 | ? Pronounced "huh", a single question mark character. | 196 | ? Pronounced "huh", a single question mark character. |
197 | @ Pronounced "Internet", a single at sign character. | ||
195 | 198 | ||
196 | Every task on a Smack system is assigned a label. System tasks, such as | 199 | Every task on a Smack system is assigned a label. System tasks, such as |
197 | init(8) and systems daemons, are run with the floor ("_") label. User tasks | 200 | init(8) and systems daemons, are run with the floor ("_") label. User tasks |
@@ -412,6 +415,36 @@ sockets. | |||
412 | A privileged program may set this to match the label of another | 415 | A privileged program may set this to match the label of another |
413 | task with which it hopes to communicate. | 416 | task with which it hopes to communicate. |
414 | 417 | ||
418 | Smack Netlabel Exceptions | ||
419 | |||
420 | You will often find that your labeled application has to talk to the outside, | ||
421 | unlabeled world. To do this there's a special file /smack/netlabel where you can | ||
422 | add some exceptions in the form of : | ||
423 | @IP1 LABEL1 or | ||
424 | @IP2/MASK LABEL2 | ||
425 | |||
426 | It means that your application will have unlabeled access to @IP1 if it has | ||
427 | write access on LABEL1, and access to the subnet @IP2/MASK if it has write | ||
428 | access on LABEL2. | ||
429 | |||
430 | Entries in the /smack/netlabel file are matched by longest mask first, like in | ||
431 | classless IPv4 routing. | ||
432 | |||
433 | A special label '@' and an option '-CIPSO' can be used there : | ||
434 | @ means Internet, any application with any label has access to it | ||
435 | -CIPSO means standard CIPSO networking | ||
436 | |||
437 | If you don't know what CIPSO is and don't plan to use it, you can just do : | ||
438 | echo 127.0.0.1 -CIPSO > /smack/netlabel | ||
439 | echo 0.0.0.0/0 @ > /smack/netlabel | ||
440 | |||
441 | If you use CIPSO on your 192.168.0.0/16 local network and need also unlabeled | ||
442 | Internet access, you can have : | ||
443 | echo 127.0.0.1 -CIPSO > /smack/netlabel | ||
444 | echo 192.168.0.0/16 -CIPSO > /smack/netlabel | ||
445 | echo 0.0.0.0/0 @ > /smack/netlabel | ||
446 | |||
447 | |||
415 | Writing Applications for Smack | 448 | Writing Applications for Smack |
416 | 449 | ||
417 | There are three sorts of applications that will run on a Smack system. How an | 450 | There are three sorts of applications that will run on a Smack system. How an |
@@ -491,3 +524,18 @@ Smack supports some mount options: | |||
491 | 524 | ||
492 | These mount options apply to all file system types. | 525 | These mount options apply to all file system types. |
493 | 526 | ||
527 | Smack auditing | ||
528 | |||
529 | If you want Smack auditing of security events, you need to set CONFIG_AUDIT | ||
530 | in your kernel configuration. | ||
531 | By default, all denied events will be audited. You can change this behavior by | ||
532 | writing a single character to the /smack/logging file : | ||
533 | 0 : no logging | ||
534 | 1 : log denied (default) | ||
535 | 2 : log accepted | ||
536 | 3 : log denied & accepted | ||
537 | |||
538 | Events are logged as 'key=value' pairs, for each event you at least will get | ||
539 | the subjet, the object, the rights requested, the action, the kernel function | ||
540 | that triggered the event, plus other pairs depending on the type of event | ||
541 | audited. | ||
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index ac5e0b2f1097..78a9168ff377 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist | |||
@@ -54,7 +54,7 @@ kernel patches. | |||
54 | CONFIG_PREEMPT. | 54 | CONFIG_PREEMPT. |
55 | 55 | ||
56 | 14: If the patch affects IO/Disk, etc: has been tested with and without | 56 | 14: If the patch affects IO/Disk, etc: has been tested with and without |
57 | CONFIG_LBD. | 57 | CONFIG_LBDAF. |
58 | 58 | ||
59 | 15: All codepaths have been exercised with all lockdep features enabled. | 59 | 15: All codepaths have been exercised with all lockdep features enabled. |
60 | 60 | ||
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index f309d3c6221c..5c555a8b39e5 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches | |||
@@ -91,6 +91,10 @@ Be as specific as possible. The WORST descriptions possible include | |||
91 | things like "update driver X", "bug fix for driver X", or "this patch | 91 | things like "update driver X", "bug fix for driver X", or "this patch |
92 | includes updates for subsystem X. Please apply." | 92 | includes updates for subsystem X. Please apply." |
93 | 93 | ||
94 | The maintainer will thank you if you write your patch description in a | ||
95 | form which can be easily pulled into Linux's source code management | ||
96 | system, git, as a "commit log". See #15, below. | ||
97 | |||
94 | If your description starts to get long, that's a sign that you probably | 98 | If your description starts to get long, that's a sign that you probably |
95 | need to split up your patch. See #3, next. | 99 | need to split up your patch. See #3, next. |
96 | 100 | ||
@@ -183,8 +187,9 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS | |||
183 | copy the maintainer when you change their code. | 187 | copy the maintainer when you change their code. |
184 | 188 | ||
185 | For small patches you may want to CC the Trivial Patch Monkey | 189 | For small patches you may want to CC the Trivial Patch Monkey |
186 | trivial@kernel.org managed by Jesper Juhl; which collects "trivial" | 190 | trivial@kernel.org which collects "trivial" patches. Have a look |
187 | patches. Trivial patches must qualify for one of the following rules: | 191 | into the MAINTAINERS file for its current manager. |
192 | Trivial patches must qualify for one of the following rules: | ||
188 | Spelling fixes in documentation | 193 | Spelling fixes in documentation |
189 | Spelling fixes which could break grep(1) | 194 | Spelling fixes which could break grep(1) |
190 | Warning fixes (cluttering with useless warnings is bad) | 195 | Warning fixes (cluttering with useless warnings is bad) |
@@ -196,7 +201,6 @@ patches. Trivial patches must qualify for one of the following rules: | |||
196 | since people copy, as long as it's trivial) | 201 | since people copy, as long as it's trivial) |
197 | Any fix by the author/maintainer of the file (ie. patch monkey | 202 | Any fix by the author/maintainer of the file (ie. patch monkey |
198 | in re-transmission mode) | 203 | in re-transmission mode) |
199 | URL: <http://www.kernel.org/pub/linux/kernel/people/juhl/trivial/> | ||
200 | 204 | ||
201 | 205 | ||
202 | 206 | ||
@@ -405,7 +409,14 @@ person it names. This tag documents that potentially interested parties | |||
405 | have been included in the discussion | 409 | have been included in the discussion |
406 | 410 | ||
407 | 411 | ||
408 | 14) Using Tested-by: and Reviewed-by: | 412 | 14) Using Reported-by:, Tested-by: and Reviewed-by: |
413 | |||
414 | If this patch fixes a problem reported by somebody else, consider adding a | ||
415 | Reported-by: tag to credit the reporter for their contribution. Please | ||
416 | note that this tag should not be added without the reporter's permission, | ||
417 | especially if the problem was not reported in a public forum. That said, | ||
418 | if we diligently credit our bug reporters, they will, hopefully, be | ||
419 | inspired to help us again in the future. | ||
409 | 420 | ||
410 | A Tested-by: tag indicates that the patch has been successfully tested (in | 421 | A Tested-by: tag indicates that the patch has been successfully tested (in |
411 | some environment) by the person named. This tag informs maintainers that | 422 | some environment) by the person named. This tag informs maintainers that |
@@ -444,7 +455,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to | |||
444 | reviewers and to inform maintainers of the degree of review which has been | 455 | reviewers and to inform maintainers of the degree of review which has been |
445 | done on the patch. Reviewed-by: tags, when supplied by reviewers known to | 456 | done on the patch. Reviewed-by: tags, when supplied by reviewers known to |
446 | understand the subject area and to perform thorough reviews, will normally | 457 | understand the subject area and to perform thorough reviews, will normally |
447 | increase the liklihood of your patch getting into the kernel. | 458 | increase the likelihood of your patch getting into the kernel. |
448 | 459 | ||
449 | 460 | ||
450 | 15) The canonical patch format | 461 | 15) The canonical patch format |
@@ -485,12 +496,33 @@ phrase" should not be a filename. Do not use the same "summary | |||
485 | phrase" for every patch in a whole patch series (where a "patch | 496 | phrase" for every patch in a whole patch series (where a "patch |
486 | series" is an ordered sequence of multiple, related patches). | 497 | series" is an ordered sequence of multiple, related patches). |
487 | 498 | ||
488 | Bear in mind that the "summary phrase" of your email becomes | 499 | Bear in mind that the "summary phrase" of your email becomes a |
489 | a globally-unique identifier for that patch. It propagates | 500 | globally-unique identifier for that patch. It propagates all the way |
490 | all the way into the git changelog. The "summary phrase" may | 501 | into the git changelog. The "summary phrase" may later be used in |
491 | later be used in developer discussions which refer to the patch. | 502 | developer discussions which refer to the patch. People will want to |
492 | People will want to google for the "summary phrase" to read | 503 | google for the "summary phrase" to read discussion regarding that |
493 | discussion regarding that patch. | 504 | patch. It will also be the only thing that people may quickly see |
505 | when, two or three months later, they are going through perhaps | ||
506 | thousands of patches using tools such as "gitk" or "git log | ||
507 | --oneline". | ||
508 | |||
509 | For these reasons, the "summary" must be no more than 70-75 | ||
510 | characters, and it must describe both what the patch changes, as well | ||
511 | as why the patch might be necessary. It is challenging to be both | ||
512 | succinct and descriptive, but that is what a well-written summary | ||
513 | should do. | ||
514 | |||
515 | The "summary phrase" may be prefixed by tags enclosed in square | ||
516 | brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not | ||
517 | considered part of the summary phrase, but describe how the patch | ||
518 | should be treated. Common tags might include a version descriptor if | ||
519 | the multiple versions of the patch have been sent out in response to | ||
520 | comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for | ||
521 | comments. If there are four patches in a patch series the individual | ||
522 | patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures | ||
523 | that developers understand the order in which the patches should be | ||
524 | applied and that they have reviewed or applied all of the patches in | ||
525 | the patch series. | ||
494 | 526 | ||
495 | A couple of example Subjects: | 527 | A couple of example Subjects: |
496 | 528 | ||
@@ -510,19 +542,31 @@ the patch author in the changelog. | |||
510 | The explanation body will be committed to the permanent source | 542 | The explanation body will be committed to the permanent source |
511 | changelog, so should make sense to a competent reader who has long | 543 | changelog, so should make sense to a competent reader who has long |
512 | since forgotten the immediate details of the discussion that might | 544 | since forgotten the immediate details of the discussion that might |
513 | have led to this patch. | 545 | have led to this patch. Including symptoms of the failure which the |
546 | patch addresses (kernel log messages, oops messages, etc.) is | ||
547 | especially useful for people who might be searching the commit logs | ||
548 | looking for the applicable patch. If a patch fixes a compile failure, | ||
549 | it may not be necessary to include _all_ of the compile failures; just | ||
550 | enough that it is likely that someone searching for the patch can find | ||
551 | it. As in the "summary phrase", it is important to be both succinct as | ||
552 | well as descriptive. | ||
514 | 553 | ||
515 | The "---" marker line serves the essential purpose of marking for patch | 554 | The "---" marker line serves the essential purpose of marking for patch |
516 | handling tools where the changelog message ends. | 555 | handling tools where the changelog message ends. |
517 | 556 | ||
518 | One good use for the additional comments after the "---" marker is for | 557 | One good use for the additional comments after the "---" marker is for |
519 | a diffstat, to show what files have changed, and the number of inserted | 558 | a diffstat, to show what files have changed, and the number of |
520 | and deleted lines per file. A diffstat is especially useful on bigger | 559 | inserted and deleted lines per file. A diffstat is especially useful |
521 | patches. Other comments relevant only to the moment or the maintainer, | 560 | on bigger patches. Other comments relevant only to the moment or the |
522 | not suitable for the permanent changelog, should also go here. | 561 | maintainer, not suitable for the permanent changelog, should also go |
523 | Use diffstat options "-p 1 -w 70" so that filenames are listed from the | 562 | here. A good example of such comments might be "patch changelogs" |
524 | top of the kernel source tree and don't use too much horizontal space | 563 | which describe what has changed between the v1 and v2 version of the |
525 | (easily fit in 80 columns, maybe with some indentation). | 564 | patch. |
565 | |||
566 | If you are going to include a diffstat after the "---" marker, please | ||
567 | use diffstat options "-p 1 -w 70" so that filenames are listed from | ||
568 | the top of the kernel source tree and don't use too much horizontal | ||
569 | space (easily fit in 80 columns, maybe with some indentation). | ||
526 | 570 | ||
527 | See more details on the proper patch format in the following | 571 | See more details on the proper patch format in the following |
528 | references. | 572 | references. |
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 7ea231172c85..aa73e72fd793 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c | |||
@@ -246,7 +246,8 @@ void print_ioacct(struct taskstats *t) | |||
246 | 246 | ||
247 | int main(int argc, char *argv[]) | 247 | int main(int argc, char *argv[]) |
248 | { | 248 | { |
249 | int c, rc, rep_len, aggr_len, len2, cmd_type; | 249 | int c, rc, rep_len, aggr_len, len2; |
250 | int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC; | ||
250 | __u16 id; | 251 | __u16 id; |
251 | __u32 mypid; | 252 | __u32 mypid; |
252 | 253 | ||
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt index ea7ccfc4b274..948c8718d967 100644 --- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt +++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt | |||
@@ -51,7 +51,7 @@ PIN Numbers | |||
51 | ----------- | 51 | ----------- |
52 | 52 | ||
53 | Each pin has an unique number associated with it in regs-gpio.h, | 53 | Each pin has an unique number associated with it in regs-gpio.h, |
54 | eg S3C2410_GPA0 or S3C2410_GPF1. These defines are used to tell | 54 | eg S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell |
55 | the GPIO functions which pin is to be used. | 55 | the GPIO functions which pin is to be used. |
56 | 56 | ||
57 | 57 | ||
@@ -65,11 +65,11 @@ Configuring a pin | |||
65 | 65 | ||
66 | Eg: | 66 | Eg: |
67 | 67 | ||
68 | s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0); | 68 | s3c2410_gpio_cfgpin(S3C2410_GPA(0), S3C2410_GPA0_ADDR0); |
69 | s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1); | 69 | s3c2410_gpio_cfgpin(S3C2410_GPE(8), S3C2410_GPE8_SDDAT1); |
70 | 70 | ||
71 | which would turn GPA0 into the lowest Address line A0, and set | 71 | which would turn GPA(0) into the lowest Address line A0, and set |
72 | GPE8 to be connected to the SDIO/MMC controller's SDDAT1 line. | 72 | GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line. |
73 | 73 | ||
74 | 74 | ||
75 | Reading the current configuration | 75 | Reading the current configuration |
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt index 0dab6e32c130..a30fe510572b 100644 --- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt +++ b/Documentation/arm/Samsung-S3C24XX/Suspend.txt | |||
@@ -40,13 +40,13 @@ Resuming | |||
40 | Machine Support | 40 | Machine Support |
41 | --------------- | 41 | --------------- |
42 | 42 | ||
43 | The machine specific functions must call the s3c2410_pm_init() function | 43 | The machine specific functions must call the s3c_pm_init() function |
44 | to say that its bootloader is capable of resuming. This can be as | 44 | to say that its bootloader is capable of resuming. This can be as |
45 | simple as adding the following to the machine's definition: | 45 | simple as adding the following to the machine's definition: |
46 | 46 | ||
47 | INITMACHINE(s3c2410_pm_init) | 47 | INITMACHINE(s3c_pm_init) |
48 | 48 | ||
49 | A board can do its own setup before calling s3c2410_pm_init, if it | 49 | A board can do its own setup before calling s3c_pm_init, if it |
50 | needs to setup anything else for power management support. | 50 | needs to setup anything else for power management support. |
51 | 51 | ||
52 | There is currently no support for over-riding the default method of | 52 | There is currently no support for over-riding the default method of |
@@ -74,7 +74,7 @@ statuc void __init machine_init(void) | |||
74 | 74 | ||
75 | enable_irq_wake(IRQ_EINT0); | 75 | enable_irq_wake(IRQ_EINT0); |
76 | 76 | ||
77 | s3c2410_pm_init(); | 77 | s3c_pm_init(); |
78 | } | 78 | } |
79 | 79 | ||
80 | 80 | ||
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt index dc6045577a8b..43cb1004d35f 100644 --- a/Documentation/arm/memory.txt +++ b/Documentation/arm/memory.txt | |||
@@ -29,7 +29,14 @@ ffff0000 ffff0fff CPU vector page. | |||
29 | CPU supports vector relocation (control | 29 | CPU supports vector relocation (control |
30 | register V bit.) | 30 | register V bit.) |
31 | 31 | ||
32 | ffc00000 fffeffff DMA memory mapping region. Memory returned | 32 | fffe0000 fffeffff XScale cache flush area. This is used |
33 | in proc-xscale.S to flush the whole data | ||
34 | cache. Free for other usage on non-XScale. | ||
35 | |||
36 | fff00000 fffdffff Fixmap mapping region. Addresses provided | ||
37 | by fix_to_virt() will be located here. | ||
38 | |||
39 | ffc00000 ffefffff DMA memory mapping region. Memory returned | ||
33 | by the dma_alloc_xxx functions will be | 40 | by the dma_alloc_xxx functions will be |
34 | dynamically mapped here. | 41 | dynamically mapped here. |
35 | 42 | ||
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index 4ef245010457..396bec3b74ed 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt | |||
@@ -229,10 +229,10 @@ kernel. It is the use of atomic counters to implement reference | |||
229 | counting, and it works such that once the counter falls to zero it can | 229 | counting, and it works such that once the counter falls to zero it can |
230 | be guaranteed that no other entity can be accessing the object: | 230 | be guaranteed that no other entity can be accessing the object: |
231 | 231 | ||
232 | static void obj_list_add(struct obj *obj) | 232 | static void obj_list_add(struct obj *obj, struct list_head *head) |
233 | { | 233 | { |
234 | obj->active = 1; | 234 | obj->active = 1; |
235 | list_add(&obj->list); | 235 | list_add(&obj->list, head); |
236 | } | 236 | } |
237 | 237 | ||
238 | static void obj_list_del(struct obj *obj) | 238 | static void obj_list_del(struct obj *obj) |
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index ecad6ee75705..8d2158a1c6aa 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt | |||
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address | |||
186 | do not have a corresponding kernel virtual address space mapping) and | 186 | do not have a corresponding kernel virtual address space mapping) and |
187 | low-memory pages. | 187 | low-memory pages. |
188 | 188 | ||
189 | Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion | 189 | Note: Please refer to Documentation/DMA-mapping.txt for a discussion |
190 | on PCI high mem DMA aspects and mapping of scatter gather lists, and support | 190 | on PCI high mem DMA aspects and mapping of scatter gather lists, and support |
191 | for 64 bit PCI. | 191 | for 64 bit PCI. |
192 | 192 | ||
@@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers. | |||
1040 | iii. Plugging the queue to batch requests in anticipation of opportunities for | 1040 | iii. Plugging the queue to batch requests in anticipation of opportunities for |
1041 | merge/sort optimizations | 1041 | merge/sort optimizations |
1042 | 1042 | ||
1043 | This is just the same as in 2.4 so far, though per-device unplugging | ||
1044 | support is anticipated for 2.5. Also with a priority-based i/o scheduler, | ||
1045 | such decisions could be based on request priorities. | ||
1046 | |||
1047 | Plugging is an approach that the current i/o scheduling algorithm resorts to so | 1043 | Plugging is an approach that the current i/o scheduling algorithm resorts to so |
1048 | that it collects up enough requests in the queue to be able to take | 1044 | that it collects up enough requests in the queue to be able to take |
1049 | advantage of the sorting/merging logic in the elevator. If the | 1045 | advantage of the sorting/merging logic in the elevator. If the |
1050 | queue is empty when a request comes in, then it plugs the request queue | 1046 | queue is empty when a request comes in, then it plugs the request queue |
1051 | (sort of like plugging the bottom of a vessel to get fluid to build up) | 1047 | (sort of like plugging the bath tub of a vessel to get fluid to build up) |
1052 | till it fills up with a few more requests, before starting to service | 1048 | till it fills up with a few more requests, before starting to service |
1053 | the requests. This provides an opportunity to merge/sort the requests before | 1049 | the requests. This provides an opportunity to merge/sort the requests before |
1054 | passing them down to the device. There are various conditions when the queue is | 1050 | passing them down to the device. There are various conditions when the queue is |
1055 | unplugged (to open up the flow again), either through a scheduled task or | 1051 | unplugged (to open up the flow again), either through a scheduled task or |
1056 | could be on demand. For example wait_on_buffer sets the unplugging going | 1052 | could be on demand. For example wait_on_buffer sets the unplugging going |
1057 | (by running tq_disk) so the read gets satisfied soon. So in the read case, | 1053 | through sync_buffer() running blk_run_address_space(mapping). Or the caller |
1058 | the queue gets explicitly unplugged as part of waiting for completion, | 1054 | can do it explicity through blk_unplug(bdev). So in the read case, |
1059 | in fact all queues get unplugged as a side-effect. | 1055 | the queue gets explicitly unplugged as part of waiting for completion on that |
1056 | buffer. For page driven IO, the address space ->sync_page() takes care of | ||
1057 | doing the blk_run_address_space(). | ||
1060 | 1058 | ||
1061 | Aside: | 1059 | Aside: |
1062 | This is kind of controversial territory, as it's not clear if plugging is | 1060 | This is kind of controversial territory, as it's not clear if plugging is |
@@ -1067,11 +1065,6 @@ Aside: | |||
1067 | multi-page bios being queued in one shot, we may not need to wait to merge | 1065 | multi-page bios being queued in one shot, we may not need to wait to merge |
1068 | a big request from the broken up pieces coming by. | 1066 | a big request from the broken up pieces coming by. |
1069 | 1067 | ||
1070 | Per-queue granularity unplugging (still a Todo) may help reduce some of the | ||
1071 | concerns with just a single tq_disk flush approach. Something like | ||
1072 | blk_kick_queue() to unplug a specific queue (right away ?) | ||
1073 | or optionally, all queues, is in the plan. | ||
1074 | |||
1075 | 4.4 I/O contexts | 1068 | 4.4 I/O contexts |
1076 | I/O contexts provide a dynamically allocated per process data area. They may | 1069 | I/O contexts provide a dynamically allocated per process data area. They may |
1077 | be used in I/O schedulers, and in the block layer (could be used for IO statis, | 1070 | be used in I/O schedulers, and in the block layer (could be used for IO statis, |
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt index 72576769e0f4..2d82c80322cb 100644 --- a/Documentation/block/deadline-iosched.txt +++ b/Documentation/block/deadline-iosched.txt | |||
@@ -58,7 +58,7 @@ same criteria as reads. | |||
58 | front_merges (bool) | 58 | front_merges (bool) |
59 | ------------ | 59 | ------------ |
60 | 60 | ||
61 | Sometimes it happens that a request enters the io scheduler that is contigious | 61 | Sometimes it happens that a request enters the io scheduler that is contiguous |
62 | with a request that is already on the queue. Either it fits in the back of that | 62 | with a request that is already on the queue. Either it fits in the back of that |
63 | request, or it fits at the front. That is called either a back merge candidate | 63 | request, or it fits at the front. That is called either a back merge candidate |
64 | or a front merge candidate. Due to the way files are typically laid out, | 64 | or a front merge candidate. Due to the way files are typically laid out, |
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt index 634c952e1964..d5af3f630814 100644 --- a/Documentation/block/switching-sched.txt +++ b/Documentation/block/switching-sched.txt | |||
@@ -35,9 +35,3 @@ noop anticipatory deadline [cfq] | |||
35 | # echo anticipatory > /sys/block/hda/queue/scheduler | 35 | # echo anticipatory > /sys/block/hda/queue/scheduler |
36 | # cat /sys/block/hda/queue/scheduler | 36 | # cat /sys/block/hda/queue/scheduler |
37 | noop [anticipatory] deadline cfq | 37 | noop [anticipatory] deadline cfq |
38 | |||
39 | Each io queue has a set of io scheduler tunables associated with it. These | ||
40 | tunables control how the io scheduler works. You can find these entries | ||
41 | in: | ||
42 | |||
43 | /sys/block/<device>/queue/iosched | ||
diff --git a/Documentation/blockdev/00-INDEX b/Documentation/blockdev/00-INDEX index 86f054c47013..c08df56dd91b 100644 --- a/Documentation/blockdev/00-INDEX +++ b/Documentation/blockdev/00-INDEX | |||
@@ -8,6 +8,8 @@ cpqarray.txt | |||
8 | - info on using Compaq's SMART2 Intelligent Disk Array Controllers. | 8 | - info on using Compaq's SMART2 Intelligent Disk Array Controllers. |
9 | floppy.txt | 9 | floppy.txt |
10 | - notes and driver options for the floppy disk driver. | 10 | - notes and driver options for the floppy disk driver. |
11 | mflash.txt | ||
12 | - info on mGine m(g)flash driver for linux. | ||
11 | nbd.txt | 13 | nbd.txt |
12 | - info on a TCP implementation of a network block device. | 14 | - info on a TCP implementation of a network block device. |
13 | paride.txt | 15 | paride.txt |
diff --git a/Documentation/blockdev/mflash.txt b/Documentation/blockdev/mflash.txt new file mode 100644 index 000000000000..1f610ecf698a --- /dev/null +++ b/Documentation/blockdev/mflash.txt | |||
@@ -0,0 +1,84 @@ | |||
1 | This document describes m[g]flash support in linux. | ||
2 | |||
3 | Contents | ||
4 | 1. Overview | ||
5 | 2. Reserved area configuration | ||
6 | 3. Example of mflash platform driver registration | ||
7 | |||
8 | 1. Overview | ||
9 | |||
10 | Mflash and gflash are embedded flash drive. The only difference is mflash is | ||
11 | MCP(Multi Chip Package) device. These two device operate exactly same way. | ||
12 | So the rest mflash repersents mflash and gflash altogether. | ||
13 | |||
14 | Internally, mflash has nand flash and other hardware logics and supports | ||
15 | 2 different operation (ATA, IO) modes. ATA mode doesn't need any new | ||
16 | driver and currently works well under standard IDE subsystem. Actually it's | ||
17 | one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have | ||
18 | IDE interface. | ||
19 | |||
20 | Followings are brief descriptions about IO mode. | ||
21 | A. IO mode based on ATA protocol and uses some custom command. (read confirm, | ||
22 | write confirm) | ||
23 | B. IO mode uses SRAM bus interface. | ||
24 | C. IO mode supports 4kB boot area, so host can boot from mflash. | ||
25 | |||
26 | 2. Reserved area configuration | ||
27 | If host boot from mflash, usually needs raw area for boot loader image. All of | ||
28 | the mflash's block device operation will be taken this value as start offset. | ||
29 | Note that boot loader's size of reserved area and kernel configuration value | ||
30 | must be same. | ||
31 | |||
32 | 3. Example of mflash platform driver registration | ||
33 | Working mflash is very straight forward. Adding platform device stuff to board | ||
34 | configuration file is all. Here is some pseudo example. | ||
35 | |||
36 | static struct mg_drv_data mflash_drv_data = { | ||
37 | /* If you want to polling driver set to 1 */ | ||
38 | .use_polling = 0, | ||
39 | /* device attribution */ | ||
40 | .dev_attr = MG_BOOT_DEV | ||
41 | }; | ||
42 | |||
43 | static struct resource mg_mflash_rsc[] = { | ||
44 | /* Base address of mflash */ | ||
45 | [0] = { | ||
46 | .start = 0x08000000, | ||
47 | .end = 0x08000000 + SZ_64K - 1, | ||
48 | .flags = IORESOURCE_MEM | ||
49 | }, | ||
50 | /* mflash interrupt pin */ | ||
51 | [1] = { | ||
52 | .start = IRQ_GPIO(84), | ||
53 | .end = IRQ_GPIO(84), | ||
54 | .flags = IORESOURCE_IRQ | ||
55 | }, | ||
56 | /* mflash reset pin */ | ||
57 | [2] = { | ||
58 | .start = 43, | ||
59 | .end = 43, | ||
60 | .name = MG_RST_PIN, | ||
61 | .flags = IORESOURCE_IO | ||
62 | }, | ||
63 | /* mflash reset-out pin | ||
64 | * If you use mflash as storage device (i.e. other than MG_BOOT_DEV), | ||
65 | * should assign this */ | ||
66 | [3] = { | ||
67 | .start = 51, | ||
68 | .end = 51, | ||
69 | .name = MG_RSTOUT_PIN, | ||
70 | .flags = IORESOURCE_IO | ||
71 | } | ||
72 | }; | ||
73 | |||
74 | static struct platform_device mflash_dev = { | ||
75 | .name = MG_DEV_NAME, | ||
76 | .id = -1, | ||
77 | .dev = { | ||
78 | .platform_data = &mflash_drv_data, | ||
79 | }, | ||
80 | .num_resources = ARRAY_SIZE(mg_mflash_rsc), | ||
81 | .resource = mg_mflash_rsc | ||
82 | }; | ||
83 | |||
84 | platform_device_register(&mflash_dev); | ||
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt index 000b0fbdc105..d0d042c2fd5e 100644 --- a/Documentation/braille-console.txt +++ b/Documentation/braille-console.txt | |||
@@ -27,7 +27,7 @@ parameter. | |||
27 | 27 | ||
28 | For simplicity, only one braille console can be enabled, other uses of | 28 | For simplicity, only one braille console can be enabled, other uses of |
29 | console=brl,... will be discarded. Also note that it does not interfere with | 29 | console=brl,... will be discarded. Also note that it does not interfere with |
30 | the console selection mecanism described in serial-console.txt | 30 | the console selection mechanism described in serial-console.txt |
31 | 31 | ||
32 | For now, only the VisioBraille device is supported. | 32 | For now, only the VisioBraille device is supported. |
33 | 33 | ||
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt index cf1f8126991c..1c407778c8b2 100644 --- a/Documentation/cdrom/packet-writing.txt +++ b/Documentation/cdrom/packet-writing.txt | |||
@@ -117,7 +117,7 @@ Using the pktcdvd debugfs interface | |||
117 | 117 | ||
118 | To read pktcdvd device infos in human readable form, do: | 118 | To read pktcdvd device infos in human readable form, do: |
119 | 119 | ||
120 | # cat /debug/pktcdvd/pktcdvd[0-7]/info | 120 | # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info |
121 | 121 | ||
122 | For a description of the debugfs interface look into the file: | 122 | For a description of the debugfs interface look into the file: |
123 | 123 | ||
diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX new file mode 100644 index 000000000000..3f58fa3d6d00 --- /dev/null +++ b/Documentation/cgroups/00-INDEX | |||
@@ -0,0 +1,18 @@ | |||
1 | 00-INDEX | ||
2 | - this file | ||
3 | cgroups.txt | ||
4 | - Control Groups definition, implementation details, examples and API. | ||
5 | cpuacct.txt | ||
6 | - CPU Accounting Controller; account CPU usage for groups of tasks. | ||
7 | cpusets.txt | ||
8 | - documents the cpusets feature; assign CPUs and Mem to a set of tasks. | ||
9 | devices.txt | ||
10 | - Device Whitelist Controller; description, interface and security. | ||
11 | freezer-subsystem.txt | ||
12 | - checkpointing; rationale to not use signals, interface. | ||
13 | memcg_test.txt | ||
14 | - Memory Resource Controller; implementation details. | ||
15 | memory.txt | ||
16 | - Memory Resource Controller; design, accounting, interface, testing. | ||
17 | resource_counter.txt | ||
18 | - Resource Counter API. | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 93feb8444489..6eb1a97e88ce 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -56,7 +56,7 @@ hierarchy, and a set of subsystems; each subsystem has system-specific | |||
56 | state attached to each cgroup in the hierarchy. Each hierarchy has | 56 | state attached to each cgroup in the hierarchy. Each hierarchy has |
57 | an instance of the cgroup virtual filesystem associated with it. | 57 | an instance of the cgroup virtual filesystem associated with it. |
58 | 58 | ||
59 | At any one time there may be multiple active hierachies of task | 59 | At any one time there may be multiple active hierarchies of task |
60 | cgroups. Each hierarchy is a partition of all tasks in the system. | 60 | cgroups. Each hierarchy is a partition of all tasks in the system. |
61 | 61 | ||
62 | User level code may create and destroy cgroups by name in an | 62 | User level code may create and destroy cgroups by name in an |
@@ -124,10 +124,10 @@ following lines: | |||
124 | / \ | 124 | / \ |
125 | Prof (15%) students (5%) | 125 | Prof (15%) students (5%) |
126 | 126 | ||
127 | Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go | 127 | Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd go |
128 | into NFS network class. | 128 | into NFS network class. |
129 | 129 | ||
130 | At the same time firefox/lynx will share an appropriate CPU/Memory class | 130 | At the same time Firefox/Lynx will share an appropriate CPU/Memory class |
131 | depending on who launched it (prof/student). | 131 | depending on who launched it (prof/student). |
132 | 132 | ||
133 | With the ability to classify tasks differently for different resources | 133 | With the ability to classify tasks differently for different resources |
@@ -325,7 +325,7 @@ and then start a subshell 'sh' in that cgroup: | |||
325 | Creating, modifying, using the cgroups can be done through the cgroup | 325 | Creating, modifying, using the cgroups can be done through the cgroup |
326 | virtual filesystem. | 326 | virtual filesystem. |
327 | 327 | ||
328 | To mount a cgroup hierarchy will all available subsystems, type: | 328 | To mount a cgroup hierarchy with all available subsystems, type: |
329 | # mount -t cgroup xxx /dev/cgroup | 329 | # mount -t cgroup xxx /dev/cgroup |
330 | 330 | ||
331 | The "xxx" is not interpreted by the cgroup code, but will appear in | 331 | The "xxx" is not interpreted by the cgroup code, but will appear in |
@@ -333,12 +333,23 @@ The "xxx" is not interpreted by the cgroup code, but will appear in | |||
333 | 333 | ||
334 | To mount a cgroup hierarchy with just the cpuset and numtasks | 334 | To mount a cgroup hierarchy with just the cpuset and numtasks |
335 | subsystems, type: | 335 | subsystems, type: |
336 | # mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup | 336 | # mount -t cgroup -o cpuset,memory hier1 /dev/cgroup |
337 | 337 | ||
338 | To change the set of subsystems bound to a mounted hierarchy, just | 338 | To change the set of subsystems bound to a mounted hierarchy, just |
339 | remount with different options: | 339 | remount with different options: |
340 | # mount -o remount,cpuset,ns hier1 /dev/cgroup | ||
340 | 341 | ||
341 | # mount -o remount,cpuset,ns /dev/cgroup | 342 | Now memory is removed from the hierarchy and ns is added. |
343 | |||
344 | Note this will add ns to the hierarchy but won't remove memory or | ||
345 | cpuset, because the new options are appended to the old ones: | ||
346 | # mount -o remount,ns /dev/cgroup | ||
347 | |||
348 | To Specify a hierarchy's release_agent: | ||
349 | # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ | ||
350 | xxx /dev/cgroup | ||
351 | |||
352 | Note that specifying 'release_agent' more than once will return failure. | ||
342 | 353 | ||
343 | Note that changing the set of subsystems is currently only supported | 354 | Note that changing the set of subsystems is currently only supported |
344 | when the hierarchy consists of a single (root) cgroup. Supporting | 355 | when the hierarchy consists of a single (root) cgroup. Supporting |
@@ -349,6 +360,11 @@ Then under /dev/cgroup you can find a tree that corresponds to the | |||
349 | tree of the cgroups in the system. For instance, /dev/cgroup | 360 | tree of the cgroups in the system. For instance, /dev/cgroup |
350 | is the cgroup that holds the whole system. | 361 | is the cgroup that holds the whole system. |
351 | 362 | ||
363 | If you want to change the value of release_agent: | ||
364 | # echo "/sbin/new_release_agent" > /dev/cgroup/release_agent | ||
365 | |||
366 | It can also be changed via remount. | ||
367 | |||
352 | If you want to create a new cgroup under /dev/cgroup: | 368 | If you want to create a new cgroup under /dev/cgroup: |
353 | # cd /dev/cgroup | 369 | # cd /dev/cgroup |
354 | # mkdir my_cgroup | 370 | # mkdir my_cgroup |
@@ -476,11 +492,13 @@ cgroup->parent is still valid. (Note - can also be called for a | |||
476 | newly-created cgroup if an error occurs after this subsystem's | 492 | newly-created cgroup if an error occurs after this subsystem's |
477 | create() method has been called for the new cgroup). | 493 | create() method has been called for the new cgroup). |
478 | 494 | ||
479 | void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); | 495 | int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); |
480 | 496 | ||
481 | Called before checking the reference count on each subsystem. This may | 497 | Called before checking the reference count on each subsystem. This may |
482 | be useful for subsystems which have some extra references even if | 498 | be useful for subsystems which have some extra references even if |
483 | there are not tasks in the cgroup. | 499 | there are not tasks in the cgroup. If pre_destroy() returns error code, |
500 | rmdir() will fail with it. From this behavior, pre_destroy() can be | ||
501 | called multiple times against a cgroup. | ||
484 | 502 | ||
485 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 503 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
486 | struct task_struct *task) | 504 | struct task_struct *task) |
@@ -521,7 +539,7 @@ always handled well. | |||
521 | void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) | 539 | void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) |
522 | (cgroup_mutex held by caller) | 540 | (cgroup_mutex held by caller) |
523 | 541 | ||
524 | Called at the end of cgroup_clone() to do any paramater | 542 | Called at the end of cgroup_clone() to do any parameter |
525 | initialization which might be required before a task could attach. For | 543 | initialization which might be required before a task could attach. For |
526 | example in cpusets, no task may attach before 'cpus' and 'mems' are set | 544 | example in cpusets, no task may attach before 'cpus' and 'mems' are set |
527 | up. | 545 | up. |
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index bb775fbe43d7..8b930946c52a 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt | |||
@@ -30,3 +30,21 @@ The above steps create a new group g1 and move the current shell | |||
30 | process (bash) into it. CPU time consumed by this bash and its children | 30 | process (bash) into it. CPU time consumed by this bash and its children |
31 | can be obtained from g1/cpuacct.usage and the same is accumulated in | 31 | can be obtained from g1/cpuacct.usage and the same is accumulated in |
32 | /cgroups/cpuacct.usage also. | 32 | /cgroups/cpuacct.usage also. |
33 | |||
34 | cpuacct.stat file lists a few statistics which further divide the | ||
35 | CPU time obtained by the cgroup into user and system times. Currently | ||
36 | the following statistics are supported: | ||
37 | |||
38 | user: Time spent by tasks of the cgroup in user mode. | ||
39 | system: Time spent by tasks of the cgroup in kernel mode. | ||
40 | |||
41 | user and system are in USER_HZ unit. | ||
42 | |||
43 | cpuacct controller uses percpu_counter interface to collect user and | ||
44 | system times. This has two side effects: | ||
45 | |||
46 | - It is theoretically possible to see wrong values for user and system times. | ||
47 | This is because percpu_counter_read() on 32bit systems isn't safe | ||
48 | against concurrent writes. | ||
49 | - It is possible to see slightly outdated values for user and system times | ||
50 | due to the batch processing nature of percpu_counter. | ||
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 0611e9528c7c..f9ca389dddf4 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt | |||
@@ -131,7 +131,7 @@ Cpusets extends these two mechanisms as follows: | |||
131 | - The hierarchy of cpusets can be mounted at /dev/cpuset, for | 131 | - The hierarchy of cpusets can be mounted at /dev/cpuset, for |
132 | browsing and manipulation from user space. | 132 | browsing and manipulation from user space. |
133 | - A cpuset may be marked exclusive, which ensures that no other | 133 | - A cpuset may be marked exclusive, which ensures that no other |
134 | cpuset (except direct ancestors and descendents) may contain | 134 | cpuset (except direct ancestors and descendants) may contain |
135 | any overlapping CPUs or Memory Nodes. | 135 | any overlapping CPUs or Memory Nodes. |
136 | - You can list all the tasks (by pid) attached to any cpuset. | 136 | - You can list all the tasks (by pid) attached to any cpuset. |
137 | 137 | ||
@@ -226,7 +226,7 @@ nodes with memory--using the cpuset_track_online_nodes() hook. | |||
226 | -------------------------------- | 226 | -------------------------------- |
227 | 227 | ||
228 | If a cpuset is cpu or mem exclusive, no other cpuset, other than | 228 | If a cpuset is cpu or mem exclusive, no other cpuset, other than |
229 | a direct ancestor or descendent, may share any of the same CPUs or | 229 | a direct ancestor or descendant, may share any of the same CPUs or |
230 | Memory Nodes. | 230 | Memory Nodes. |
231 | 231 | ||
232 | A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", | 232 | A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", |
@@ -427,7 +427,7 @@ child cpusets have this flag enabled. | |||
427 | When doing this, you don't usually want to leave any unpinned tasks in | 427 | When doing this, you don't usually want to leave any unpinned tasks in |
428 | the top cpuset that might use non-trivial amounts of CPU, as such tasks | 428 | the top cpuset that might use non-trivial amounts of CPU, as such tasks |
429 | may be artificially constrained to some subset of CPUs, depending on | 429 | may be artificially constrained to some subset of CPUs, depending on |
430 | the particulars of this flag setting in descendent cpusets. Even if | 430 | the particulars of this flag setting in descendant cpusets. Even if |
431 | such a task could use spare CPU cycles in some other CPUs, the kernel | 431 | such a task could use spare CPU cycles in some other CPUs, the kernel |
432 | scheduler might not consider the possibility of load balancing that | 432 | scheduler might not consider the possibility of load balancing that |
433 | task to that underused CPU. | 433 | task to that underused CPU. |
@@ -531,9 +531,9 @@ be idle. | |||
531 | 531 | ||
532 | Of course it takes some searching cost to find movable tasks and/or | 532 | Of course it takes some searching cost to find movable tasks and/or |
533 | idle CPUs, the scheduler might not search all CPUs in the domain | 533 | idle CPUs, the scheduler might not search all CPUs in the domain |
534 | everytime. In fact, in some architectures, the searching ranges on | 534 | every time. In fact, in some architectures, the searching ranges on |
535 | events are limited in the same socket or node where the CPU locates, | 535 | events are limited in the same socket or node where the CPU locates, |
536 | while the load balance on tick searchs all. | 536 | while the load balance on tick searches all. |
537 | 537 | ||
538 | For example, assume CPU Z is relatively far from CPU X. Even if CPU Z | 538 | For example, assume CPU Z is relatively far from CPU X. Even if CPU Z |
539 | is idle while CPU X and the siblings are busy, scheduler can't migrate | 539 | is idle while CPU X and the siblings are busy, scheduler can't migrate |
@@ -601,7 +601,7 @@ its new cpuset, then the task will continue to use whatever subset | |||
601 | of MPOL_BIND nodes are still allowed in the new cpuset. If the task | 601 | of MPOL_BIND nodes are still allowed in the new cpuset. If the task |
602 | was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed | 602 | was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed |
603 | in the new cpuset, then the task will be essentially treated as if it | 603 | in the new cpuset, then the task will be essentially treated as if it |
604 | was MPOL_BIND bound to the new cpuset (even though its numa placement, | 604 | was MPOL_BIND bound to the new cpuset (even though its NUMA placement, |
605 | as queried by get_mempolicy(), doesn't change). If a task is moved | 605 | as queried by get_mempolicy(), doesn't change). If a task is moved |
606 | from one cpuset to another, then the kernel will adjust the tasks | 606 | from one cpuset to another, then the kernel will adjust the tasks |
607 | memory placement, as above, the next time that the kernel attempts | 607 | memory placement, as above, the next time that the kernel attempts |
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 7cc6e6a60672..57ca4c89fe5c 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt | |||
@@ -42,7 +42,7 @@ suffice, but we can decide the best way to adequately restrict | |||
42 | movement as people get some experience with this. We may just want | 42 | movement as people get some experience with this. We may just want |
43 | to require CAP_SYS_ADMIN, which at least is a separate bit from | 43 | to require CAP_SYS_ADMIN, which at least is a separate bit from |
44 | CAP_MKNOD. We may want to just refuse moving to a cgroup which | 44 | CAP_MKNOD. We may want to just refuse moving to a cgroup which |
45 | isn't a descendent of the current one. Or we may want to use | 45 | isn't a descendant of the current one. Or we may want to use |
46 | CAP_MAC_ADMIN, since we really are trying to lock down root. | 46 | CAP_MAC_ADMIN, since we really are trying to lock down root. |
47 | 47 | ||
48 | CAP_SYS_ADMIN is needed to modify the whitelist or move another | 48 | CAP_SYS_ADMIN is needed to modify the whitelist or move another |
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index 523a9c16c400..72db89ed0609 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | Memory Resource Controller(Memcg) Implementation Memo. | 1 | Memory Resource Controller(Memcg) Implementation Memo. |
2 | Last Updated: 2009/1/19 | 2 | Last Updated: 2009/1/20 |
3 | Base Kernel Version: based on 2.6.29-rc2. | 3 | Base Kernel Version: based on 2.6.29-rc2. |
4 | 4 | ||
5 | Because VM is getting complex (one of reasons is memcg...), memcg's behavior | 5 | Because VM is getting complex (one of reasons is memcg...), memcg's behavior |
@@ -356,7 +356,25 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. | |||
356 | (Shell-B) | 356 | (Shell-B) |
357 | # move all tasks in /cgroup/test to /cgroup | 357 | # move all tasks in /cgroup/test to /cgroup |
358 | # /sbin/swapoff -a | 358 | # /sbin/swapoff -a |
359 | # rmdir /test/cgroup | 359 | # rmdir /cgroup/test |
360 | # kill malloc task. | 360 | # kill malloc task. |
361 | 361 | ||
362 | Of course, tmpfs v.s. swapoff test should be tested, too. | 362 | Of course, tmpfs v.s. swapoff test should be tested, too. |
363 | |||
364 | 9.8 OOM-Killer | ||
365 | Out-of-memory caused by memcg's limit will kill tasks under | ||
366 | the memcg. When hierarchy is used, a task under hierarchy | ||
367 | will be killed by the kernel. | ||
368 | In this case, panic_on_oom shouldn't be invoked and tasks | ||
369 | in other groups shouldn't be killed. | ||
370 | |||
371 | It's not difficult to cause OOM under memcg as following. | ||
372 | Case A) when you can swapoff | ||
373 | #swapoff -a | ||
374 | #echo 50M > /memory.limit_in_bytes | ||
375 | run 51M of malloc | ||
376 | |||
377 | Case B) when you use mem+swap limitation. | ||
378 | #echo 50M > memory.limit_in_bytes | ||
379 | #echo 50M > memory.memsw.limit_in_bytes | ||
380 | run 51M of malloc | ||
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index e1501964df1e..23d1262c0775 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -6,15 +6,14 @@ used here with the memory controller that is used in hardware. | |||
6 | 6 | ||
7 | Salient features | 7 | Salient features |
8 | 8 | ||
9 | a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages | 9 | a. Enable control of Anonymous, Page Cache (mapped and unmapped) and |
10 | Swap Cache memory pages. | ||
10 | b. The infrastructure allows easy addition of other types of memory to control | 11 | b. The infrastructure allows easy addition of other types of memory to control |
11 | c. Provides *zero overhead* for non memory controller users | 12 | c. Provides *zero overhead* for non memory controller users |
12 | d. Provides a double LRU: global memory pressure causes reclaim from the | 13 | d. Provides a double LRU: global memory pressure causes reclaim from the |
13 | global LRU; a cgroup on hitting a limit, reclaims from the per | 14 | global LRU; a cgroup on hitting a limit, reclaims from the per |
14 | cgroup LRU | 15 | cgroup LRU |
15 | 16 | ||
16 | NOTE: Swap Cache (unmapped) is not accounted now. | ||
17 | |||
18 | Benefits and Purpose of the memory controller | 17 | Benefits and Purpose of the memory controller |
19 | 18 | ||
20 | The memory controller isolates the memory behaviour of a group of tasks | 19 | The memory controller isolates the memory behaviour of a group of tasks |
@@ -153,14 +152,19 @@ When swap is accounted, following files are added. | |||
153 | 152 | ||
154 | usage of mem+swap is limited by memsw.limit_in_bytes. | 153 | usage of mem+swap is limited by memsw.limit_in_bytes. |
155 | 154 | ||
156 | Note: why 'mem+swap' rather than swap. | 155 | * why 'mem+swap' rather than swap. |
157 | The global LRU(kswapd) can swap out arbitrary pages. Swap-out means | 156 | The global LRU(kswapd) can swap out arbitrary pages. Swap-out means |
158 | to move account from memory to swap...there is no change in usage of | 157 | to move account from memory to swap...there is no change in usage of |
159 | mem+swap. | 158 | mem+swap. In other words, when we want to limit the usage of swap without |
159 | affecting global LRU, mem+swap limit is better than just limiting swap from | ||
160 | OS point of view. | ||
160 | 161 | ||
161 | In other words, when we want to limit the usage of swap without affecting | 162 | * What happens when a cgroup hits memory.memsw.limit_in_bytes |
162 | global LRU, mem+swap limit is better than just limiting swap from OS point | 163 | When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out |
163 | of view. | 164 | in this cgroup. Then, swap-out will not be done by cgroup routine and file |
165 | caches are dropped. But as mentioned above, global LRU can do swapout memory | ||
166 | from it for sanity of the system's memory management state. You can't forbid | ||
167 | it by cgroup. | ||
164 | 168 | ||
165 | 2.5 Reclaim | 169 | 2.5 Reclaim |
166 | 170 | ||
@@ -205,6 +209,7 @@ We can alter the memory limit: | |||
205 | 209 | ||
206 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, | 210 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, |
207 | mega or gigabytes. | 211 | mega or gigabytes. |
212 | NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). | ||
208 | 213 | ||
209 | # cat /cgroups/0/memory.limit_in_bytes | 214 | # cat /cgroups/0/memory.limit_in_bytes |
210 | 4194304 | 215 | 4194304 |
@@ -290,34 +295,44 @@ will be charged as a new owner of it. | |||
290 | moved to the parent. If you want to avoid that, force_empty will be useful. | 295 | moved to the parent. If you want to avoid that, force_empty will be useful. |
291 | 296 | ||
292 | 5.2 stat file | 297 | 5.2 stat file |
293 | memory.stat file includes following statistics (now) | 298 | |
294 | cache - # of pages from page-cache and shmem. | 299 | memory.stat file includes following statistics |
295 | rss - # of pages from anonymous memory. | 300 | |
296 | pgpgin - # of event of charging | 301 | cache - # of bytes of page cache memory. |
297 | pgpgout - # of event of uncharging | 302 | rss - # of bytes of anonymous and swap cache memory. |
298 | active_anon - # of pages on active lru of anon, shmem. | 303 | pgpgin - # of pages paged in (equivalent to # of charging events). |
299 | inactive_anon - # of pages on active lru of anon, shmem | 304 | pgpgout - # of pages paged out (equivalent to # of uncharging events). |
300 | active_file - # of pages on active lru of file-cache | 305 | active_anon - # of bytes of anonymous and swap cache memory on active |
301 | inactive_file - # of pages on inactive lru of file cache | 306 | lru list. |
302 | unevictable - # of pages cannot be reclaimed.(mlocked etc) | 307 | inactive_anon - # of bytes of anonymous memory and swap cache memory on |
303 | 308 | inactive lru list. | |
304 | Below is depend on CONFIG_DEBUG_VM. | 309 | active_file - # of bytes of file-backed memory on active lru list. |
305 | inactive_ratio - VM inernal parameter. (see mm/page_alloc.c) | 310 | inactive_file - # of bytes of file-backed memory on inactive lru list. |
306 | recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) | 311 | unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc). |
307 | recent_rotated_file - VM internal parameter. (see mm/vmscan.c) | 312 | |
308 | recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) | 313 | The following additional stats are dependent on CONFIG_DEBUG_VM. |
309 | recent_scanned_file - VM internal parameter. (see mm/vmscan.c) | 314 | |
310 | 315 | inactive_ratio - VM internal parameter. (see mm/page_alloc.c) | |
311 | Memo: | 316 | recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) |
317 | recent_rotated_file - VM internal parameter. (see mm/vmscan.c) | ||
318 | recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) | ||
319 | recent_scanned_file - VM internal parameter. (see mm/vmscan.c) | ||
320 | |||
321 | Memo: | ||
312 | recent_rotated means recent frequency of lru rotation. | 322 | recent_rotated means recent frequency of lru rotation. |
313 | recent_scanned means recent # of scans to lru. | 323 | recent_scanned means recent # of scans to lru. |
314 | showing for better debug please see the code for meanings. | 324 | showing for better debug please see the code for meanings. |
315 | 325 | ||
326 | Note: | ||
327 | Only anonymous and swap cache memory is listed as part of 'rss' stat. | ||
328 | This should not be confused with the true 'resident set size' or the | ||
329 | amount of physical memory used by the cgroup. Per-cgroup rss | ||
330 | accounting is not done yet. | ||
316 | 331 | ||
317 | 5.3 swappiness | 332 | 5.3 swappiness |
318 | Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. | 333 | Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. |
319 | 334 | ||
320 | Following cgroup's swapiness can't be changed. | 335 | Following cgroups' swapiness can't be changed. |
321 | - root cgroup (uses /proc/sys/vm/swappiness). | 336 | - root cgroup (uses /proc/sys/vm/swappiness). |
322 | - a cgroup which uses hierarchy and it has child cgroup. | 337 | - a cgroup which uses hierarchy and it has child cgroup. |
323 | - a cgroup which uses hierarchy and not the root of hierarchy. | 338 | - a cgroup which uses hierarchy and not the root of hierarchy. |
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index f196ac1d7d25..95b24d766eab 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt | |||
@@ -47,13 +47,18 @@ to work with it. | |||
47 | 47 | ||
48 | 2. Basic accounting routines | 48 | 2. Basic accounting routines |
49 | 49 | ||
50 | a. void res_counter_init(struct res_counter *rc) | 50 | a. void res_counter_init(struct res_counter *rc, |
51 | struct res_counter *rc_parent) | ||
51 | 52 | ||
52 | Initializes the resource counter. As usual, should be the first | 53 | Initializes the resource counter. As usual, should be the first |
53 | routine called for a new counter. | 54 | routine called for a new counter. |
54 | 55 | ||
55 | b. int res_counter_charge[_locked] | 56 | The struct res_counter *parent can be used to define a hierarchical |
56 | (struct res_counter *rc, unsigned long val) | 57 | child -> parent relationship directly in the res_counter structure, |
58 | NULL can be used to define no relationship. | ||
59 | |||
60 | c. int res_counter_charge(struct res_counter *rc, unsigned long val, | ||
61 | struct res_counter **limit_fail_at) | ||
57 | 62 | ||
58 | When a resource is about to be allocated it has to be accounted | 63 | When a resource is about to be allocated it has to be accounted |
59 | with the appropriate resource counter (controller should determine | 64 | with the appropriate resource counter (controller should determine |
@@ -67,15 +72,25 @@ to work with it. | |||
67 | * if the charging is performed first, then it should be uncharged | 72 | * if the charging is performed first, then it should be uncharged |
68 | on error path (if the one is called). | 73 | on error path (if the one is called). |
69 | 74 | ||
70 | c. void res_counter_uncharge[_locked] | 75 | If the charging fails and a hierarchical dependency exists, the |
76 | limit_fail_at parameter is set to the particular res_counter element | ||
77 | where the charging failed. | ||
78 | |||
79 | d. int res_counter_charge_locked | ||
80 | (struct res_counter *rc, unsigned long val) | ||
81 | |||
82 | The same as res_counter_charge(), but it must not acquire/release the | ||
83 | res_counter->lock internally (it must be called with res_counter->lock | ||
84 | held). | ||
85 | |||
86 | e. void res_counter_uncharge[_locked] | ||
71 | (struct res_counter *rc, unsigned long val) | 87 | (struct res_counter *rc, unsigned long val) |
72 | 88 | ||
73 | When a resource is released (freed) it should be de-accounted | 89 | When a resource is released (freed) it should be de-accounted |
74 | from the resource counter it was accounted to. This is called | 90 | from the resource counter it was accounted to. This is called |
75 | "uncharging". | 91 | "uncharging". |
76 | 92 | ||
77 | The _locked routines imply that the res_counter->lock is taken. | 93 | The _locked routines imply that the res_counter->lock is taken. |
78 | |||
79 | 94 | ||
80 | 2.1 Other accounting routines | 95 | 2.1 Other accounting routines |
81 | 96 | ||
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index 6977c178729a..f688eba87704 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c | |||
@@ -41,6 +41,12 @@ void cn_test_callback(void *data) | |||
41 | msg->seq, msg->ack, msg->len, (char *)msg->data); | 41 | msg->seq, msg->ack, msg->len, (char *)msg->data); |
42 | } | 42 | } |
43 | 43 | ||
44 | /* | ||
45 | * Do not remove this function even if no one is using it as | ||
46 | * this is an example of how to get notifications about new | ||
47 | * connector user registration | ||
48 | */ | ||
49 | #if 0 | ||
44 | static int cn_test_want_notify(void) | 50 | static int cn_test_want_notify(void) |
45 | { | 51 | { |
46 | struct cn_ctl_msg *ctl; | 52 | struct cn_ctl_msg *ctl; |
@@ -117,6 +123,7 @@ nlmsg_failure: | |||
117 | kfree_skb(skb); | 123 | kfree_skb(skb); |
118 | return -EINVAL; | 124 | return -EINVAL; |
119 | } | 125 | } |
126 | #endif | ||
120 | 127 | ||
121 | static u32 cn_test_timer_counter; | 128 | static u32 cn_test_timer_counter; |
122 | static void cn_test_timer_func(unsigned long __data) | 129 | static void cn_test_timer_func(unsigned long __data) |
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 43c743903dd7..75a58d14d3cf 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt | |||
@@ -155,7 +155,7 @@ actual frequency must be determined using the following rules: | |||
155 | - if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal | 155 | - if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal |
156 | target_freq. ("H for highest, but no higher than") | 156 | target_freq. ("H for highest, but no higher than") |
157 | 157 | ||
158 | Here again the frequency table helper might assist you - see section 3 | 158 | Here again the frequency table helper might assist you - see section 2 |
159 | for details. | 159 | for details. |
160 | 160 | ||
161 | 161 | ||
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index 5b0cfa67aff9..aed082f49d09 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt | |||
@@ -117,10 +117,30 @@ accessible parameters: | |||
117 | sampling_rate: measured in uS (10^-6 seconds), this is how often you | 117 | sampling_rate: measured in uS (10^-6 seconds), this is how often you |
118 | want the kernel to look at the CPU usage and to make decisions on | 118 | want the kernel to look at the CPU usage and to make decisions on |
119 | what to do about the frequency. Typically this is set to values of | 119 | what to do about the frequency. Typically this is set to values of |
120 | around '10000' or more. | 120 | around '10000' or more. It's default value is (cmp. with users-guide.txt): |
121 | 121 | transition_latency * 1000 | |
122 | show_sampling_rate_(min|max): the minimum and maximum sampling rates | 122 | Be aware that transition latency is in ns and sampling_rate is in us, so you |
123 | available that you may set 'sampling_rate' to. | 123 | get the same sysfs value by default. |
124 | Sampling rate should always get adjusted considering the transition latency | ||
125 | To set the sampling rate 750 times as high as the transition latency | ||
126 | in the bash (as said, 1000 is default), do: | ||
127 | echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ | ||
128 | >ondemand/sampling_rate | ||
129 | |||
130 | show_sampling_rate_min: | ||
131 | The sampling rate is limited by the HW transition latency: | ||
132 | transition_latency * 100 | ||
133 | Or by kernel restrictions: | ||
134 | If CONFIG_NO_HZ is set, the limit is 10ms fixed. | ||
135 | If CONFIG_NO_HZ is not set or no_hz=off boot parameter is used, the | ||
136 | limits depend on the CONFIG_HZ option: | ||
137 | HZ=1000: min=20000us (20ms) | ||
138 | HZ=250: min=80000us (80ms) | ||
139 | HZ=100: min=200000us (200ms) | ||
140 | The highest value of kernel and HW latency restrictions is shown and | ||
141 | used as the minimum sampling rate. | ||
142 | |||
143 | show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. | ||
124 | 144 | ||
125 | up_threshold: defines what the average CPU usage between the samplings | 145 | up_threshold: defines what the average CPU usage between the samplings |
126 | of 'sampling_rate' needs to be for the kernel to make a decision on | 146 | of 'sampling_rate' needs to be for the kernel to make a decision on |
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 917918f84fc7..5d5f5fadd1c2 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt | |||
@@ -31,7 +31,6 @@ Contents: | |||
31 | 31 | ||
32 | 3. How to change the CPU cpufreq policy and/or speed | 32 | 3. How to change the CPU cpufreq policy and/or speed |
33 | 3.1 Preferred interface: sysfs | 33 | 3.1 Preferred interface: sysfs |
34 | 3.2 Deprecated interfaces | ||
35 | 34 | ||
36 | 35 | ||
37 | 36 | ||
@@ -152,6 +151,18 @@ cpuinfo_min_freq : this file shows the minimum operating | |||
152 | frequency the processor can run at(in kHz) | 151 | frequency the processor can run at(in kHz) |
153 | cpuinfo_max_freq : this file shows the maximum operating | 152 | cpuinfo_max_freq : this file shows the maximum operating |
154 | frequency the processor can run at(in kHz) | 153 | frequency the processor can run at(in kHz) |
154 | cpuinfo_transition_latency The time it takes on this CPU to | ||
155 | switch between two frequencies in nano | ||
156 | seconds. If unknown or known to be | ||
157 | that high that the driver does not | ||
158 | work with the ondemand governor, -1 | ||
159 | (CPUFREQ_ETERNAL) will be returned. | ||
160 | Using this information can be useful | ||
161 | to choose an appropriate polling | ||
162 | frequency for a kernel governor or | ||
163 | userspace daemon. Make sure to not | ||
164 | switch the frequency too often | ||
165 | resulting in performance loss. | ||
155 | scaling_driver : this file shows what cpufreq driver is | 166 | scaling_driver : this file shows what cpufreq driver is |
156 | used to set the frequency on this CPU | 167 | used to set the frequency on this CPU |
157 | 168 | ||
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 45932ec21cee..b41f3e58aefa 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt | |||
@@ -18,11 +18,11 @@ For an architecture to support this feature, it must define some of | |||
18 | these macros in include/asm-XXX/topology.h: | 18 | these macros in include/asm-XXX/topology.h: |
19 | #define topology_physical_package_id(cpu) | 19 | #define topology_physical_package_id(cpu) |
20 | #define topology_core_id(cpu) | 20 | #define topology_core_id(cpu) |
21 | #define topology_thread_siblings(cpu) | 21 | #define topology_thread_cpumask(cpu) |
22 | #define topology_core_siblings(cpu) | 22 | #define topology_core_cpumask(cpu) |
23 | 23 | ||
24 | The type of **_id is int. | 24 | The type of **_id is int. |
25 | The type of siblings is cpumask_t. | 25 | The type of siblings is (const) struct cpumask *. |
26 | 26 | ||
27 | To be consistent on all architectures, include/linux/topology.h | 27 | To be consistent on all architectures, include/linux/topology.h |
28 | provides default definitions for any of the above macros that are | 28 | provides default definitions for any of the above macros that are |
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt index c11b931f8f98..15174985ad08 100644 --- a/Documentation/dell_rbu.txt +++ b/Documentation/dell_rbu.txt | |||
@@ -76,9 +76,9 @@ Do the steps below to download the BIOS image. | |||
76 | 76 | ||
77 | The /sys/class/firmware/dell_rbu/ entries will remain till the following is | 77 | The /sys/class/firmware/dell_rbu/ entries will remain till the following is |
78 | done. | 78 | done. |
79 | echo -1 > /sys/class/firmware/dell_rbu/loading. | 79 | echo -1 > /sys/class/firmware/dell_rbu/loading |
80 | Until this step is completed the driver cannot be unloaded. | 80 | Until this step is completed the driver cannot be unloaded. |
81 | Also echoing either mono ,packet or init in to image_type will free up the | 81 | Also echoing either mono, packet or init in to image_type will free up the |
82 | memory allocated by the driver. | 82 | memory allocated by the driver. |
83 | 83 | ||
84 | If a user by accident executes steps 1 and 3 above without executing step 2; | 84 | If a user by accident executes steps 1 and 3 above without executing step 2; |
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting index dd48132a74dd..f622c1e9f0f9 100644 --- a/Documentation/development-process/5.Posting +++ b/Documentation/development-process/5.Posting | |||
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has been | |||
119 | done. When done properly, though, it is time well spent. | 119 | done. When done properly, though, it is time well spent. |
120 | 120 | ||
121 | 121 | ||
122 | 5.4: PATCH FORMATTING | 122 | 5.4: PATCH FORMATTING AND CHANGELOGS |
123 | 123 | ||
124 | So now you have a perfect series of patches for posting, but the work is | 124 | So now you have a perfect series of patches for posting, but the work is |
125 | not done quite yet. Each patch needs to be formatted into a message which | 125 | not done quite yet. Each patch needs to be formatted into a message which |
@@ -146,8 +146,33 @@ that end, each patch will be composed of the following: | |||
146 | - One or more tag lines, with, at a minimum, one Signed-off-by: line from | 146 | - One or more tag lines, with, at a minimum, one Signed-off-by: line from |
147 | the author of the patch. Tags will be described in more detail below. | 147 | the author of the patch. Tags will be described in more detail below. |
148 | 148 | ||
149 | The above three items should, normally, be the text used when committing | 149 | The items above, together, form the changelog for the patch. Writing good |
150 | the change to a revision control system. They are followed by: | 150 | changelogs is a crucial but often-neglected art; it's worth spending |
151 | another moment discussing this issue. When writing a changelog, you should | ||
152 | bear in mind that a number of different people will be reading your words. | ||
153 | These include subsystem maintainers and reviewers who need to decide | ||
154 | whether the patch should be included, distributors and other maintainers | ||
155 | trying to decide whether a patch should be backported to other kernels, bug | ||
156 | hunters wondering whether the patch is responsible for a problem they are | ||
157 | chasing, users who want to know how the kernel has changed, and more. A | ||
158 | good changelog conveys the needed information to all of these people in the | ||
159 | most direct and concise way possible. | ||
160 | |||
161 | To that end, the summary line should describe the effects of and motivation | ||
162 | for the change as well as possible given the one-line constraint. The | ||
163 | detailed description can then amplify on those topics and provide any | ||
164 | needed additional information. If the patch fixes a bug, cite the commit | ||
165 | which introduced the bug if possible. If a problem is associated with | ||
166 | specific log or compiler output, include that output to help others | ||
167 | searching for a solution to the same problem. If the change is meant to | ||
168 | support other changes coming in later patch, say so. If internal APIs are | ||
169 | changed, detail those changes and how other developers should respond. In | ||
170 | general, the more you can put yourself into the shoes of everybody who will | ||
171 | be reading your changelog, the better that changelog (and the kernel as a | ||
172 | whole) will be. | ||
173 | |||
174 | Needless to say, the changelog should be the text used when committing the | ||
175 | change to a revision control system. It will be followed by: | ||
151 | 176 | ||
152 | - The patch itself, in the unified ("-u") patch format. Using the "-p" | 177 | - The patch itself, in the unified ("-u") patch format. Using the "-p" |
153 | option to diff will associate function names with changes, making the | 178 | option to diff will associate function names with changes, making the |
diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 2be08240ee80..53d64d382343 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt | |||
@@ -1,9 +1,9 @@ | |||
1 | 1 | ||
2 | LINUX ALLOCATED DEVICES (2.6+ version) | 2 | LINUX ALLOCATED DEVICES (2.6+ version) |
3 | 3 | ||
4 | Maintained by Torben Mathiasen <device@lanana.org> | 4 | Maintained by Alan Cox <device@lanana.org> |
5 | 5 | ||
6 | Last revised: 29 November 2006 | 6 | Last revised: 6th April 2009 |
7 | 7 | ||
8 | This list is the Linux Device List, the official registry of allocated | 8 | This list is the Linux Device List, the official registry of allocated |
9 | device numbers and /dev directory nodes for the Linux operating | 9 | device numbers and /dev directory nodes for the Linux operating |
@@ -67,6 +67,11 @@ up to date. Due to the number of registrations I have to maintain it | |||
67 | in "batch mode", so there is likely additional registrations that | 67 | in "batch mode", so there is likely additional registrations that |
68 | haven't been listed yet. | 68 | haven't been listed yet. |
69 | 69 | ||
70 | Fourth, remember that Linux now has extensive support for dynamic allocation | ||
71 | of device numbering and can use sysfs and udev to handle the naming needs. | ||
72 | There are still some exceptions in the serial and boot device area. Before | ||
73 | asking for a device number make sure you actually need one. | ||
74 | |||
70 | Finally, sometimes I have to play "namespace police." Please don't be | 75 | Finally, sometimes I have to play "namespace police." Please don't be |
71 | offended. I often get submissions for /dev names that would be bound | 76 | offended. I often get submissions for /dev names that would be bound |
72 | to cause conflicts down the road. I am trying to avoid getting in a | 77 | to cause conflicts down the road. I am trying to avoid getting in a |
@@ -101,7 +106,7 @@ Your cooperation is appreciated. | |||
101 | 0 = /dev/ram0 First RAM disk | 106 | 0 = /dev/ram0 First RAM disk |
102 | 1 = /dev/ram1 Second RAM disk | 107 | 1 = /dev/ram1 Second RAM disk |
103 | ... | 108 | ... |
104 | 250 = /dev/initrd Initial RAM disk {2.6} | 109 | 250 = /dev/initrd Initial RAM disk |
105 | 110 | ||
106 | Older kernels had /dev/ramdisk (1, 1) here. | 111 | Older kernels had /dev/ramdisk (1, 1) here. |
107 | /dev/initrd refers to a RAM disk which was preloaded | 112 | /dev/initrd refers to a RAM disk which was preloaded |
@@ -340,7 +345,7 @@ Your cooperation is appreciated. | |||
340 | 14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen | 345 | 14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen |
341 | 15 = /dev/touchscreen/mk712 MK712 touchscreen | 346 | 15 = /dev/touchscreen/mk712 MK712 touchscreen |
342 | 128 = /dev/beep Fancy beep device | 347 | 128 = /dev/beep Fancy beep device |
343 | 129 = /dev/modreq Kernel module load request {2.6} | 348 | 129 = |
344 | 130 = /dev/watchdog Watchdog timer port | 349 | 130 = /dev/watchdog Watchdog timer port |
345 | 131 = /dev/temperature Machine internal temperature | 350 | 131 = /dev/temperature Machine internal temperature |
346 | 132 = /dev/hwtrap Hardware fault trap | 351 | 132 = /dev/hwtrap Hardware fault trap |
@@ -350,10 +355,10 @@ Your cooperation is appreciated. | |||
350 | 139 = /dev/openprom SPARC OpenBoot PROM | 355 | 139 = /dev/openprom SPARC OpenBoot PROM |
351 | 140 = /dev/relay8 Berkshire Products Octal relay card | 356 | 140 = /dev/relay8 Berkshire Products Octal relay card |
352 | 141 = /dev/relay16 Berkshire Products ISO-16 relay card | 357 | 141 = /dev/relay16 Berkshire Products ISO-16 relay card |
353 | 142 = /dev/msr x86 model-specific registers {2.6} | 358 | 142 = |
354 | 143 = /dev/pciconf PCI configuration space | 359 | 143 = /dev/pciconf PCI configuration space |
355 | 144 = /dev/nvram Non-volatile configuration RAM | 360 | 144 = /dev/nvram Non-volatile configuration RAM |
356 | 145 = /dev/hfmodem Soundcard shortwave modem control {2.6} | 361 | 145 = /dev/hfmodem Soundcard shortwave modem control |
357 | 146 = /dev/graphics Linux/SGI graphics device | 362 | 146 = /dev/graphics Linux/SGI graphics device |
358 | 147 = /dev/opengl Linux/SGI OpenGL pipe | 363 | 147 = /dev/opengl Linux/SGI OpenGL pipe |
359 | 148 = /dev/gfx Linux/SGI graphics effects device | 364 | 148 = /dev/gfx Linux/SGI graphics effects device |
@@ -435,6 +440,9 @@ Your cooperation is appreciated. | |||
435 | 228 = /dev/hpet HPET driver | 440 | 228 = /dev/hpet HPET driver |
436 | 229 = /dev/fuse Fuse (virtual filesystem in user-space) | 441 | 229 = /dev/fuse Fuse (virtual filesystem in user-space) |
437 | 230 = /dev/midishare MidiShare driver | 442 | 230 = /dev/midishare MidiShare driver |
443 | 231 = /dev/snapshot System memory snapshot device | ||
444 | 232 = /dev/kvm Kernel-based virtual machine (hardware virtualization extensions) | ||
445 | 233 = /dev/kmview View-OS A process with a view | ||
438 | 240-254 Reserved for local use | 446 | 240-254 Reserved for local use |
439 | 255 Reserved for MISC_DYNAMIC_MINOR | 447 | 255 Reserved for MISC_DYNAMIC_MINOR |
440 | 448 | ||
@@ -466,10 +474,7 @@ Your cooperation is appreciated. | |||
466 | The device names specified are proposed -- if there | 474 | The device names specified are proposed -- if there |
467 | are "standard" names for these devices, please let me know. | 475 | are "standard" names for these devices, please let me know. |
468 | 476 | ||
469 | 12 block MSCDEX CD-ROM callback support {2.6} | 477 | 12 block |
470 | 0 = /dev/dos_cd0 First MSCDEX CD-ROM | ||
471 | 1 = /dev/dos_cd1 Second MSCDEX CD-ROM | ||
472 | ... | ||
473 | 478 | ||
474 | 13 char Input core | 479 | 13 char Input core |
475 | 0 = /dev/input/js0 First joystick | 480 | 0 = /dev/input/js0 First joystick |
@@ -498,7 +503,7 @@ Your cooperation is appreciated. | |||
498 | 2 = /dev/midi00 First MIDI port | 503 | 2 = /dev/midi00 First MIDI port |
499 | 3 = /dev/dsp Digital audio | 504 | 3 = /dev/dsp Digital audio |
500 | 4 = /dev/audio Sun-compatible digital audio | 505 | 4 = /dev/audio Sun-compatible digital audio |
501 | 6 = /dev/sndstat Sound card status information {2.6} | 506 | 6 = |
502 | 7 = /dev/audioctl SPARC audio control device | 507 | 7 = /dev/audioctl SPARC audio control device |
503 | 8 = /dev/sequencer2 Sequencer -- alternate device | 508 | 8 = /dev/sequencer2 Sequencer -- alternate device |
504 | 16 = /dev/mixer1 Second soundcard mixer control | 509 | 16 = /dev/mixer1 Second soundcard mixer control |
@@ -510,14 +515,7 @@ Your cooperation is appreciated. | |||
510 | 34 = /dev/midi02 Third MIDI port | 515 | 34 = /dev/midi02 Third MIDI port |
511 | 50 = /dev/midi03 Fourth MIDI port | 516 | 50 = /dev/midi03 Fourth MIDI port |
512 | 517 | ||
513 | 14 block BIOS harddrive callback support {2.6} | 518 | 14 block |
514 | 0 = /dev/dos_hda First BIOS harddrive whole disk | ||
515 | 64 = /dev/dos_hdb Second BIOS harddrive whole disk | ||
516 | 128 = /dev/dos_hdc Third BIOS harddrive whole disk | ||
517 | 192 = /dev/dos_hdd Fourth BIOS harddrive whole disk | ||
518 | |||
519 | Partitions are handled in the same way as IDE disks | ||
520 | (see major number 3). | ||
521 | 519 | ||
522 | 15 char Joystick | 520 | 15 char Joystick |
523 | 0 = /dev/js0 First analog joystick | 521 | 0 = /dev/js0 First analog joystick |
@@ -535,14 +533,14 @@ Your cooperation is appreciated. | |||
535 | 16 block GoldStar CD-ROM | 533 | 16 block GoldStar CD-ROM |
536 | 0 = /dev/gscd GoldStar CD-ROM | 534 | 0 = /dev/gscd GoldStar CD-ROM |
537 | 535 | ||
538 | 17 char Chase serial card | 536 | 17 char OBSOLETE (was Chase serial card) |
539 | 0 = /dev/ttyH0 First Chase port | 537 | 0 = /dev/ttyH0 First Chase port |
540 | 1 = /dev/ttyH1 Second Chase port | 538 | 1 = /dev/ttyH1 Second Chase port |
541 | ... | 539 | ... |
542 | 17 block Optics Storage CD-ROM | 540 | 17 block Optics Storage CD-ROM |
543 | 0 = /dev/optcd Optics Storage CD-ROM | 541 | 0 = /dev/optcd Optics Storage CD-ROM |
544 | 542 | ||
545 | 18 char Chase serial card - alternate devices | 543 | 18 char OBSOLETE (was Chase serial card - alternate devices) |
546 | 0 = /dev/cuh0 Callout device for ttyH0 | 544 | 0 = /dev/cuh0 Callout device for ttyH0 |
547 | 1 = /dev/cuh1 Callout device for ttyH1 | 545 | 1 = /dev/cuh1 Callout device for ttyH1 |
548 | ... | 546 | ... |
@@ -644,8 +642,7 @@ Your cooperation is appreciated. | |||
644 | 2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2 | 642 | 2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2 |
645 | 3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3 | 643 | 3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3 |
646 | 644 | ||
647 | 26 char Quanta WinVision frame grabber {2.6} | 645 | 26 char |
648 | 0 = /dev/wvisfgrab Quanta WinVision frame grabber | ||
649 | 646 | ||
650 | 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM | 647 | 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM |
651 | 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0 | 648 | 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0 |
@@ -872,7 +869,7 @@ Your cooperation is appreciated. | |||
872 | and "user level packet I/O." This board is also | 869 | and "user level packet I/O." This board is also |
873 | accessible as a standard networking "eth" device. | 870 | accessible as a standard networking "eth" device. |
874 | 871 | ||
875 | 38 block Reserved for Linux/AP+ | 872 | 38 block OBSOLETE (was Linux/AP+) |
876 | 873 | ||
877 | 39 char ML-16P experimental I/O board | 874 | 39 char ML-16P experimental I/O board |
878 | 0 = /dev/ml16pa-a0 First card, first analog channel | 875 | 0 = /dev/ml16pa-a0 First card, first analog channel |
@@ -892,29 +889,16 @@ Your cooperation is appreciated. | |||
892 | 50 = /dev/ml16pb-c1 Second card, second counter/timer | 889 | 50 = /dev/ml16pb-c1 Second card, second counter/timer |
893 | 51 = /dev/ml16pb-c2 Second card, third counter/timer | 890 | 51 = /dev/ml16pb-c2 Second card, third counter/timer |
894 | ... | 891 | ... |
895 | 39 block Reserved for Linux/AP+ | 892 | 39 block |
896 | 893 | ||
897 | 40 char Matrox Meteor frame grabber {2.6} | 894 | 40 char |
898 | 0 = /dev/mmetfgrab Matrox Meteor frame grabber | ||
899 | 895 | ||
900 | 40 block Syquest EZ135 parallel port removable drive | 896 | 40 block |
901 | 0 = /dev/eza Parallel EZ135 drive, whole disk | ||
902 | |||
903 | This device is obsolete and will be removed in a | ||
904 | future version of Linux. It has been replaced with | ||
905 | the parallel port IDE disk driver at major number 45. | ||
906 | Partitions are handled in the same way as IDE disks | ||
907 | (see major number 3). | ||
908 | 897 | ||
909 | 41 char Yet Another Micro Monitor | 898 | 41 char Yet Another Micro Monitor |
910 | 0 = /dev/yamm Yet Another Micro Monitor | 899 | 0 = /dev/yamm Yet Another Micro Monitor |
911 | 900 | ||
912 | 41 block MicroSolutions BackPack parallel port CD-ROM | 901 | 41 block |
913 | 0 = /dev/bpcd BackPack CD-ROM | ||
914 | |||
915 | This device is obsolete and will be removed in a | ||
916 | future version of Linux. It has been replaced with | ||
917 | the parallel port ATAPI CD-ROM driver at major number 46. | ||
918 | 902 | ||
919 | 42 char Demo/sample use | 903 | 42 char Demo/sample use |
920 | 904 | ||
@@ -1681,13 +1665,7 @@ Your cooperation is appreciated. | |||
1681 | disks (see major number 3) except that the limit on | 1665 | disks (see major number 3) except that the limit on |
1682 | partitions is 15. | 1666 | partitions is 15. |
1683 | 1667 | ||
1684 | 93 char IBM Smart Capture Card frame grabber {2.6} | 1668 | 93 char |
1685 | 0 = /dev/iscc0 First Smart Capture Card | ||
1686 | 1 = /dev/iscc1 Second Smart Capture Card | ||
1687 | ... | ||
1688 | 128 = /dev/isccctl0 First Smart Capture Card control | ||
1689 | 129 = /dev/isccctl1 Second Smart Capture Card control | ||
1690 | ... | ||
1691 | 1669 | ||
1692 | 93 block NAND Flash Translation Layer filesystem | 1670 | 93 block NAND Flash Translation Layer filesystem |
1693 | 0 = /dev/nftla First NFTL layer | 1671 | 0 = /dev/nftla First NFTL layer |
@@ -1695,10 +1673,7 @@ Your cooperation is appreciated. | |||
1695 | ... | 1673 | ... |
1696 | 240 = /dev/nftlp 16th NTFL layer | 1674 | 240 = /dev/nftlp 16th NTFL layer |
1697 | 1675 | ||
1698 | 94 char miroVIDEO DC10/30 capture/playback device {2.6} | 1676 | 94 char |
1699 | 0 = /dev/dcxx0 First capture card | ||
1700 | 1 = /dev/dcxx1 Second capture card | ||
1701 | ... | ||
1702 | 1677 | ||
1703 | 94 block IBM S/390 DASD block storage | 1678 | 94 block IBM S/390 DASD block storage |
1704 | 0 = /dev/dasda First DASD device, major | 1679 | 0 = /dev/dasda First DASD device, major |
@@ -1791,11 +1766,7 @@ Your cooperation is appreciated. | |||
1791 | ... | 1766 | ... |
1792 | 15 = /dev/amiraid/ar?p15 15th partition | 1767 | 15 = /dev/amiraid/ar?p15 15th partition |
1793 | 1768 | ||
1794 | 102 char Philips SAA5249 Teletext signal decoder {2.6} | 1769 | 102 char |
1795 | 0 = /dev/tlk0 First Teletext decoder | ||
1796 | 1 = /dev/tlk1 Second Teletext decoder | ||
1797 | 2 = /dev/tlk2 Third Teletext decoder | ||
1798 | 3 = /dev/tlk3 Fourth Teletext decoder | ||
1799 | 1770 | ||
1800 | 102 block Compressed block device | 1771 | 102 block Compressed block device |
1801 | 0 = /dev/cbd/a First compressed block device, whole device | 1772 | 0 = /dev/cbd/a First compressed block device, whole device |
@@ -1916,10 +1887,7 @@ Your cooperation is appreciated. | |||
1916 | DAC960 (see major number 48) except that the limit on | 1887 | DAC960 (see major number 48) except that the limit on |
1917 | partitions is 15. | 1888 | partitions is 15. |
1918 | 1889 | ||
1919 | 111 char Philips SAA7146-based audio/video card {2.6} | 1890 | 111 char |
1920 | 0 = /dev/av0 First A/V card | ||
1921 | 1 = /dev/av1 Second A/V card | ||
1922 | ... | ||
1923 | 1891 | ||
1924 | 111 block Compaq Next Generation Drive Array, eighth controller | 1892 | 111 block Compaq Next Generation Drive Array, eighth controller |
1925 | 0 = /dev/cciss/c7d0 First logical drive, whole disk | 1893 | 0 = /dev/cciss/c7d0 First logical drive, whole disk |
@@ -2079,8 +2047,8 @@ Your cooperation is appreciated. | |||
2079 | ... | 2047 | ... |
2080 | 2048 | ||
2081 | 119 char VMware virtual network control | 2049 | 119 char VMware virtual network control |
2082 | 0 = /dev/vmnet0 1st virtual network | 2050 | 0 = /dev/vnet0 1st virtual network |
2083 | 1 = /dev/vmnet1 2nd virtual network | 2051 | 1 = /dev/vnet1 2nd virtual network |
2084 | ... | 2052 | ... |
2085 | 2053 | ||
2086 | 120-127 char LOCAL/EXPERIMENTAL USE | 2054 | 120-127 char LOCAL/EXPERIMENTAL USE |
@@ -2450,7 +2418,7 @@ Your cooperation is appreciated. | |||
2450 | 2 = /dev/raw/raw2 Second raw I/O device | 2418 | 2 = /dev/raw/raw2 Second raw I/O device |
2451 | ... | 2419 | ... |
2452 | 2420 | ||
2453 | 163 char UNASSIGNED (was Radio Tech BIM-XXX-RS232 radio modem - see 51) | 2421 | 163 char |
2454 | 2422 | ||
2455 | 164 char Chase Research AT/PCI-Fast serial card | 2423 | 164 char Chase Research AT/PCI-Fast serial card |
2456 | 0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0 | 2424 | 0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0 |
@@ -2542,6 +2510,12 @@ Your cooperation is appreciated. | |||
2542 | 1 = /dev/clanvi1 Second cLAN adapter | 2510 | 1 = /dev/clanvi1 Second cLAN adapter |
2543 | ... | 2511 | ... |
2544 | 2512 | ||
2513 | 179 block MMC block devices | ||
2514 | 0 = /dev/mmcblk0 First SD/MMC card | ||
2515 | 1 = /dev/mmcblk0p1 First partition on first MMC card | ||
2516 | 8 = /dev/mmcblk1 Second SD/MMC card | ||
2517 | ... | ||
2518 | |||
2545 | 179 char CCube DVXChip-based PCI products | 2519 | 179 char CCube DVXChip-based PCI products |
2546 | 0 = /dev/dvxirq0 First DVX device | 2520 | 0 = /dev/dvxirq0 First DVX device |
2547 | 1 = /dev/dvxirq1 Second DVX device | 2521 | 1 = /dev/dvxirq1 Second DVX device |
@@ -2560,6 +2534,9 @@ Your cooperation is appreciated. | |||
2560 | 96 = /dev/usb/hiddev0 1st USB HID device | 2534 | 96 = /dev/usb/hiddev0 1st USB HID device |
2561 | ... | 2535 | ... |
2562 | 111 = /dev/usb/hiddev15 16th USB HID device | 2536 | 111 = /dev/usb/hiddev15 16th USB HID device |
2537 | 112 = /dev/usb/auer0 1st auerswald ISDN device | ||
2538 | ... | ||
2539 | 127 = /dev/usb/auer15 16th auerswald ISDN device | ||
2563 | 128 = /dev/usb/brlvgr0 First Braille Voyager device | 2540 | 128 = /dev/usb/brlvgr0 First Braille Voyager device |
2564 | ... | 2541 | ... |
2565 | 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device | 2542 | 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device |
@@ -2810,6 +2787,20 @@ Your cooperation is appreciated. | |||
2810 | ... | 2787 | ... |
2811 | 190 = /dev/ttyUL3 Xilinx uartlite - port 3 | 2788 | 190 = /dev/ttyUL3 Xilinx uartlite - port 3 |
2812 | 191 = /dev/xvc0 Xen virtual console - port 0 | 2789 | 191 = /dev/xvc0 Xen virtual console - port 0 |
2790 | 192 = /dev/ttyPZ0 pmac_zilog - port 0 | ||
2791 | ... | ||
2792 | 195 = /dev/ttyPZ3 pmac_zilog - port 3 | ||
2793 | 196 = /dev/ttyTX0 TX39/49 serial port 0 | ||
2794 | ... | ||
2795 | 204 = /dev/ttyTX7 TX39/49 serial port 7 | ||
2796 | 205 = /dev/ttySC0 SC26xx serial port 0 | ||
2797 | 206 = /dev/ttySC1 SC26xx serial port 1 | ||
2798 | 207 = /dev/ttySC2 SC26xx serial port 2 | ||
2799 | 208 = /dev/ttySC3 SC26xx serial port 3 | ||
2800 | 209 = /dev/ttyMAX0 MAX3100 serial port 0 | ||
2801 | 210 = /dev/ttyMAX1 MAX3100 serial port 1 | ||
2802 | 211 = /dev/ttyMAX2 MAX3100 serial port 2 | ||
2803 | 212 = /dev/ttyMAX3 MAX3100 serial port 3 | ||
2813 | 2804 | ||
2814 | 205 char Low-density serial ports (alternate device) | 2805 | 205 char Low-density serial ports (alternate device) |
2815 | 0 = /dev/culu0 Callout device for ttyLU0 | 2806 | 0 = /dev/culu0 Callout device for ttyLU0 |
@@ -3145,6 +3136,20 @@ Your cooperation is appreciated. | |||
3145 | 1 = /dev/blockrom1 Second ROM card's translation layer interface | 3136 | 1 = /dev/blockrom1 Second ROM card's translation layer interface |
3146 | ... | 3137 | ... |
3147 | 3138 | ||
3139 | 259 block Block Extended Major | ||
3140 | Used dynamically to hold additional partition minor | ||
3141 | numbers and allow large numbers of partitions per device | ||
3142 | |||
3143 | 259 char FPGA configuration interfaces | ||
3144 | 0 = /dev/icap0 First Xilinx internal configuration | ||
3145 | 1 = /dev/icap1 Second Xilinx internal configuration | ||
3146 | |||
3147 | 260 char OSD (Object-based-device) SCSI Device | ||
3148 | 0 = /dev/osd0 First OSD Device | ||
3149 | 1 = /dev/osd1 Second OSD Device | ||
3150 | ... | ||
3151 | 255 = /dev/osd255 256th OSD Device | ||
3152 | |||
3148 | **** ADDITIONAL /dev DIRECTORY ENTRIES | 3153 | **** ADDITIONAL /dev DIRECTORY ENTRIES |
3149 | 3154 | ||
3150 | This section details additional entries that should or may exist in | 3155 | This section details additional entries that should or may exist in |
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 1e89a51ea49b..88519daab6e9 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -62,7 +62,6 @@ aic7*reg_print.c* | |||
62 | aic7*seq.h* | 62 | aic7*seq.h* |
63 | aicasm | 63 | aicasm |
64 | aicdb.h* | 64 | aicdb.h* |
65 | asm | ||
66 | asm-offsets.h | 65 | asm-offsets.h |
67 | asm_offsets.h | 66 | asm_offsets.h |
68 | autoconf.h* | 67 | autoconf.h* |
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt index a7cbfff40d07..a124f3126b0d 100644 --- a/Documentation/driver-model/device.txt +++ b/Documentation/driver-model/device.txt | |||
@@ -162,3 +162,35 @@ device_remove_file(dev,&dev_attr_power); | |||
162 | 162 | ||
163 | The file name will be 'power' with a mode of 0644 (-rw-r--r--). | 163 | The file name will be 'power' with a mode of 0644 (-rw-r--r--). |
164 | 164 | ||
165 | Word of warning: While the kernel allows device_create_file() and | ||
166 | device_remove_file() to be called on a device at any time, userspace has | ||
167 | strict expectations on when attributes get created. When a new device is | ||
168 | registered in the kernel, a uevent is generated to notify userspace (like | ||
169 | udev) that a new device is available. If attributes are added after the | ||
170 | device is registered, then userspace won't get notified and userspace will | ||
171 | not know about the new attributes. | ||
172 | |||
173 | This is important for device driver that need to publish additional | ||
174 | attributes for a device at driver probe time. If the device driver simply | ||
175 | calls device_create_file() on the device structure passed to it, then | ||
176 | userspace will never be notified of the new attributes. Instead, it should | ||
177 | probably use class_create() and class->dev_attrs to set up a list of | ||
178 | desired attributes in the modules_init function, and then in the .probe() | ||
179 | hook, and then use device_create() to create a new device as a child | ||
180 | of the probed device. The new device will generate a new uevent and | ||
181 | properly advertise the new attributes to userspace. | ||
182 | |||
183 | For example, if a driver wanted to add the following attributes: | ||
184 | struct device_attribute mydriver_attribs[] = { | ||
185 | __ATTR(port_count, 0444, port_count_show), | ||
186 | __ATTR(serial_number, 0444, serial_number_show), | ||
187 | NULL | ||
188 | }; | ||
189 | |||
190 | Then in the module init function is would do: | ||
191 | mydriver_class = class_create(THIS_MODULE, "my_attrs"); | ||
192 | mydriver_class.dev_attr = mydriver_attribs; | ||
193 | |||
194 | And assuming 'dev' is the struct device passed into the probe hook, the driver | ||
195 | probe function would do something like: | ||
196 | create_device(&mydriver_class, dev, chrdev, &private_data, "my_name"); | ||
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 387b8a720f4a..d79aead9418b 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt | |||
@@ -188,7 +188,7 @@ For example, you can do something like the following. | |||
188 | 188 | ||
189 | void my_midlayer_destroy_something() | 189 | void my_midlayer_destroy_something() |
190 | { | 190 | { |
191 | devres_release_group(dev, my_midlayer_create_soemthing); | 191 | devres_release_group(dev, my_midlayer_create_something); |
192 | } | 192 | } |
193 | 193 | ||
194 | 194 | ||
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index 83009fdcbbc8..2e2c2ea90ceb 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt | |||
@@ -169,3 +169,62 @@ three different ways to find such a match: | |||
169 | be probed later if another device registers. (Which is OK, since | 169 | be probed later if another device registers. (Which is OK, since |
170 | this interface is only for use with non-hotpluggable devices.) | 170 | this interface is only for use with non-hotpluggable devices.) |
171 | 171 | ||
172 | |||
173 | Early Platform Devices and Drivers | ||
174 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
175 | The early platform interfaces provide platform data to platform device | ||
176 | drivers early on during the system boot. The code is built on top of the | ||
177 | early_param() command line parsing and can be executed very early on. | ||
178 | |||
179 | Example: "earlyprintk" class early serial console in 6 steps | ||
180 | |||
181 | 1. Registering early platform device data | ||
182 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
183 | The architecture code registers platform device data using the function | ||
184 | early_platform_add_devices(). In the case of early serial console this | ||
185 | should be hardware configuration for the serial port. Devices registered | ||
186 | at this point will later on be matched against early platform drivers. | ||
187 | |||
188 | 2. Parsing kernel command line | ||
189 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
190 | The architecture code calls parse_early_param() to parse the kernel | ||
191 | command line. This will execute all matching early_param() callbacks. | ||
192 | User specified early platform devices will be registered at this point. | ||
193 | For the early serial console case the user can specify port on the | ||
194 | kernel command line as "earlyprintk=serial.0" where "earlyprintk" is | ||
195 | the class string, "serial" is the name of the platfrom driver and | ||
196 | 0 is the platform device id. If the id is -1 then the dot and the | ||
197 | id can be omitted. | ||
198 | |||
199 | 3. Installing early platform drivers belonging to a certain class | ||
200 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
201 | The architecture code may optionally force registration of all early | ||
202 | platform drivers belonging to a certain class using the function | ||
203 | early_platform_driver_register_all(). User specified devices from | ||
204 | step 2 have priority over these. This step is omitted by the serial | ||
205 | driver example since the early serial driver code should be disabled | ||
206 | unless the user has specified port on the kernel command line. | ||
207 | |||
208 | 4. Early platform driver registration | ||
209 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
210 | Compiled-in platform drivers making use of early_platform_init() are | ||
211 | automatically registered during step 2 or 3. The serial driver example | ||
212 | should use early_platform_init("earlyprintk", &platform_driver). | ||
213 | |||
214 | 5. Probing of early platform drivers belonging to a certain class | ||
215 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
216 | The architecture code calls early_platform_driver_probe() to match | ||
217 | registered early platform devices associated with a certain class with | ||
218 | registered early platform drivers. Matched devices will get probed(). | ||
219 | This step can be executed at any point during the early boot. As soon | ||
220 | as possible may be good for the serial port case. | ||
221 | |||
222 | 6. Inside the early platform driver probe() | ||
223 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
224 | The driver code needs to take special care during early boot, especially | ||
225 | when it comes to memory allocation and interrupt registration. The code | ||
226 | in the probe() function can use is_early_platform_device() to check if | ||
227 | it is called at early platform device or at the regular platform device | ||
228 | time. The early serial driver performs register_console() at this point. | ||
229 | |||
230 | For further information, see <linux/platform_device.h>. | ||
diff --git a/Documentation/dvb/get_dvb_firmware b/Documentation/dvb/get_dvb_firmware index f2e908d7f90d..a52adfc9a57f 100644 --- a/Documentation/dvb/get_dvb_firmware +++ b/Documentation/dvb/get_dvb_firmware | |||
@@ -25,7 +25,7 @@ use IO::Handle; | |||
25 | "tda10046lifeview", "av7110", "dec2000t", "dec2540t", | 25 | "tda10046lifeview", "av7110", "dec2000t", "dec2540t", |
26 | "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004", | 26 | "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004", |
27 | "or51211", "or51132_qam", "or51132_vsb", "bluebird", | 27 | "or51211", "or51132_qam", "or51132_vsb", "bluebird", |
28 | "opera1"); | 28 | "opera1", "cx231xx", "cx18", "cx23885", "pvrusb2" ); |
29 | 29 | ||
30 | # Check args | 30 | # Check args |
31 | syntax() if (scalar(@ARGV) != 1); | 31 | syntax() if (scalar(@ARGV) != 1); |
@@ -37,8 +37,8 @@ for ($i=0; $i < scalar(@components); $i++) { | |||
37 | $outfile = eval($cid); | 37 | $outfile = eval($cid); |
38 | die $@ if $@; | 38 | die $@ if $@; |
39 | print STDERR <<EOF; | 39 | print STDERR <<EOF; |
40 | Firmware $outfile extracted successfully. | 40 | Firmware(s) $outfile extracted successfully. |
41 | Now copy it to either /usr/lib/hotplug/firmware or /lib/firmware | 41 | Now copy it(they) to either /usr/lib/hotplug/firmware or /lib/firmware |
42 | (depending on configuration of firmware hotplug). | 42 | (depending on configuration of firmware hotplug). |
43 | EOF | 43 | EOF |
44 | exit(0); | 44 | exit(0); |
@@ -112,7 +112,7 @@ sub tda10045 { | |||
112 | 112 | ||
113 | sub tda10046 { | 113 | sub tda10046 { |
114 | my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip"; | 114 | my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip"; |
115 | my $url = "http://technotrend-online.com/download/software/219/$sourcefile"; | 115 | my $url = "http://www.tt-download.com/download/updates/219/$sourcefile"; |
116 | my $hash = "6a7e1e2f2644b162ff0502367553c72d"; | 116 | my $hash = "6a7e1e2f2644b162ff0502367553c72d"; |
117 | my $outfile = "dvb-fe-tda10046.fw"; | 117 | my $outfile = "dvb-fe-tda10046.fw"; |
118 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); | 118 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); |
@@ -129,8 +129,8 @@ sub tda10046 { | |||
129 | } | 129 | } |
130 | 130 | ||
131 | sub tda10046lifeview { | 131 | sub tda10046lifeview { |
132 | my $sourcefile = "Drv_2.11.02.zip"; | 132 | my $sourcefile = "7%5Cdrv_2.11.02.zip"; |
133 | my $url = "http://www.lifeview.com.tw/drivers/pci_card/FlyDVB-T/$sourcefile"; | 133 | my $url = "http://www.lifeview.hk/dbimages/document/$sourcefile"; |
134 | my $hash = "1ea24dee4eea8fe971686981f34fd2e0"; | 134 | my $hash = "1ea24dee4eea8fe971686981f34fd2e0"; |
135 | my $outfile = "dvb-fe-tda10046.fw"; | 135 | my $outfile = "dvb-fe-tda10046.fw"; |
136 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); | 136 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); |
@@ -317,7 +317,7 @@ sub nxt2002 { | |||
317 | 317 | ||
318 | sub nxt2004 { | 318 | sub nxt2004 { |
319 | my $sourcefile = "AVerTVHD_MCE_A180_Drv_v1.2.2.16.zip"; | 319 | my $sourcefile = "AVerTVHD_MCE_A180_Drv_v1.2.2.16.zip"; |
320 | my $url = "http://www.aver.com/support/Drivers/$sourcefile"; | 320 | my $url = "http://www.avermedia-usa.com/support/Drivers/$sourcefile"; |
321 | my $hash = "111cb885b1e009188346d72acfed024c"; | 321 | my $hash = "111cb885b1e009188346d72acfed024c"; |
322 | my $outfile = "dvb-fe-nxt2004.fw"; | 322 | my $outfile = "dvb-fe-nxt2004.fw"; |
323 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); | 323 | my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); |
@@ -345,6 +345,85 @@ sub or51211 { | |||
345 | $fwfile; | 345 | $fwfile; |
346 | } | 346 | } |
347 | 347 | ||
348 | sub cx231xx { | ||
349 | my $fwfile = "v4l-cx231xx-avcore-01.fw"; | ||
350 | my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; | ||
351 | my $hash = "7d3bb956dc9df0eafded2b56ba57cc42"; | ||
352 | |||
353 | checkstandard(); | ||
354 | |||
355 | wgetfile($fwfile, $url); | ||
356 | verify($fwfile, $hash); | ||
357 | |||
358 | $fwfile; | ||
359 | } | ||
360 | |||
361 | sub cx18 { | ||
362 | my $url = "http://linuxtv.org/downloads/firmware/"; | ||
363 | |||
364 | my %files = ( | ||
365 | 'v4l-cx23418-apu.fw' => '588f081b562f5c653a3db1ad8f65939a', | ||
366 | 'v4l-cx23418-cpu.fw' => 'b6c7ed64bc44b1a6e0840adaeac39d79', | ||
367 | 'v4l-cx23418-dig.fw' => '95bc688d3e7599fd5800161e9971cc55', | ||
368 | ); | ||
369 | |||
370 | checkstandard(); | ||
371 | |||
372 | my $allfiles; | ||
373 | foreach my $fwfile (keys %files) { | ||
374 | wgetfile($fwfile, "$url/$fwfile"); | ||
375 | verify($fwfile, $files{$fwfile}); | ||
376 | $allfiles .= " $fwfile"; | ||
377 | } | ||
378 | |||
379 | $allfiles =~ s/^\s//; | ||
380 | |||
381 | $allfiles; | ||
382 | } | ||
383 | |||
384 | sub cx23885 { | ||
385 | my $url = "http://linuxtv.org/downloads/firmware/"; | ||
386 | |||
387 | my %files = ( | ||
388 | 'v4l-cx23885-avcore-01.fw' => 'a9f8f5d901a7fb42f552e1ee6384f3bb', | ||
389 | 'v4l-cx23885-enc.fw' => 'a9f8f5d901a7fb42f552e1ee6384f3bb', | ||
390 | ); | ||
391 | |||
392 | checkstandard(); | ||
393 | |||
394 | my $allfiles; | ||
395 | foreach my $fwfile (keys %files) { | ||
396 | wgetfile($fwfile, "$url/$fwfile"); | ||
397 | verify($fwfile, $files{$fwfile}); | ||
398 | $allfiles .= " $fwfile"; | ||
399 | } | ||
400 | |||
401 | $allfiles =~ s/^\s//; | ||
402 | |||
403 | $allfiles; | ||
404 | } | ||
405 | |||
406 | sub pvrusb2 { | ||
407 | my $url = "http://linuxtv.org/downloads/firmware/"; | ||
408 | |||
409 | my %files = ( | ||
410 | 'v4l-cx25840.fw' => 'dadb79e9904fc8af96e8111d9cb59320', | ||
411 | ); | ||
412 | |||
413 | checkstandard(); | ||
414 | |||
415 | my $allfiles; | ||
416 | foreach my $fwfile (keys %files) { | ||
417 | wgetfile($fwfile, "$url/$fwfile"); | ||
418 | verify($fwfile, $files{$fwfile}); | ||
419 | $allfiles .= " $fwfile"; | ||
420 | } | ||
421 | |||
422 | $allfiles =~ s/^\s//; | ||
423 | |||
424 | $allfiles; | ||
425 | } | ||
426 | |||
348 | sub or51132_qam { | 427 | sub or51132_qam { |
349 | my $fwfile = "dvb-fe-or51132-qam.fw"; | 428 | my $fwfile = "dvb-fe-or51132-qam.fw"; |
350 | my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; | 429 | my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; |
diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt new file mode 100644 index 000000000000..674c5663d346 --- /dev/null +++ b/Documentation/dynamic-debug-howto.txt | |||
@@ -0,0 +1,240 @@ | |||
1 | |||
2 | Introduction | ||
3 | ============ | ||
4 | |||
5 | This document describes how to use the dynamic debug (ddebug) feature. | ||
6 | |||
7 | Dynamic debug is designed to allow you to dynamically enable/disable kernel | ||
8 | code to obtain additional kernel information. Currently, if | ||
9 | CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be | ||
10 | dynamically enabled per-callsite. | ||
11 | |||
12 | Dynamic debug has even more useful features: | ||
13 | |||
14 | * Simple query language allows turning on and off debugging statements by | ||
15 | matching any combination of: | ||
16 | |||
17 | - source filename | ||
18 | - function name | ||
19 | - line number (including ranges of line numbers) | ||
20 | - module name | ||
21 | - format string | ||
22 | |||
23 | * Provides a debugfs control file: <debugfs>/dynamic_debug/control which can be | ||
24 | read to display the complete list of known debug statements, to help guide you | ||
25 | |||
26 | Controlling dynamic debug Behaviour | ||
27 | =============================== | ||
28 | |||
29 | The behaviour of pr_debug()/dev_debug()s are controlled via writing to a | ||
30 | control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs | ||
31 | filesystem, in order to make use of this feature. Subsequently, we refer to the | ||
32 | control file as: <debugfs>/dynamic_debug/control. For example, if you want to | ||
33 | enable printing from source file 'svcsock.c', line 1603 you simply do: | ||
34 | |||
35 | nullarbor:~ # echo 'file svcsock.c line 1603 +p' > | ||
36 | <debugfs>/dynamic_debug/control | ||
37 | |||
38 | If you make a mistake with the syntax, the write will fail thus: | ||
39 | |||
40 | nullarbor:~ # echo 'file svcsock.c wtf 1 +p' > | ||
41 | <debugfs>/dynamic_debug/control | ||
42 | -bash: echo: write error: Invalid argument | ||
43 | |||
44 | Viewing Dynamic Debug Behaviour | ||
45 | =========================== | ||
46 | |||
47 | You can view the currently configured behaviour of all the debug statements | ||
48 | via: | ||
49 | |||
50 | nullarbor:~ # cat <debugfs>/dynamic_debug/control | ||
51 | # filename:lineno [module]function flags format | ||
52 | /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA Module Removed, deregister RPC RDMA transport\012" | ||
53 | /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline : %d\012" | ||
54 | /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth : %d\012" | ||
55 | /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests : %d\012" | ||
56 | ... | ||
57 | |||
58 | |||
59 | You can also apply standard Unix text manipulation filters to this | ||
60 | data, e.g. | ||
61 | |||
62 | nullarbor:~ # grep -i rdma <debugfs>/dynamic_debug/control | wc -l | ||
63 | 62 | ||
64 | |||
65 | nullarbor:~ # grep -i tcp <debugfs>/dynamic_debug/control | wc -l | ||
66 | 42 | ||
67 | |||
68 | Note in particular that the third column shows the enabled behaviour | ||
69 | flags for each debug statement callsite (see below for definitions of the | ||
70 | flags). The default value, no extra behaviour enabled, is "-". So | ||
71 | you can view all the debug statement callsites with any non-default flags: | ||
72 | |||
73 | nullarbor:~ # awk '$3 != "-"' <debugfs>/dynamic_debug/control | ||
74 | # filename:lineno [module]function flags format | ||
75 | /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012" | ||
76 | |||
77 | |||
78 | Command Language Reference | ||
79 | ========================== | ||
80 | |||
81 | At the lexical level, a command comprises a sequence of words separated | ||
82 | by whitespace characters. Note that newlines are treated as word | ||
83 | separators and do *not* end a command or allow multiple commands to | ||
84 | be done together. So these are all equivalent: | ||
85 | |||
86 | nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' > | ||
87 | <debugfs>/dynamic_debug/control | ||
88 | nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' > | ||
89 | <debugfs>/dynamic_debug/control | ||
90 | nullarbor:~ # echo -c 'file svcsock.c\nline 1603 +p' > | ||
91 | <debugfs>/dynamic_debug/control | ||
92 | nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > | ||
93 | <debugfs>/dynamic_debug/control | ||
94 | |||
95 | Commands are bounded by a write() system call. If you want to do | ||
96 | multiple commands you need to do a separate "echo" for each, like: | ||
97 | |||
98 | nullarbor:~ # echo 'file svcsock.c line 1603 +p' > /proc/dprintk ;\ | ||
99 | > echo 'file svcsock.c line 1563 +p' > /proc/dprintk | ||
100 | |||
101 | or even like: | ||
102 | |||
103 | nullarbor:~ # ( | ||
104 | > echo 'file svcsock.c line 1603 +p' ;\ | ||
105 | > echo 'file svcsock.c line 1563 +p' ;\ | ||
106 | > ) > /proc/dprintk | ||
107 | |||
108 | At the syntactical level, a command comprises a sequence of match | ||
109 | specifications, followed by a flags change specification. | ||
110 | |||
111 | command ::= match-spec* flags-spec | ||
112 | |||
113 | The match-spec's are used to choose a subset of the known dprintk() | ||
114 | callsites to which to apply the flags-spec. Think of them as a query | ||
115 | with implicit ANDs between each pair. Note that an empty list of | ||
116 | match-specs is possible, but is not very useful because it will not | ||
117 | match any debug statement callsites. | ||
118 | |||
119 | A match specification comprises a keyword, which controls the attribute | ||
120 | of the callsite to be compared, and a value to compare against. Possible | ||
121 | keywords are: | ||
122 | |||
123 | match-spec ::= 'func' string | | ||
124 | 'file' string | | ||
125 | 'module' string | | ||
126 | 'format' string | | ||
127 | 'line' line-range | ||
128 | |||
129 | line-range ::= lineno | | ||
130 | '-'lineno | | ||
131 | lineno'-' | | ||
132 | lineno'-'lineno | ||
133 | // Note: line-range cannot contain space, e.g. | ||
134 | // "1-30" is valid range but "1 - 30" is not. | ||
135 | |||
136 | lineno ::= unsigned-int | ||
137 | |||
138 | The meanings of each keyword are: | ||
139 | |||
140 | func | ||
141 | The given string is compared against the function name | ||
142 | of each callsite. Example: | ||
143 | |||
144 | func svc_tcp_accept | ||
145 | |||
146 | file | ||
147 | The given string is compared against either the full | ||
148 | pathname or the basename of the source file of each | ||
149 | callsite. Examples: | ||
150 | |||
151 | file svcsock.c | ||
152 | file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c | ||
153 | |||
154 | module | ||
155 | The given string is compared against the module name | ||
156 | of each callsite. The module name is the string as | ||
157 | seen in "lsmod", i.e. without the directory or the .ko | ||
158 | suffix and with '-' changed to '_'. Examples: | ||
159 | |||
160 | module sunrpc | ||
161 | module nfsd | ||
162 | |||
163 | format | ||
164 | The given string is searched for in the dynamic debug format | ||
165 | string. Note that the string does not need to match the | ||
166 | entire format, only some part. Whitespace and other | ||
167 | special characters can be escaped using C octal character | ||
168 | escape \ooo notation, e.g. the space character is \040. | ||
169 | Alternatively, the string can be enclosed in double quote | ||
170 | characters (") or single quote characters ('). | ||
171 | Examples: | ||
172 | |||
173 | format svcrdma: // many of the NFS/RDMA server dprintks | ||
174 | format readahead // some dprintks in the readahead cache | ||
175 | format nfsd:\040SETATTR // one way to match a format with whitespace | ||
176 | format "nfsd: SETATTR" // a neater way to match a format with whitespace | ||
177 | format 'nfsd: SETATTR' // yet another way to match a format with whitespace | ||
178 | |||
179 | line | ||
180 | The given line number or range of line numbers is compared | ||
181 | against the line number of each dprintk() callsite. A single | ||
182 | line number matches the callsite line number exactly. A | ||
183 | range of line numbers matches any callsite between the first | ||
184 | and last line number inclusive. An empty first number means | ||
185 | the first line in the file, an empty line number means the | ||
186 | last number in the file. Examples: | ||
187 | |||
188 | line 1603 // exactly line 1603 | ||
189 | line 1600-1605 // the six lines from line 1600 to line 1605 | ||
190 | line -1605 // the 1605 lines from line 1 to line 1605 | ||
191 | line 1600- // all lines from line 1600 to the end of the file | ||
192 | |||
193 | The flags specification comprises a change operation followed | ||
194 | by one or more flag characters. The change operation is one | ||
195 | of the characters: | ||
196 | |||
197 | - | ||
198 | remove the given flags | ||
199 | |||
200 | + | ||
201 | add the given flags | ||
202 | |||
203 | = | ||
204 | set the flags to the given flags | ||
205 | |||
206 | The flags are: | ||
207 | |||
208 | p | ||
209 | Causes a printk() message to be emitted to dmesg | ||
210 | |||
211 | Note the regexp ^[-+=][scp]+$ matches a flags specification. | ||
212 | Note also that there is no convenient syntax to remove all | ||
213 | the flags at once, you need to use "-psc". | ||
214 | |||
215 | Examples | ||
216 | ======== | ||
217 | |||
218 | // enable the message at line 1603 of file svcsock.c | ||
219 | nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > | ||
220 | <debugfs>/dynamic_debug/control | ||
221 | |||
222 | // enable all the messages in file svcsock.c | ||
223 | nullarbor:~ # echo -n 'file svcsock.c +p' > | ||
224 | <debugfs>/dynamic_debug/control | ||
225 | |||
226 | // enable all the messages in the NFS server module | ||
227 | nullarbor:~ # echo -n 'module nfsd +p' > | ||
228 | <debugfs>/dynamic_debug/control | ||
229 | |||
230 | // enable all 12 messages in the function svc_process() | ||
231 | nullarbor:~ # echo -n 'func svc_process +p' > | ||
232 | <debugfs>/dynamic_debug/control | ||
233 | |||
234 | // disable all 12 messages in the function svc_process() | ||
235 | nullarbor:~ # echo -n 'func svc_process -p' > | ||
236 | <debugfs>/dynamic_debug/control | ||
237 | |||
238 | // enable messages for NFS calls READ, READLINK, READDIR and READDIR+. | ||
239 | nullarbor:~ # echo -n 'format "nfsd: READ" +p' > | ||
240 | <debugfs>/dynamic_debug/control | ||
diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 8eda3fb66416..06f8f46692dc 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt | |||
@@ -23,8 +23,8 @@ first time, it was renamed to 'EDAC'. | |||
23 | The bluesmoke project at sourceforge.net is now utilized as a 'staging area' | 23 | The bluesmoke project at sourceforge.net is now utilized as a 'staging area' |
24 | for EDAC development, before it is sent upstream to kernel.org | 24 | for EDAC development, before it is sent upstream to kernel.org |
25 | 25 | ||
26 | At the bluesmoke/EDAC project site, is a series of quilt patches against | 26 | At the bluesmoke/EDAC project site is a series of quilt patches against |
27 | recent kernels, stored in a SVN respository. For easier downloading, there | 27 | recent kernels, stored in a SVN repository. For easier downloading, there |
28 | is also a tarball snapshot available. | 28 | is also a tarball snapshot available. |
29 | 29 | ||
30 | ============================================================================ | 30 | ============================================================================ |
@@ -73,9 +73,9 @@ the vendor should tie the parity status bits to 0 if they do not intend | |||
73 | to generate parity. Some vendors do not do this, and thus the parity bit | 73 | to generate parity. Some vendors do not do this, and thus the parity bit |
74 | can "float" giving false positives. | 74 | can "float" giving false positives. |
75 | 75 | ||
76 | In the kernel there is a pci device attribute located in sysfs that is | 76 | In the kernel there is a PCI device attribute located in sysfs that is |
77 | checked by the EDAC PCI scanning code. If that attribute is set, | 77 | checked by the EDAC PCI scanning code. If that attribute is set, |
78 | PCI parity/error scannining is skipped for that device. The attribute | 78 | PCI parity/error scanning is skipped for that device. The attribute |
79 | is: | 79 | is: |
80 | 80 | ||
81 | broken_parity_status | 81 | broken_parity_status |
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 4bc374a14345..079305640790 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt | |||
@@ -29,16 +29,16 @@ o debugfs entries | |||
29 | fault-inject-debugfs kernel module provides some debugfs entries for runtime | 29 | fault-inject-debugfs kernel module provides some debugfs entries for runtime |
30 | configuration of fault-injection capabilities. | 30 | configuration of fault-injection capabilities. |
31 | 31 | ||
32 | - /debug/fail*/probability: | 32 | - /sys/kernel/debug/fail*/probability: |
33 | 33 | ||
34 | likelihood of failure injection, in percent. | 34 | likelihood of failure injection, in percent. |
35 | Format: <percent> | 35 | Format: <percent> |
36 | 36 | ||
37 | Note that one-failure-per-hundred is a very high error rate | 37 | Note that one-failure-per-hundred is a very high error rate |
38 | for some testcases. Consider setting probability=100 and configure | 38 | for some testcases. Consider setting probability=100 and configure |
39 | /debug/fail*/interval for such testcases. | 39 | /sys/kernel/debug/fail*/interval for such testcases. |
40 | 40 | ||
41 | - /debug/fail*/interval: | 41 | - /sys/kernel/debug/fail*/interval: |
42 | 42 | ||
43 | specifies the interval between failures, for calls to | 43 | specifies the interval between failures, for calls to |
44 | should_fail() that pass all the other tests. | 44 | should_fail() that pass all the other tests. |
@@ -46,18 +46,18 @@ configuration of fault-injection capabilities. | |||
46 | Note that if you enable this, by setting interval>1, you will | 46 | Note that if you enable this, by setting interval>1, you will |
47 | probably want to set probability=100. | 47 | probably want to set probability=100. |
48 | 48 | ||
49 | - /debug/fail*/times: | 49 | - /sys/kernel/debug/fail*/times: |
50 | 50 | ||
51 | specifies how many times failures may happen at most. | 51 | specifies how many times failures may happen at most. |
52 | A value of -1 means "no limit". | 52 | A value of -1 means "no limit". |
53 | 53 | ||
54 | - /debug/fail*/space: | 54 | - /sys/kernel/debug/fail*/space: |
55 | 55 | ||
56 | specifies an initial resource "budget", decremented by "size" | 56 | specifies an initial resource "budget", decremented by "size" |
57 | on each call to should_fail(,size). Failure injection is | 57 | on each call to should_fail(,size). Failure injection is |
58 | suppressed until "space" reaches zero. | 58 | suppressed until "space" reaches zero. |
59 | 59 | ||
60 | - /debug/fail*/verbose | 60 | - /sys/kernel/debug/fail*/verbose |
61 | 61 | ||
62 | Format: { 0 | 1 | 2 } | 62 | Format: { 0 | 1 | 2 } |
63 | specifies the verbosity of the messages when failure is | 63 | specifies the verbosity of the messages when failure is |
@@ -65,17 +65,17 @@ configuration of fault-injection capabilities. | |||
65 | log line per failure; '2' will print a call trace too -- useful | 65 | log line per failure; '2' will print a call trace too -- useful |
66 | to debug the problems revealed by fault injection. | 66 | to debug the problems revealed by fault injection. |
67 | 67 | ||
68 | - /debug/fail*/task-filter: | 68 | - /sys/kernel/debug/fail*/task-filter: |
69 | 69 | ||
70 | Format: { 'Y' | 'N' } | 70 | Format: { 'Y' | 'N' } |
71 | A value of 'N' disables filtering by process (default). | 71 | A value of 'N' disables filtering by process (default). |
72 | Any positive value limits failures to only processes indicated by | 72 | Any positive value limits failures to only processes indicated by |
73 | /proc/<pid>/make-it-fail==1. | 73 | /proc/<pid>/make-it-fail==1. |
74 | 74 | ||
75 | - /debug/fail*/require-start: | 75 | - /sys/kernel/debug/fail*/require-start: |
76 | - /debug/fail*/require-end: | 76 | - /sys/kernel/debug/fail*/require-end: |
77 | - /debug/fail*/reject-start: | 77 | - /sys/kernel/debug/fail*/reject-start: |
78 | - /debug/fail*/reject-end: | 78 | - /sys/kernel/debug/fail*/reject-end: |
79 | 79 | ||
80 | specifies the range of virtual addresses tested during | 80 | specifies the range of virtual addresses tested during |
81 | stacktrace walking. Failure is injected only if some caller | 81 | stacktrace walking. Failure is injected only if some caller |
@@ -84,26 +84,26 @@ configuration of fault-injection capabilities. | |||
84 | Default required range is [0,ULONG_MAX) (whole of virtual address space). | 84 | Default required range is [0,ULONG_MAX) (whole of virtual address space). |
85 | Default rejected range is [0,0). | 85 | Default rejected range is [0,0). |
86 | 86 | ||
87 | - /debug/fail*/stacktrace-depth: | 87 | - /sys/kernel/debug/fail*/stacktrace-depth: |
88 | 88 | ||
89 | specifies the maximum stacktrace depth walked during search | 89 | specifies the maximum stacktrace depth walked during search |
90 | for a caller within [require-start,require-end) OR | 90 | for a caller within [require-start,require-end) OR |
91 | [reject-start,reject-end). | 91 | [reject-start,reject-end). |
92 | 92 | ||
93 | - /debug/fail_page_alloc/ignore-gfp-highmem: | 93 | - /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem: |
94 | 94 | ||
95 | Format: { 'Y' | 'N' } | 95 | Format: { 'Y' | 'N' } |
96 | default is 'N', setting it to 'Y' won't inject failures into | 96 | default is 'N', setting it to 'Y' won't inject failures into |
97 | highmem/user allocations. | 97 | highmem/user allocations. |
98 | 98 | ||
99 | - /debug/failslab/ignore-gfp-wait: | 99 | - /sys/kernel/debug/failslab/ignore-gfp-wait: |
100 | - /debug/fail_page_alloc/ignore-gfp-wait: | 100 | - /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait: |
101 | 101 | ||
102 | Format: { 'Y' | 'N' } | 102 | Format: { 'Y' | 'N' } |
103 | default is 'N', setting it to 'Y' will inject failures | 103 | default is 'N', setting it to 'Y' will inject failures |
104 | only into non-sleep allocations (GFP_ATOMIC allocations). | 104 | only into non-sleep allocations (GFP_ATOMIC allocations). |
105 | 105 | ||
106 | - /debug/fail_page_alloc/min-order: | 106 | - /sys/kernel/debug/fail_page_alloc/min-order: |
107 | 107 | ||
108 | specifies the minimum page allocation order to be injected | 108 | specifies the minimum page allocation order to be injected |
109 | failures. | 109 | failures. |
@@ -166,13 +166,13 @@ o Inject slab allocation failures into module init/exit code | |||
166 | #!/bin/bash | 166 | #!/bin/bash |
167 | 167 | ||
168 | FAILTYPE=failslab | 168 | FAILTYPE=failslab |
169 | echo Y > /debug/$FAILTYPE/task-filter | 169 | echo Y > /sys/kernel/debug/$FAILTYPE/task-filter |
170 | echo 10 > /debug/$FAILTYPE/probability | 170 | echo 10 > /sys/kernel/debug/$FAILTYPE/probability |
171 | echo 100 > /debug/$FAILTYPE/interval | 171 | echo 100 > /sys/kernel/debug/$FAILTYPE/interval |
172 | echo -1 > /debug/$FAILTYPE/times | 172 | echo -1 > /sys/kernel/debug/$FAILTYPE/times |
173 | echo 0 > /debug/$FAILTYPE/space | 173 | echo 0 > /sys/kernel/debug/$FAILTYPE/space |
174 | echo 2 > /debug/$FAILTYPE/verbose | 174 | echo 2 > /sys/kernel/debug/$FAILTYPE/verbose |
175 | echo 1 > /debug/$FAILTYPE/ignore-gfp-wait | 175 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait |
176 | 176 | ||
177 | faulty_system() | 177 | faulty_system() |
178 | { | 178 | { |
@@ -217,20 +217,20 @@ then | |||
217 | exit 1 | 217 | exit 1 |
218 | fi | 218 | fi |
219 | 219 | ||
220 | cat /sys/module/$module/sections/.text > /debug/$FAILTYPE/require-start | 220 | cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start |
221 | cat /sys/module/$module/sections/.data > /debug/$FAILTYPE/require-end | 221 | cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end |
222 | 222 | ||
223 | echo N > /debug/$FAILTYPE/task-filter | 223 | echo N > /sys/kernel/debug/$FAILTYPE/task-filter |
224 | echo 10 > /debug/$FAILTYPE/probability | 224 | echo 10 > /sys/kernel/debug/$FAILTYPE/probability |
225 | echo 100 > /debug/$FAILTYPE/interval | 225 | echo 100 > /sys/kernel/debug/$FAILTYPE/interval |
226 | echo -1 > /debug/$FAILTYPE/times | 226 | echo -1 > /sys/kernel/debug/$FAILTYPE/times |
227 | echo 0 > /debug/$FAILTYPE/space | 227 | echo 0 > /sys/kernel/debug/$FAILTYPE/space |
228 | echo 2 > /debug/$FAILTYPE/verbose | 228 | echo 2 > /sys/kernel/debug/$FAILTYPE/verbose |
229 | echo 1 > /debug/$FAILTYPE/ignore-gfp-wait | 229 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait |
230 | echo 1 > /debug/$FAILTYPE/ignore-gfp-highmem | 230 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem |
231 | echo 10 > /debug/$FAILTYPE/stacktrace-depth | 231 | echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth |
232 | 232 | ||
233 | trap "echo 0 > /debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT | 233 | trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT |
234 | 234 | ||
235 | echo "Injecting errors into the module $module... (interrupt to stop)" | 235 | echo "Injecting errors into the module $module... (interrupt to stop)" |
236 | sleep 1000000 | 236 | sleep 1000000 |
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX index caabbd395e61..a618fd99c9f0 100644 --- a/Documentation/fb/00-INDEX +++ b/Documentation/fb/00-INDEX | |||
@@ -11,8 +11,6 @@ aty128fb.txt | |||
11 | - info on the ATI Rage128 frame buffer driver. | 11 | - info on the ATI Rage128 frame buffer driver. |
12 | cirrusfb.txt | 12 | cirrusfb.txt |
13 | - info on the driver for Cirrus Logic chipsets. | 13 | - info on the driver for Cirrus Logic chipsets. |
14 | cyblafb/ | ||
15 | - directory with documentation files related to the cyblafb driver. | ||
16 | deferred_io.txt | 14 | deferred_io.txt |
17 | - an introduction to deferred IO. | 15 | - an introduction to deferred IO. |
18 | fbcon.txt | 16 | fbcon.txt |
diff --git a/Documentation/fb/cyblafb/bugs b/Documentation/fb/cyblafb/bugs deleted file mode 100644 index 9443a6d72cdd..000000000000 --- a/Documentation/fb/cyblafb/bugs +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | Bugs | ||
2 | ==== | ||
3 | |||
4 | I currently don't know of any bug. Please do send reports to: | ||
5 | - linux-fbdev-devel@lists.sourceforge.net | ||
6 | - Knut_Petersen@t-online.de. | ||
7 | |||
8 | |||
9 | Untested features | ||
10 | ================= | ||
11 | |||
12 | All LCD stuff is untested. If it worked in tridentfb, it should work in | ||
13 | cyblafb. Please test and report the results to Knut_Petersen@t-online.de. | ||
diff --git a/Documentation/fb/cyblafb/credits b/Documentation/fb/cyblafb/credits deleted file mode 100644 index 0eb3b443dc2b..000000000000 --- a/Documentation/fb/cyblafb/credits +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | Thanks to | ||
2 | ========= | ||
3 | * Alan Hourihane, for writing the X trident driver | ||
4 | * Jani Monoses, for writing the tridentfb driver | ||
5 | * Antonino A. Daplas, for review of the first published | ||
6 | version of cyblafb and some code | ||
7 | * Jochen Hein, for testing and a helpfull bug report | ||
diff --git a/Documentation/fb/cyblafb/documentation b/Documentation/fb/cyblafb/documentation deleted file mode 100644 index bb1aac048425..000000000000 --- a/Documentation/fb/cyblafb/documentation +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | Available Documentation | ||
2 | ======================= | ||
3 | |||
4 | Apollo PLE 133 Chipset VT8601A North Bridge Datasheet, Rev. 1.82, October 22, | ||
5 | 2001, available from VIA: | ||
6 | |||
7 | http://www.viavpsd.com/product/6/15/DS8601A182.pdf | ||
8 | |||
9 | The datasheet is incomplete, some registers that need to be programmed are not | ||
10 | explained at all and important bits are listed as "reserved". But you really | ||
11 | need the datasheet to understand the code. "p. xxx" comments refer to page | ||
12 | numbers of this document. | ||
13 | |||
14 | XFree/XOrg drivers are available and of good quality, looking at the code | ||
15 | there is a good idea if the datasheet does not provide enough information | ||
16 | or if the datasheet seems to be wrong. | ||
17 | |||
diff --git a/Documentation/fb/cyblafb/fb.modes b/Documentation/fb/cyblafb/fb.modes deleted file mode 100644 index fe0e5223ba86..000000000000 --- a/Documentation/fb/cyblafb/fb.modes +++ /dev/null | |||
@@ -1,154 +0,0 @@ | |||
1 | # | ||
2 | # Sample fb.modes file | ||
3 | # | ||
4 | # Provides an incomplete list of working modes for | ||
5 | # the cyberblade/i1 graphics core. | ||
6 | # | ||
7 | # The value 4294967256 is used instead of -40. Of course, -40 is not | ||
8 | # a really reasonable value, but chip design does not always follow | ||
9 | # logic. Believe me, it's ok, and it's the way the BIOS does it. | ||
10 | # | ||
11 | # fbset requires 4294967256 in fb.modes and -40 as an argument to | ||
12 | # the -t parameter. That's also not too reasonable, and it might change | ||
13 | # in the future or might even be differt for your current version. | ||
14 | # | ||
15 | |||
16 | mode "640x480-50" | ||
17 | geometry 640 480 2048 4096 8 | ||
18 | timings 47619 4294967256 24 17 0 216 3 | ||
19 | endmode | ||
20 | |||
21 | mode "640x480-60" | ||
22 | geometry 640 480 2048 4096 8 | ||
23 | timings 39682 4294967256 24 17 0 216 3 | ||
24 | endmode | ||
25 | |||
26 | mode "640x480-70" | ||
27 | geometry 640 480 2048 4096 8 | ||
28 | timings 34013 4294967256 24 17 0 216 3 | ||
29 | endmode | ||
30 | |||
31 | mode "640x480-72" | ||
32 | geometry 640 480 2048 4096 8 | ||
33 | timings 33068 4294967256 24 17 0 216 3 | ||
34 | endmode | ||
35 | |||
36 | mode "640x480-75" | ||
37 | geometry 640 480 2048 4096 8 | ||
38 | timings 31746 4294967256 24 17 0 216 3 | ||
39 | endmode | ||
40 | |||
41 | mode "640x480-80" | ||
42 | geometry 640 480 2048 4096 8 | ||
43 | timings 29761 4294967256 24 17 0 216 3 | ||
44 | endmode | ||
45 | |||
46 | mode "640x480-85" | ||
47 | geometry 640 480 2048 4096 8 | ||
48 | timings 28011 4294967256 24 17 0 216 3 | ||
49 | endmode | ||
50 | |||
51 | mode "800x600-50" | ||
52 | geometry 800 600 2048 4096 8 | ||
53 | timings 30303 96 24 14 0 136 11 | ||
54 | endmode | ||
55 | |||
56 | mode "800x600-60" | ||
57 | geometry 800 600 2048 4096 8 | ||
58 | timings 25252 96 24 14 0 136 11 | ||
59 | endmode | ||
60 | |||
61 | mode "800x600-70" | ||
62 | geometry 800 600 2048 4096 8 | ||
63 | timings 21645 96 24 14 0 136 11 | ||
64 | endmode | ||
65 | |||
66 | mode "800x600-72" | ||
67 | geometry 800 600 2048 4096 8 | ||
68 | timings 21043 96 24 14 0 136 11 | ||
69 | endmode | ||
70 | |||
71 | mode "800x600-75" | ||
72 | geometry 800 600 2048 4096 8 | ||
73 | timings 20202 96 24 14 0 136 11 | ||
74 | endmode | ||
75 | |||
76 | mode "800x600-80" | ||
77 | geometry 800 600 2048 4096 8 | ||
78 | timings 18939 96 24 14 0 136 11 | ||
79 | endmode | ||
80 | |||
81 | mode "800x600-85" | ||
82 | geometry 800 600 2048 4096 8 | ||
83 | timings 17825 96 24 14 0 136 11 | ||
84 | endmode | ||
85 | |||
86 | mode "1024x768-50" | ||
87 | geometry 1024 768 2048 4096 8 | ||
88 | timings 19054 144 24 29 0 120 3 | ||
89 | endmode | ||
90 | |||
91 | mode "1024x768-60" | ||
92 | geometry 1024 768 2048 4096 8 | ||
93 | timings 15880 144 24 29 0 120 3 | ||
94 | endmode | ||
95 | |||
96 | mode "1024x768-70" | ||
97 | geometry 1024 768 2048 4096 8 | ||
98 | timings 13610 144 24 29 0 120 3 | ||
99 | endmode | ||
100 | |||
101 | mode "1024x768-72" | ||
102 | geometry 1024 768 2048 4096 8 | ||
103 | timings 13232 144 24 29 0 120 3 | ||
104 | endmode | ||
105 | |||
106 | mode "1024x768-75" | ||
107 | geometry 1024 768 2048 4096 8 | ||
108 | timings 12703 144 24 29 0 120 3 | ||
109 | endmode | ||
110 | |||
111 | mode "1024x768-80" | ||
112 | geometry 1024 768 2048 4096 8 | ||
113 | timings 11910 144 24 29 0 120 3 | ||
114 | endmode | ||
115 | |||
116 | mode "1024x768-85" | ||
117 | geometry 1024 768 2048 4096 8 | ||
118 | timings 11209 144 24 29 0 120 3 | ||
119 | endmode | ||
120 | |||
121 | mode "1280x1024-50" | ||
122 | geometry 1280 1024 2048 4096 8 | ||
123 | timings 11114 232 16 39 0 160 3 | ||
124 | endmode | ||
125 | |||
126 | mode "1280x1024-60" | ||
127 | geometry 1280 1024 2048 4096 8 | ||
128 | timings 9262 232 16 39 0 160 3 | ||
129 | endmode | ||
130 | |||
131 | mode "1280x1024-70" | ||
132 | geometry 1280 1024 2048 4096 8 | ||
133 | timings 7939 232 16 39 0 160 3 | ||
134 | endmode | ||
135 | |||
136 | mode "1280x1024-72" | ||
137 | geometry 1280 1024 2048 4096 8 | ||
138 | timings 7719 232 16 39 0 160 3 | ||
139 | endmode | ||
140 | |||
141 | mode "1280x1024-75" | ||
142 | geometry 1280 1024 2048 4096 8 | ||
143 | timings 7410 232 16 39 0 160 3 | ||
144 | endmode | ||
145 | |||
146 | mode "1280x1024-80" | ||
147 | geometry 1280 1024 2048 4096 8 | ||
148 | timings 6946 232 16 39 0 160 3 | ||
149 | endmode | ||
150 | |||
151 | mode "1280x1024-85" | ||
152 | geometry 1280 1024 2048 4096 8 | ||
153 | timings 6538 232 16 39 0 160 3 | ||
154 | endmode | ||
diff --git a/Documentation/fb/cyblafb/performance b/Documentation/fb/cyblafb/performance deleted file mode 100644 index 8d15d5dfc6b3..000000000000 --- a/Documentation/fb/cyblafb/performance +++ /dev/null | |||
@@ -1,79 +0,0 @@ | |||
1 | Speed | ||
2 | ===== | ||
3 | |||
4 | CyBlaFB is much faster than tridentfb and vesafb. Compare the performance data | ||
5 | for mode 1280x1024-[8,16,32]@61 Hz. | ||
6 | |||
7 | Test 1: Cat a file with 2000 lines of 0 characters. | ||
8 | Test 2: Cat a file with 2000 lines of 80 characters. | ||
9 | Test 3: Cat a file with 2000 lines of 160 characters. | ||
10 | |||
11 | All values show system time use in seconds, kernel 2.6.12 was used for | ||
12 | the measurements. 2.6.13 is a bit slower, 2.6.14 hopefully will include a | ||
13 | patch that speeds up kernel bitblitting a lot ( > 20%). | ||
14 | |||
15 | +-----------+-----------------------------------------------------+ | ||
16 | | | not accelerated | | ||
17 | | TRIDENTFB +-----------------+-----------------+-----------------+ | ||
18 | | of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | | ||
19 | | | noypan | ypan | noypan | ypan | noypan | ypan | | ||
20 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
21 | | Test 1 | 4.31 | 4.33 | 6.05 | 12.81 | ---- | ---- | | ||
22 | | Test 2 | 67.94 | 5.44 | 123.16 | 14.79 | ---- | ---- | | ||
23 | | Test 3 | 131.36 | 6.55 | 240.12 | 16.76 | ---- | ---- | | ||
24 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
25 | | Comments | | | completely bro- | | ||
26 | | | | | ken, monitor | | ||
27 | | | | | switches off | | ||
28 | +-----------+-----------------+-----------------+-----------------+ | ||
29 | |||
30 | |||
31 | +-----------+-----------------------------------------------------+ | ||
32 | | | accelerated | | ||
33 | | TRIDENTFB +-----------------+-----------------+-----------------+ | ||
34 | | of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | | ||
35 | | | noypan | ypan | noypan | ypan | noypan | ypan | | ||
36 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
37 | | Test 1 | ---- | ---- | 20.62 | 1.22 | ---- | ---- | | ||
38 | | Test 2 | ---- | ---- | 22.61 | 3.19 | ---- | ---- | | ||
39 | | Test 3 | ---- | ---- | 24.59 | 5.16 | ---- | ---- | | ||
40 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
41 | | Comments | broken, writing | broken, ok only | completely bro- | | ||
42 | | | to wrong places | if bgcolor is | ken, monitor | | ||
43 | | | on screen + bug | black, bug in | switches off | | ||
44 | | | in fillrect() | fillrect() | | | ||
45 | +-----------+-----------------+-----------------+-----------------+ | ||
46 | |||
47 | |||
48 | +-----------+-----------------------------------------------------+ | ||
49 | | | not accelerated | | ||
50 | | VESAFB +-----------------+-----------------+-----------------+ | ||
51 | | of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | | ||
52 | | | noypan | ypan | noypan | ypan | noypan | ypan | | ||
53 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
54 | | Test 1 | 4.26 | 3.76 | 5.99 | 7.23 | ---- | ---- | | ||
55 | | Test 2 | 65.65 | 4.89 | 120.88 | 9.08 | ---- | ---- | | ||
56 | | Test 3 | 126.91 | 5.94 | 235.77 | 11.03 | ---- | ---- | | ||
57 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
58 | | Comments | vga=0x307 | vga=0x31a | vga=0x31b not | | ||
59 | | | fh=80kHz | fh=80kHz | supported by | | ||
60 | | | fv=75kHz | fv=75kHz | video BIOS and | | ||
61 | | | | | hardware | | ||
62 | +-----------+-----------------+-----------------+-----------------+ | ||
63 | |||
64 | |||
65 | +-----------+-----------------------------------------------------+ | ||
66 | | | accelerated | | ||
67 | | CYBLAFB +-----------------+-----------------+-----------------+ | ||
68 | | | 8 bpp | 16 bpp | 32 bpp | | ||
69 | | | noypan | ypan | noypan | ypan | noypan | ypan | | ||
70 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
71 | | Test 1 | 8.02 | 0.23 | 19.04 | 0.61 | 57.12 | 2.74 | | ||
72 | | Test 2 | 8.38 | 0.55 | 19.39 | 0.92 | 57.54 | 3.13 | | ||
73 | | Test 3 | 8.73 | 0.86 | 19.74 | 1.24 | 57.95 | 3.51 | | ||
74 | +-----------+--------+--------+--------+--------+--------+--------+ | ||
75 | | Comments | | | | | ||
76 | | | | | | | ||
77 | | | | | | | ||
78 | | | | | | | ||
79 | +-----------+-----------------+-----------------+-----------------+ | ||
diff --git a/Documentation/fb/cyblafb/todo b/Documentation/fb/cyblafb/todo deleted file mode 100644 index c5f6d0eae545..000000000000 --- a/Documentation/fb/cyblafb/todo +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | TODO / Missing features | ||
2 | ======================= | ||
3 | |||
4 | Verify LCD stuff "stretch" and "center" options are | ||
5 | completely untested ... this code needs to be | ||
6 | verified. As I don't have access to such | ||
7 | hardware, please contact me if you are | ||
8 | willing run some tests. | ||
9 | |||
10 | Interlaced video modes The reason that interleaved | ||
11 | modes are disabled is that I do not know | ||
12 | the meaning of the vertical interlace | ||
13 | parameter. Also the datasheet mentions a | ||
14 | bit d8 of a horizontal interlace parameter, | ||
15 | but nowhere the lower 8 bits. Please help | ||
16 | if you can. | ||
17 | |||
18 | low-res double scan modes Who needs it? | ||
19 | |||
20 | accelerated color blitting Who needs it? The console driver does use color | ||
21 | blitting for nothing but drawing the penguine, | ||
22 | everything else is done using color expanding | ||
23 | blitting of 1bpp character bitmaps. | ||
24 | |||
25 | ioctls Who needs it? | ||
26 | |||
27 | TV-out Will be done later. Use "vga= " at boot time | ||
28 | to set a suitable video mode. | ||
29 | |||
30 | ??? Feel free to contact me if you have any | ||
31 | feature requests | ||
diff --git a/Documentation/fb/cyblafb/usage b/Documentation/fb/cyblafb/usage deleted file mode 100644 index a39bb3d402a2..000000000000 --- a/Documentation/fb/cyblafb/usage +++ /dev/null | |||
@@ -1,217 +0,0 @@ | |||
1 | CyBlaFB is a framebuffer driver for the Cyberblade/i1 graphics core integrated | ||
2 | into the VIA Apollo PLE133 (aka vt8601) south bridge. It is developed and | ||
3 | tested using a VIA EPIA 5000 board. | ||
4 | |||
5 | Cyblafb - compiled into the kernel or as a module? | ||
6 | ================================================== | ||
7 | |||
8 | You might compile cyblafb either as a module or compile it permanently into the | ||
9 | kernel. | ||
10 | |||
11 | Unless you have a real reason to do so you should not compile both vesafb and | ||
12 | cyblafb permanently into the kernel. It's possible and it helps during the | ||
13 | developement cycle, but it's useless and will at least block some otherwise | ||
14 | usefull memory for ordinary users. | ||
15 | |||
16 | Selecting Modes | ||
17 | =============== | ||
18 | |||
19 | Startup Mode | ||
20 | ============ | ||
21 | |||
22 | First of all, you might use the "vga=???" boot parameter as it is | ||
23 | documented in vesafb.txt and svga.txt. Cyblafb will detect the video | ||
24 | mode selected and will use the geometry and timings found by | ||
25 | inspecting the hardware registers. | ||
26 | |||
27 | video=cyblafb vga=0x317 | ||
28 | |||
29 | Alternatively you might use a combination of the mode, ref and bpp | ||
30 | parameters. If you compiled the driver into the kernel, add something | ||
31 | like this to the kernel command line: | ||
32 | |||
33 | video=cyblafb:1280x1024,bpp=16,ref=50 ... | ||
34 | |||
35 | If you compiled the driver as a module, the same mode would be | ||
36 | selected by the following command: | ||
37 | |||
38 | modprobe cyblafb mode=1280x1024 bpp=16 ref=50 ... | ||
39 | |||
40 | None of the modes possible to select as startup modes are affected by | ||
41 | the problems described at the end of the next subsection. | ||
42 | |||
43 | For all startup modes cyblafb chooses a virtual x resolution of 2048, | ||
44 | the only exception is mode 1280x1024 in combination with 32 bpp. This | ||
45 | allows ywrap scrolling for all those modes if rotation is 0 or 2, and | ||
46 | also fast scrolling if rotation is 1 or 3. The default virtual y reso- | ||
47 | lution is 4096 for bpp == 8, 2048 for bpp==16 and 1024 for bpp == 32, | ||
48 | again with the only exception of 1280x1024 at 32 bpp. | ||
49 | |||
50 | Please do set your video memory size to 8 Mb in the Bios setup. Other | ||
51 | values will work, but performace is decreased for a lot of modes. | ||
52 | |||
53 | Mode changes using fbset | ||
54 | ======================== | ||
55 | |||
56 | You might use fbset to change the video mode, see "man fbset". Cyblafb | ||
57 | generally does assume that you know what you are doing. But it does | ||
58 | some checks, especially those that are needed to prevent you from | ||
59 | damaging your hardware. | ||
60 | |||
61 | - only 8, 16, 24 and 32 bpp video modes are accepted | ||
62 | - interlaced video modes are not accepted | ||
63 | - double scan video modes are not accepted | ||
64 | - if a flat panel is found, cyblafb does not allow you | ||
65 | to program a resolution higher than the physical | ||
66 | resolution of the flat panel monitor | ||
67 | - cyblafb does not allow vclk to exceed 230 MHz. As 32 bpp | ||
68 | and (currently) 24 bit modes use a doubled vclk internally, | ||
69 | the dotclock limit as seen by fbset is 115 MHz for those | ||
70 | modes and 230 MHz for 8 and 16 bpp modes. | ||
71 | - cyblafb will allow you to select very high resolutions as | ||
72 | long as the hardware can be programmed to these modes. The | ||
73 | documented limit 1600x1200 is not enforced, but don't expect | ||
74 | perfect signal quality. | ||
75 | |||
76 | Any request that violates the rules given above will be either changed | ||
77 | to something the hardware supports or an error value will be returned. | ||
78 | |||
79 | If you program a virtual y resolution higher than the hardware limit, | ||
80 | cyblafb will silently decrease that value to the highest possible | ||
81 | value. The same is true for a virtual x resolution that is not | ||
82 | supported by the hardware. Cyblafb tries to adapt vyres first because | ||
83 | vxres decides if ywrap scrolling is possible or not. | ||
84 | |||
85 | Attempts to disable acceleration are ignored, I believe that this is | ||
86 | safe. | ||
87 | |||
88 | Some video modes that should work do not work as expected. If you use | ||
89 | the standard fb.modes, fbset 640x480-60 will program that mode, but | ||
90 | you will see a vertical area, about two characters wide, with only | ||
91 | much darker characters than the other characters on the screen. | ||
92 | Cyblafb does allow that mode to be set, as it does not violate the | ||
93 | official specifications. It would need a lot of code to reliably sort | ||
94 | out all invalid modes, playing around with the margin values will | ||
95 | give a valid mode quickly. And if cyblafb would detect such an invalid | ||
96 | mode, should it silently alter the requested values or should it | ||
97 | report an error? Both options have some pros and cons. As stated | ||
98 | above, none of the startup modes are affected, and if you set | ||
99 | verbosity to 1 or higher, cyblafb will print the fbset command that | ||
100 | would be needed to program that mode using fbset. | ||
101 | |||
102 | |||
103 | Other Parameters | ||
104 | ================ | ||
105 | |||
106 | |||
107 | crt don't autodetect, assume monitor connected to | ||
108 | standard VGA connector | ||
109 | |||
110 | fp don't autodetect, assume flat panel display | ||
111 | connected to flat panel monitor interface | ||
112 | |||
113 | nativex inform driver about native x resolution of | ||
114 | flat panel monitor connected to special | ||
115 | interface (should be autodetected) | ||
116 | |||
117 | stretch stretch image to adapt low resolution modes to | ||
118 | higer resolutions of flat panel monitors | ||
119 | connected to special interface | ||
120 | |||
121 | center center image to adapt low resolution modes to | ||
122 | higer resolutions of flat panel monitors | ||
123 | connected to special interface | ||
124 | |||
125 | memsize use if autodetected memsize is wrong ... | ||
126 | should never be necessary | ||
127 | |||
128 | nopcirr disable PCI read retry | ||
129 | nopciwr disable PCI write retry | ||
130 | nopcirb disable PCI read bursts | ||
131 | nopciwb disable PCI write bursts | ||
132 | |||
133 | bpp bpp for specified modes | ||
134 | valid values: 8 || 16 || 24 || 32 | ||
135 | |||
136 | ref refresh rate for specified mode | ||
137 | valid values: 50 <= ref <= 85 | ||
138 | |||
139 | mode 640x480 or 800x600 or 1024x768 or 1280x1024 | ||
140 | if not specified, the startup mode will be detected | ||
141 | and used, so you might also use the vga=??? parameter | ||
142 | described in vesafb.txt. If you do not specify a mode, | ||
143 | bpp and ref parameters are ignored. | ||
144 | |||
145 | verbosity 0 is the default, increase to at least 2 for every | ||
146 | bug report! | ||
147 | |||
148 | Development hints | ||
149 | ================= | ||
150 | |||
151 | It's much faster do compile a module and to load the new version after | ||
152 | unloading the old module than to compile a new kernel and to reboot. So if you | ||
153 | try to work on cyblafb, it might be a good idea to use cyblafb as a module. | ||
154 | In real life, fast often means dangerous, and that's also the case here. If | ||
155 | you introduce a serious bug when cyblafb is compiled into the kernel, the | ||
156 | kernel will lock or oops with a high probability before the file system is | ||
157 | mounted, and the danger for your data is low. If you load a broken own version | ||
158 | of cyblafb on a running system, the danger for the integrity of the file | ||
159 | system is much higher as you might need a hard reset afterwards. Decide | ||
160 | yourself. | ||
161 | |||
162 | Module unloading, the vfb method | ||
163 | ================================ | ||
164 | |||
165 | If you want to unload/reload cyblafb using the virtual framebuffer, you need | ||
166 | to enable vfb support in the kernel first. After that, load the modules as | ||
167 | shown below: | ||
168 | |||
169 | modprobe vfb vfb_enable=1 | ||
170 | modprobe fbcon | ||
171 | modprobe cyblafb | ||
172 | fbset -fb /dev/fb1 1280x1024-60 -vyres 2662 | ||
173 | con2fb /dev/fb1 /dev/tty1 | ||
174 | ... | ||
175 | |||
176 | If you now made some changes to cyblafb and want to reload it, you might do it | ||
177 | as show below: | ||
178 | |||
179 | con2fb /dev/fb0 /dev/tty1 | ||
180 | ... | ||
181 | rmmod cyblafb | ||
182 | modprobe cyblafb | ||
183 | con2fb /dev/fb1 /dev/tty1 | ||
184 | ... | ||
185 | |||
186 | Of course, you might choose another mode, and most certainly you also want to | ||
187 | map some other /dev/tty* to the real framebuffer device. You might also choose | ||
188 | to compile fbcon as a kernel module or place it permanently in the kernel. | ||
189 | |||
190 | I do not know of any way to unload fbcon, and fbcon will prevent the | ||
191 | framebuffer device loaded first from unloading. [If there is a way, then | ||
192 | please add a description here!] | ||
193 | |||
194 | Module unloading, the vesafb method | ||
195 | =================================== | ||
196 | |||
197 | Configure the kernel: | ||
198 | |||
199 | <*> Support for frame buffer devices | ||
200 | [*] VESA VGA graphics support | ||
201 | <M> Cyberblade/i1 support | ||
202 | |||
203 | Add e.g. "video=vesafb:ypan vga=0x307" to the kernel parameters. The ypan | ||
204 | parameter is important, choose any vga parameter you like as long as it is | ||
205 | a graphics mode. | ||
206 | |||
207 | After booting, load cyblafb without any mode and bpp parameter and assign | ||
208 | cyblafb to individual ttys using con2fb, e.g.: | ||
209 | |||
210 | modprobe cyblafb | ||
211 | con2fb /dev/fb1 /dev/tty1 | ||
212 | |||
213 | Unloading cyblafb works without problems after you assign vesafb to all | ||
214 | ttys again, e.g.: | ||
215 | |||
216 | con2fb /dev/fb0 /dev/tty1 | ||
217 | rmmod cyblafb | ||
diff --git a/Documentation/fb/cyblafb/whatsnew b/Documentation/fb/cyblafb/whatsnew deleted file mode 100644 index 76c07a26e044..000000000000 --- a/Documentation/fb/cyblafb/whatsnew +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | 0.62 | ||
2 | ==== | ||
3 | |||
4 | - the vesafb parameter has been removed as I decided to allow the | ||
5 | feature without any special parameter. | ||
6 | |||
7 | - Cyblafb does not use the vga style of panning any longer, now the | ||
8 | "right view" register in the graphics engine IO space is used. Without | ||
9 | that change it was impossible to use all available memory, and without | ||
10 | access to all available memory it is impossible to ywrap. | ||
11 | |||
12 | - The imageblit function now uses hardware acceleration for all font | ||
13 | widths. Hardware blitting across pixel column 2048 is broken in the | ||
14 | cyberblade/i1 graphics core, but we work around that hardware bug. | ||
15 | |||
16 | - modes with vxres != xres are supported now. | ||
17 | |||
18 | - ywrap scrolling is supported now and the default. This is a big | ||
19 | performance gain. | ||
20 | |||
21 | - default video modes use vyres > yres and vxres > xres to allow | ||
22 | almost optimal scrolling speed for normal and rotated screens | ||
23 | |||
24 | - some features mainly usefull for debugging the upper layers of the | ||
25 | framebuffer system have been added, have a look at the code | ||
26 | |||
27 | - fixed: Oops after unloading cyblafb when reading /proc/io* | ||
28 | |||
29 | - we work around some bugs of the higher framebuffer layers. | ||
diff --git a/Documentation/fb/cyblafb/whycyblafb b/Documentation/fb/cyblafb/whycyblafb deleted file mode 100644 index a123bc11e698..000000000000 --- a/Documentation/fb/cyblafb/whycyblafb +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | I tried the following framebuffer drivers: | ||
2 | |||
3 | - TRIDENTFB is full of bugs. Acceleration is broken for Blade3D | ||
4 | graphics cores like the cyberblade/i1. It claims to support a great | ||
5 | number of devices, but documentation for most of these devices is | ||
6 | unfortunately not available. There is _no_ reason to use tridentfb | ||
7 | for cyberblade/i1 + CRT users. VESAFB is faster, and the one | ||
8 | advantage, mode switching, is broken in tridentfb. | ||
9 | |||
10 | - VESAFB is used by many distributions as a standard. Vesafb does | ||
11 | not support mode switching. VESAFB is a bit faster than the working | ||
12 | configurations of TRIDENTFB, but it is still too slow, even if you | ||
13 | use ypan. | ||
14 | |||
15 | - EPIAFB (you'll find it on sourceforge) supports the Cyberblade/i1 | ||
16 | graphics core, but it still has serious bugs and developement seems | ||
17 | to have stopped. This is the one driver with TV-out support. If you | ||
18 | do need this feature, try epiafb. | ||
19 | |||
20 | None of these drivers was a real option for me. | ||
21 | |||
22 | I believe that is unreasonable to change code that announces to support 20 | ||
23 | devices if I only have more or less sufficient documentation for exactly one | ||
24 | of these. The risk of breaking device foo while fixing device bar is too high. | ||
25 | |||
26 | So I decided to start CyBlaFB as a stripped down tridentfb. | ||
27 | |||
28 | All code specific to other Trident chips has been removed. After that there | ||
29 | were a lot of cosmetic changes to increase the readability of the code. All | ||
30 | register names were changed to those mnemonics used in the datasheet. Function | ||
31 | and macro names were changed if they hindered easy understanding of the code. | ||
32 | |||
33 | After that I debugged the code and implemented some new features. I'll try to | ||
34 | give a little summary of the main changes: | ||
35 | |||
36 | - calculation of vertical and horizontal timings was fixed | ||
37 | |||
38 | - video signal quality has been improved dramatically | ||
39 | |||
40 | - acceleration: | ||
41 | |||
42 | - fillrect and copyarea were fixed and reenabled | ||
43 | |||
44 | - color expanding imageblit was newly implemented, color | ||
45 | imageblit (only used to draw the penguine) still uses the | ||
46 | generic code. | ||
47 | |||
48 | - init of the acceleration engine was improved and moved to a | ||
49 | place where it really works ... | ||
50 | |||
51 | - sync function has a timeout now and tries to reset and | ||
52 | reinit the accel engine if necessary | ||
53 | |||
54 | - fewer slow copyarea calls when doing ypan scrolling by using | ||
55 | undocumented bit d21 of screen start address stored in | ||
56 | CR2B[5]. BIOS does use it also, so this should be safe. | ||
57 | |||
58 | - cyblafb rejects any attempt to set modes that would cause vclk | ||
59 | values above reasonable 230 MHz. 32bit modes use a clock | ||
60 | multiplicator of 2, so fbset does show the correct values for | ||
61 | pixclock but not for vclk in this case. The fbset limit is 115 MHz | ||
62 | for 32 bpp modes. | ||
63 | |||
64 | - cyblafb rejects modes known to be broken or unimplemented (all | ||
65 | interlaced modes, all doublescan modes for now) | ||
66 | |||
67 | - cyblafb now works independant of the video mode in effect at startup | ||
68 | time (tridentfb does not init all needed registers to reasonable | ||
69 | values) | ||
70 | |||
71 | - switching between video modes does work reliably now | ||
72 | |||
73 | - the first video mode now is the one selected on startup using the | ||
74 | vga=???? mechanism or any of | ||
75 | - 640x480, 800x600, 1024x768, 1280x1024 | ||
76 | - 8, 16, 24 or 32 bpp | ||
77 | - refresh between 50 Hz and 85 Hz, 1 Hz steps (1280x1024-32 | ||
78 | is limited to 63Hz) | ||
79 | |||
80 | - pci retry and pci burst mode are settable (try to disable if you | ||
81 | experience latency problems) | ||
82 | |||
83 | - built as a module cyblafb might be unloaded and reloaded using | ||
84 | the vfb module and con2vt or might be used together with vesafb | ||
85 | |||
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt index c87bfe5c630a..b994c3b10549 100644 --- a/Documentation/fb/sh7760fb.txt +++ b/Documentation/fb/sh7760fb.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | SH7760/SH7763 integrated LCDC Framebuffer driver | 1 | SH7760/SH7763 integrated LCDC Framebuffer driver |
2 | ================================================ | 2 | ================================================ |
3 | 3 | ||
4 | 0. Overwiew | 4 | 0. Overview |
5 | ----------- | 5 | ----------- |
6 | The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which | 6 | The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which |
7 | supports (in theory) resolutions ranging from 1x1 to 1024x1024, | 7 | supports (in theory) resolutions ranging from 1x1 to 1024x1024, |
diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt index 7ac3c4078ff9..eefdd91d298a 100644 --- a/Documentation/fb/uvesafb.txt +++ b/Documentation/fb/uvesafb.txt | |||
@@ -59,7 +59,8 @@ Accepted options: | |||
59 | ypan Enable display panning using the VESA protected mode | 59 | ypan Enable display panning using the VESA protected mode |
60 | interface. The visible screen is just a window of the | 60 | interface. The visible screen is just a window of the |
61 | video memory, console scrolling is done by changing the | 61 | video memory, console scrolling is done by changing the |
62 | start of the window. Available on x86 only. | 62 | start of the window. This option is available on x86 |
63 | only and is the default option on that architecture. | ||
63 | 64 | ||
64 | ywrap Same as ypan, but assumes your gfx board can wrap-around | 65 | ywrap Same as ypan, but assumes your gfx board can wrap-around |
65 | the video memory (i.e. starts reading from top if it | 66 | the video memory (i.e. starts reading from top if it |
@@ -67,7 +68,7 @@ ywrap Same as ypan, but assumes your gfx board can wrap-around | |||
67 | Available on x86 only. | 68 | Available on x86 only. |
68 | 69 | ||
69 | redraw Scroll by redrawing the affected part of the screen, this | 70 | redraw Scroll by redrawing the affected part of the screen, this |
70 | is the safe (and slow) default. | 71 | is the default on non-x86. |
71 | 72 | ||
72 | (If you're using uvesafb as a module, the above three options are | 73 | (If you're using uvesafb as a module, the above three options are |
73 | used a parameter of the scroll option, e.g. scroll=ypan.) | 74 | used a parameter of the scroll option, e.g. scroll=ypan.) |
@@ -182,7 +183,7 @@ from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo. | |||
182 | 183 | ||
183 | -- | 184 | -- |
184 | Michal Januszewski <spock@gentoo.org> | 185 | Michal Januszewski <spock@gentoo.org> |
185 | Last updated: 2007-06-16 | 186 | Last updated: 2009-03-30 |
186 | 187 | ||
187 | Documentation of the uvesafb options is loosely based on vesafb.txt. | 188 | Documentation of the uvesafb options is loosely based on vesafb.txt. |
188 | 189 | ||
diff --git a/Documentation/fb/vesafb.txt b/Documentation/fb/vesafb.txt index ee277dd204b0..950d5a658cb3 100644 --- a/Documentation/fb/vesafb.txt +++ b/Documentation/fb/vesafb.txt | |||
@@ -95,7 +95,7 @@ There is no way to change the vesafb video mode and/or timings after | |||
95 | booting linux. If you are not happy with the 60 Hz refresh rate, you | 95 | booting linux. If you are not happy with the 60 Hz refresh rate, you |
96 | have these options: | 96 | have these options: |
97 | 97 | ||
98 | * configure and load the DOS-Tools for your the graphics board (if | 98 | * configure and load the DOS-Tools for the graphics board (if |
99 | available) and boot linux with loadlin. | 99 | available) and boot linux with loadlin. |
100 | * use a native driver (matroxfb/atyfb) instead if vesafb. If none | 100 | * use a native driver (matroxfb/atyfb) instead if vesafb. If none |
101 | is available, write a new one! | 101 | is available, write a new one! |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 20d3b94703a4..f8cd450be9aa 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -6,20 +6,61 @@ be removed from this file. | |||
6 | 6 | ||
7 | --------------------------- | 7 | --------------------------- |
8 | 8 | ||
9 | What: old static regulatory information and ieee80211_regdom module parameter | 9 | What: IRQF_SAMPLE_RANDOM |
10 | When: 2.6.29 | 10 | Check: IRQF_SAMPLE_RANDOM |
11 | When: July 2009 | ||
12 | |||
13 | Why: Many of IRQF_SAMPLE_RANDOM users are technically bogus as entropy | ||
14 | sources in the kernel's current entropy model. To resolve this, every | ||
15 | input point to the kernel's entropy pool needs to better document the | ||
16 | type of entropy source it actually is. This will be replaced with | ||
17 | additional add_*_randomness functions in drivers/char/random.c | ||
18 | |||
19 | Who: Robin Getz <rgetz@blackfin.uclinux.org> & Matt Mackall <mpm@selenic.com> | ||
20 | |||
21 | --------------------------- | ||
22 | |||
23 | What: The ieee80211_regdom module parameter | ||
24 | When: March 2010 / desktop catchup | ||
25 | |||
26 | Why: This was inherited by the CONFIG_WIRELESS_OLD_REGULATORY code, | ||
27 | and currently serves as an option for users to define an | ||
28 | ISO / IEC 3166 alpha2 code for the country they are currently | ||
29 | present in. Although there are userspace API replacements for this | ||
30 | through nl80211 distributions haven't yet caught up with implementing | ||
31 | decent alternatives through standard GUIs. Although available as an | ||
32 | option through iw or wpa_supplicant its just a matter of time before | ||
33 | distributions pick up good GUI options for this. The ideal solution | ||
34 | would actually consist of intelligent designs which would do this for | ||
35 | the user automatically even when travelling through different countries. | ||
36 | Until then we leave this module parameter as a compromise. | ||
37 | |||
38 | When userspace improves with reasonable widely-available alternatives for | ||
39 | this we will no longer need this module parameter. This entry hopes that | ||
40 | by the super-futuristically looking date of "March 2010" we will have | ||
41 | such replacements widely available. | ||
42 | |||
43 | Who: Luis R. Rodriguez <lrodriguez@atheros.com> | ||
44 | |||
45 | --------------------------- | ||
46 | |||
47 | What: CONFIG_WIRELESS_OLD_REGULATORY - old static regulatory information | ||
48 | When: March 2010 / desktop catchup | ||
49 | |||
11 | Why: The old regulatory infrastructure has been replaced with a new one | 50 | Why: The old regulatory infrastructure has been replaced with a new one |
12 | which does not require statically defined regulatory domains. We do | 51 | which does not require statically defined regulatory domains. We do |
13 | not want to keep static regulatory domains in the kernel due to the | 52 | not want to keep static regulatory domains in the kernel due to the |
14 | the dynamic nature of regulatory law and localization. We kept around | 53 | the dynamic nature of regulatory law and localization. We kept around |
15 | the old static definitions for the regulatory domains of: | 54 | the old static definitions for the regulatory domains of: |
55 | |||
16 | * US | 56 | * US |
17 | * JP | 57 | * JP |
18 | * EU | 58 | * EU |
59 | |||
19 | and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was | 60 | and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was |
20 | set. We also kept around the ieee80211_regdom module parameter in case | 61 | set. We will remove this option once the standard Linux desktop catches |
21 | some applications were relying on it. Changing regulatory domains | 62 | up with the new userspace APIs we have implemented. |
22 | can now be done instead by using nl80211, as is done with iw. | 63 | |
23 | Who: Luis R. Rodriguez <lrodriguez@atheros.com> | 64 | Who: Luis R. Rodriguez <lrodriguez@atheros.com> |
24 | 65 | ||
25 | --------------------------- | 66 | --------------------------- |
@@ -37,10 +78,10 @@ Who: Pavel Machek <pavel@suse.cz> | |||
37 | 78 | ||
38 | --------------------------- | 79 | --------------------------- |
39 | 80 | ||
40 | What: Video4Linux API 1 ioctls and video_decoder.h from Video devices. | 81 | What: Video4Linux API 1 ioctls and from Video devices. |
41 | When: December 2008 | 82 | When: July 2009 |
42 | Files: include/linux/video_decoder.h include/linux/videodev.h | 83 | Files: include/linux/videodev.h |
43 | Check: include/linux/video_decoder.h include/linux/videodev.h | 84 | Check: include/linux/videodev.h |
44 | Why: V4L1 AP1 was replaced by V4L2 API during migration from 2.4 to 2.6 | 85 | Why: V4L1 AP1 was replaced by V4L2 API during migration from 2.4 to 2.6 |
45 | series. The old API have lots of drawbacks and don't provide enough | 86 | series. The old API have lots of drawbacks and don't provide enough |
46 | means to work with all video and audio standards. The newer API is | 87 | means to work with all video and audio standards. The newer API is |
@@ -228,8 +269,20 @@ Who: Jan Engelhardt <jengelh@computergmbh.de> | |||
228 | 269 | ||
229 | --------------------------- | 270 | --------------------------- |
230 | 271 | ||
272 | What: GPIO autorequest on gpio_direction_{input,output}() in gpiolib | ||
273 | When: February 2010 | ||
274 | Why: All callers should use explicit gpio_request()/gpio_free(). | ||
275 | The autorequest mechanism in gpiolib was provided mostly as a | ||
276 | migration aid for legacy GPIO interfaces (for SOC based GPIOs). | ||
277 | Those users have now largely migrated. Platforms implementing | ||
278 | the GPIO interfaces without using gpiolib will see no changes. | ||
279 | Who: David Brownell <dbrownell@users.sourceforge.net> | ||
280 | --------------------------- | ||
281 | |||
231 | What: b43 support for firmware revision < 410 | 282 | What: b43 support for firmware revision < 410 |
232 | When: July 2008 | 283 | When: The schedule was July 2008, but it was decided that we are going to keep the |
284 | code as long as there are no major maintanance headaches. | ||
285 | So it _could_ be removed _any_ time now, if it conflicts with something new. | ||
233 | Why: The support code for the old firmware hurts code readability/maintainability | 286 | Why: The support code for the old firmware hurts code readability/maintainability |
234 | and slightly hurts runtime performance. Bugfixes for the old firmware | 287 | and slightly hurts runtime performance. Bugfixes for the old firmware |
235 | are not provided by Broadcom anymore. | 288 | are not provided by Broadcom anymore. |
@@ -244,13 +297,6 @@ Who: Glauber Costa <gcosta@redhat.com> | |||
244 | 297 | ||
245 | --------------------------- | 298 | --------------------------- |
246 | 299 | ||
247 | What: remove HID compat support | ||
248 | When: 2.6.29 | ||
249 | Why: needed only as a temporary solution until distros fix themselves up | ||
250 | Who: Jiri Slaby <jirislaby@gmail.com> | ||
251 | |||
252 | --------------------------- | ||
253 | |||
254 | What: print_fn_descriptor_symbol() | 300 | What: print_fn_descriptor_symbol() |
255 | When: October 2009 | 301 | When: October 2009 |
256 | Why: The %pF vsprintf format provides the same functionality in a | 302 | Why: The %pF vsprintf format provides the same functionality in a |
@@ -282,6 +328,18 @@ Who: Vlad Yasevich <vladislav.yasevich@hp.com> | |||
282 | 328 | ||
283 | --------------------------- | 329 | --------------------------- |
284 | 330 | ||
331 | What: Ability for non root users to shm_get hugetlb pages based on mlock | ||
332 | resource limits | ||
333 | When: 2.6.31 | ||
334 | Why: Non root users need to be part of /proc/sys/vm/hugetlb_shm_group or | ||
335 | have CAP_IPC_LOCK to be able to allocate shm segments backed by | ||
336 | huge pages. The mlock based rlimit check to allow shm hugetlb is | ||
337 | inconsistent with mmap based allocations. Hence it is being | ||
338 | deprecated. | ||
339 | Who: Ravikiran Thirumalai <kiran@scalex86.org> | ||
340 | |||
341 | --------------------------- | ||
342 | |||
285 | What: CONFIG_THERMAL_HWMON | 343 | What: CONFIG_THERMAL_HWMON |
286 | When: January 2009 | 344 | When: January 2009 |
287 | Why: This option was introduced just to allow older lm-sensors userspace | 345 | Why: This option was introduced just to allow older lm-sensors userspace |
@@ -310,14 +368,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl> | |||
310 | 368 | ||
311 | --------------------------- | 369 | --------------------------- |
312 | 370 | ||
313 | What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client() | ||
314 | When: 2.6.29 (ideally) or 2.6.30 (more likely) | ||
315 | Why: Deprecated by the new (standard) device driver binding model. Use | ||
316 | i2c_driver->probe() and ->remove() instead. | ||
317 | Who: Jean Delvare <khali@linux-fr.org> | ||
318 | |||
319 | --------------------------- | ||
320 | |||
321 | What: fscher and fscpos drivers | 371 | What: fscher and fscpos drivers |
322 | When: June 2009 | 372 | When: June 2009 |
323 | Why: Deprecated by the new fschmd driver. | 373 | Why: Deprecated by the new fschmd driver. |
@@ -326,17 +376,6 @@ Who: Hans de Goede <hdegoede@redhat.com> | |||
326 | 376 | ||
327 | --------------------------- | 377 | --------------------------- |
328 | 378 | ||
329 | What: SELinux "compat_net" functionality | ||
330 | When: 2.6.30 at the earliest | ||
331 | Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net" | ||
332 | network access control functionality of SELinux. Secmark offers both | ||
333 | better performance and greater flexibility than the "compat_net" | ||
334 | mechanism. Now that the major Linux distributions have moved to | ||
335 | Secmark, it is time to deprecate the older mechanism and start the | ||
336 | process of removing the old code. | ||
337 | Who: Paul Moore <paul.moore@hp.com> | ||
338 | --------------------------- | ||
339 | |||
340 | What: sysfs ui for changing p4-clockmod parameters | 379 | What: sysfs ui for changing p4-clockmod parameters |
341 | When: September 2009 | 380 | When: September 2009 |
342 | Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and | 381 | Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and |
@@ -344,3 +383,78 @@ Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and | |||
344 | Removal is subject to fixing any remaining bugs in ACPI which may | 383 | Removal is subject to fixing any remaining bugs in ACPI which may |
345 | cause the thermal throttling not to happen at the right time. | 384 | cause the thermal throttling not to happen at the right time. |
346 | Who: Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com> | 385 | Who: Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com> |
386 | |||
387 | ----------------------------- | ||
388 | |||
389 | What: __do_IRQ all in one fits nothing interrupt handler | ||
390 | When: 2.6.32 | ||
391 | Why: __do_IRQ was kept for easy migration to the type flow handlers. | ||
392 | More than two years of migration time is enough. | ||
393 | Who: Thomas Gleixner <tglx@linutronix.de> | ||
394 | |||
395 | ----------------------------- | ||
396 | |||
397 | What: obsolete generic irq defines and typedefs | ||
398 | When: 2.6.30 | ||
399 | Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) | ||
400 | have been kept around for migration reasons. After more than two years | ||
401 | it's time to remove them finally | ||
402 | Who: Thomas Gleixner <tglx@linutronix.de> | ||
403 | |||
404 | --------------------------- | ||
405 | |||
406 | What: fakephp and associated sysfs files in /sys/bus/pci/slots/ | ||
407 | When: 2011 | ||
408 | Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to | ||
409 | represent a machine's physical PCI slots. The change in semantics | ||
410 | had userspace implications, as the hotplug core no longer allowed | ||
411 | drivers to create multiple sysfs files per physical slot (required | ||
412 | for multi-function devices, e.g.). fakephp was seen as a developer's | ||
413 | tool only, and its interface changed. Too late, we learned that | ||
414 | there were some users of the fakephp interface. | ||
415 | |||
416 | In 2.6.30, the original fakephp interface was restored. At the same | ||
417 | time, the PCI core gained the ability that fakephp provided, namely | ||
418 | function-level hot-remove and hot-add. | ||
419 | |||
420 | Since the PCI core now provides the same functionality, exposed in: | ||
421 | |||
422 | /sys/bus/pci/rescan | ||
423 | /sys/bus/pci/devices/.../remove | ||
424 | /sys/bus/pci/devices/.../rescan | ||
425 | |||
426 | there is no functional reason to maintain fakephp as well. | ||
427 | |||
428 | We will keep the existing module so that 'modprobe fakephp' will | ||
429 | present the old /sys/bus/pci/slots/... interface for compatibility, | ||
430 | but users are urged to migrate their applications to the API above. | ||
431 | |||
432 | After a reasonable transition period, we will remove the legacy | ||
433 | fakephp interface. | ||
434 | Who: Alex Chiang <achiang@hp.com> | ||
435 | |||
436 | --------------------------- | ||
437 | |||
438 | What: i2c-voodoo3 driver | ||
439 | When: October 2009 | ||
440 | Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate | ||
441 | driver but this caused driver conflicts. | ||
442 | Who: Jean Delvare <khali@linux-fr.org> | ||
443 | Krzysztof Helt <krzysztof.h1@wp.pl> | ||
444 | |||
445 | --------------------------- | ||
446 | |||
447 | What: CONFIG_RFKILL_INPUT | ||
448 | When: 2.6.33 | ||
449 | Why: Should be implemented in userspace, policy daemon. | ||
450 | Who: Johannes Berg <johannes@sipsolutions.net> | ||
451 | |||
452 | ---------------------------- | ||
453 | |||
454 | What: CONFIG_X86_OLD_MCE | ||
455 | When: 2.6.32 | ||
456 | Why: Remove the old legacy 32bit machine check code. This has been | ||
457 | superseded by the newer machine check code from the 64bit port, | ||
458 | but the old version has been kept around for easier testing. Note this | ||
459 | doesn't impact the old P5 and WinChip machine check handlers. | ||
460 | Who: Andi Kleen <andi@firstfloor.org> | ||
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 52cd611277a3..f15621ee5599 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -66,8 +66,14 @@ mandatory-locking.txt | |||
66 | - info on the Linux implementation of Sys V mandatory file locking. | 66 | - info on the Linux implementation of Sys V mandatory file locking. |
67 | ncpfs.txt | 67 | ncpfs.txt |
68 | - info on Novell Netware(tm) filesystem using NCP protocol. | 68 | - info on Novell Netware(tm) filesystem using NCP protocol. |
69 | nfs41-server.txt | ||
70 | - info on the Linux server implementation of NFSv4 minor version 1. | ||
71 | nfs-rdma.txt | ||
72 | - how to install and setup the Linux NFS/RDMA client and server software. | ||
69 | nfsroot.txt | 73 | nfsroot.txt |
70 | - short guide on setting up a diskless box with NFS root filesystem. | 74 | - short guide on setting up a diskless box with NFS root filesystem. |
75 | nilfs2.txt | ||
76 | - info and mount options for the NILFS2 filesystem. | ||
71 | ntfs.txt | 77 | ntfs.txt |
72 | - info and mount options for the NTFS filesystem (Windows NT). | 78 | - info and mount options for the NTFS filesystem (Windows NT). |
73 | ocfs2.txt | 79 | ocfs2.txt |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ec6a9392a173..229d7b7c50a3 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -187,7 +187,7 @@ readpages: no | |||
187 | write_begin: no locks the page yes | 187 | write_begin: no locks the page yes |
188 | write_end: no yes, unlocks yes | 188 | write_end: no yes, unlocks yes |
189 | perform_write: no n/a yes | 189 | perform_write: no n/a yes |
190 | bmap: yes | 190 | bmap: no |
191 | invalidatepage: no yes | 191 | invalidatepage: no yes |
192 | releasepage: no yes | 192 | releasepage: no yes |
193 | direct_IO: no | 193 | direct_IO: no |
@@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that | |||
437 | can and should be done using the internal locking with smaller critical areas). | 437 | can and should be done using the internal locking with smaller critical areas). |
438 | Current worst offender is ext2_get_block()... | 438 | Current worst offender is ext2_get_block()... |
439 | 439 | ||
440 | ->fasync() is a mess. This area needs a big cleanup and that will probably | 440 | ->fasync() is called without BKL protection, and is responsible for |
441 | affect locking. | 441 | maintaining the FASYNC bit in filp->f_flags. Most instances call |
442 | fasync_helper(), which does that maintenance, so it's not normally | ||
443 | something one needs to worry about. Return values > 0 will be mapped to | ||
444 | zero in the VFS layer. | ||
442 | 445 | ||
443 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would | 446 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would |
444 | move ->readdir() to inode_operations and use a separate method for directory | 447 | move ->readdir() to inode_operations and use a separate method for directory |
@@ -502,23 +505,31 @@ prototypes: | |||
502 | void (*open)(struct vm_area_struct*); | 505 | void (*open)(struct vm_area_struct*); |
503 | void (*close)(struct vm_area_struct*); | 506 | void (*close)(struct vm_area_struct*); |
504 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | 507 | int (*fault)(struct vm_area_struct*, struct vm_fault *); |
505 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); | 508 | int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); |
506 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | 509 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); |
507 | 510 | ||
508 | locking rules: | 511 | locking rules: |
509 | BKL mmap_sem PageLocked(page) | 512 | BKL mmap_sem PageLocked(page) |
510 | open: no yes | 513 | open: no yes |
511 | close: no yes | 514 | close: no yes |
512 | fault: no yes | 515 | fault: no yes can return with page locked |
513 | page_mkwrite: no yes no | 516 | page_mkwrite: no yes can return with page locked |
514 | access: no yes | 517 | access: no yes |
515 | 518 | ||
516 | ->page_mkwrite() is called when a previously read-only page is | 519 | ->fault() is called when a previously not present pte is about |
517 | about to become writeable. The file system is responsible for | 520 | to be faulted in. The filesystem must find and return the page associated |
518 | protecting against truncate races. Once appropriate action has been | 521 | with the passed in "pgoff" in the vm_fault structure. If it is possible that |
519 | taking to lock out truncate, the page range should be verified to be | 522 | the page may be truncated and/or invalidated, then the filesystem must lock |
520 | within i_size. The page mapping should also be checked that it is not | 523 | the page, then ensure it is not already truncated (the page lock will block |
521 | NULL. | 524 | subsequent truncate), and then return with VM_FAULT_LOCKED, and the page |
525 | locked. The VM will unlock the page. | ||
526 | |||
527 | ->page_mkwrite() is called when a previously read-only pte is | ||
528 | about to become writeable. The filesystem again must ensure that there are | ||
529 | no truncate/invalidate races, and then return with the page locked. If | ||
530 | the page has been truncated, the filesystem should not look up a new page | ||
531 | like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which | ||
532 | will cause the VM to retry the fault. | ||
522 | 533 | ||
523 | ->access() is called when get_user_pages() fails in | 534 | ->access() is called when get_user_pages() fails in |
524 | acces_process_vm(), typically used to debug a process through | 535 | acces_process_vm(), typically used to debug a process through |
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt index c6341745df37..8f78ded4b648 100644 --- a/Documentation/filesystems/autofs4-mount-control.txt +++ b/Documentation/filesystems/autofs4-mount-control.txt | |||
@@ -369,7 +369,7 @@ The call requires an initialized struct autofs_dev_ioctl. There are two | |||
369 | possible variations. Both use the path field set to the path of the mount | 369 | possible variations. Both use the path field set to the path of the mount |
370 | point to check and the size field adjusted appropriately. One uses the | 370 | point to check and the size field adjusted appropriately. One uses the |
371 | ioctlfd field to identify a specific mount point to check while the other | 371 | ioctlfd field to identify a specific mount point to check while the other |
372 | variation uses the path and optionaly arg1 set to an autofs mount type. | 372 | variation uses the path and optionally arg1 set to an autofs mount type. |
373 | The call returns 1 if this is a mount point and sets arg1 to the device | 373 | The call returns 1 if this is a mount point and sets arg1 to the device |
374 | number of the mount and field arg2 to the relevant super block magic | 374 | number of the mount and field arg2 to the relevant super block magic |
375 | number (described below) or 0 if it isn't a mountpoint. In both cases | 375 | number (described below) or 0 if it isn't a mountpoint. In both cases |
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt new file mode 100644 index 000000000000..382d52cdaf2d --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.txt | |||
@@ -0,0 +1,658 @@ | |||
1 | ========================== | ||
2 | FS-CACHE CACHE BACKEND API | ||
3 | ========================== | ||
4 | |||
5 | The FS-Cache system provides an API by which actual caches can be supplied to | ||
6 | FS-Cache for it to then serve out to network filesystems and other interested | ||
7 | parties. | ||
8 | |||
9 | This API is declared in <linux/fscache-cache.h>. | ||
10 | |||
11 | |||
12 | ==================================== | ||
13 | INITIALISING AND REGISTERING A CACHE | ||
14 | ==================================== | ||
15 | |||
16 | To start off, a cache definition must be initialised and registered for each | ||
17 | cache the backend wants to make available. For instance, CacheFS does this in | ||
18 | the fill_super() operation on mounting. | ||
19 | |||
20 | The cache definition (struct fscache_cache) should be initialised by calling: | ||
21 | |||
22 | void fscache_init_cache(struct fscache_cache *cache, | ||
23 | struct fscache_cache_ops *ops, | ||
24 | const char *idfmt, | ||
25 | ...); | ||
26 | |||
27 | Where: | ||
28 | |||
29 | (*) "cache" is a pointer to the cache definition; | ||
30 | |||
31 | (*) "ops" is a pointer to the table of operations that the backend supports on | ||
32 | this cache; and | ||
33 | |||
34 | (*) "idfmt" is a format and printf-style arguments for constructing a label | ||
35 | for the cache. | ||
36 | |||
37 | |||
38 | The cache should then be registered with FS-Cache by passing a pointer to the | ||
39 | previously initialised cache definition to: | ||
40 | |||
41 | int fscache_add_cache(struct fscache_cache *cache, | ||
42 | struct fscache_object *fsdef, | ||
43 | const char *tagname); | ||
44 | |||
45 | Two extra arguments should also be supplied: | ||
46 | |||
47 | (*) "fsdef" which should point to the object representation for the FS-Cache | ||
48 | master index in this cache. Netfs primary index entries will be created | ||
49 | here. FS-Cache keeps the caller's reference to the index object if | ||
50 | successful and will release it upon withdrawal of the cache. | ||
51 | |||
52 | (*) "tagname" which, if given, should be a text string naming this cache. If | ||
53 | this is NULL, the identifier will be used instead. For CacheFS, the | ||
54 | identifier is set to name the underlying block device and the tag can be | ||
55 | supplied by mount. | ||
56 | |||
57 | This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag | ||
58 | is already in use. 0 will be returned on success. | ||
59 | |||
60 | |||
61 | ===================== | ||
62 | UNREGISTERING A CACHE | ||
63 | ===================== | ||
64 | |||
65 | A cache can be withdrawn from the system by calling this function with a | ||
66 | pointer to the cache definition: | ||
67 | |||
68 | void fscache_withdraw_cache(struct fscache_cache *cache); | ||
69 | |||
70 | In CacheFS's case, this is called by put_super(). | ||
71 | |||
72 | |||
73 | ======== | ||
74 | SECURITY | ||
75 | ======== | ||
76 | |||
77 | The cache methods are executed one of two contexts: | ||
78 | |||
79 | (1) that of the userspace process that issued the netfs operation that caused | ||
80 | the cache method to be invoked, or | ||
81 | |||
82 | (2) that of one of the processes in the FS-Cache thread pool. | ||
83 | |||
84 | In either case, this may not be an appropriate context in which to access the | ||
85 | cache. | ||
86 | |||
87 | The calling process's fsuid, fsgid and SELinux security identities may need to | ||
88 | be masqueraded for the duration of the cache driver's access to the cache. | ||
89 | This is left to the cache to handle; FS-Cache makes no effort in this regard. | ||
90 | |||
91 | |||
92 | =================================== | ||
93 | CONTROL AND STATISTICS PRESENTATION | ||
94 | =================================== | ||
95 | |||
96 | The cache may present data to the outside world through FS-Cache's interfaces | ||
97 | in sysfs and procfs - the former for control and the latter for statistics. | ||
98 | |||
99 | A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS | ||
100 | is enabled. This is accessible through the kobject struct fscache_cache::kobj | ||
101 | and is for use by the cache as it sees fit. | ||
102 | |||
103 | |||
104 | ======================== | ||
105 | RELEVANT DATA STRUCTURES | ||
106 | ======================== | ||
107 | |||
108 | (*) Index/Data file FS-Cache representation cookie: | ||
109 | |||
110 | struct fscache_cookie { | ||
111 | struct fscache_object_def *def; | ||
112 | struct fscache_netfs *netfs; | ||
113 | void *netfs_data; | ||
114 | ... | ||
115 | }; | ||
116 | |||
117 | The fields that might be of use to the backend describe the object | ||
118 | definition, the netfs definition and the netfs's data for this cookie. | ||
119 | The object definition contain functions supplied by the netfs for loading | ||
120 | and matching index entries; these are required to provide some of the | ||
121 | cache operations. | ||
122 | |||
123 | |||
124 | (*) In-cache object representation: | ||
125 | |||
126 | struct fscache_object { | ||
127 | int debug_id; | ||
128 | enum { | ||
129 | FSCACHE_OBJECT_RECYCLING, | ||
130 | ... | ||
131 | } state; | ||
132 | spinlock_t lock | ||
133 | struct fscache_cache *cache; | ||
134 | struct fscache_cookie *cookie; | ||
135 | ... | ||
136 | }; | ||
137 | |||
138 | Structures of this type should be allocated by the cache backend and | ||
139 | passed to FS-Cache when requested by the appropriate cache operation. In | ||
140 | the case of CacheFS, they're embedded in CacheFS's internal object | ||
141 | structures. | ||
142 | |||
143 | The debug_id is a simple integer that can be used in debugging messages | ||
144 | that refer to a particular object. In such a case it should be printed | ||
145 | using "OBJ%x" to be consistent with FS-Cache. | ||
146 | |||
147 | Each object contains a pointer to the cookie that represents the object it | ||
148 | is backing. An object should retired when put_object() is called if it is | ||
149 | in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be | ||
150 | initialised by calling fscache_object_init(object). | ||
151 | |||
152 | |||
153 | (*) FS-Cache operation record: | ||
154 | |||
155 | struct fscache_operation { | ||
156 | atomic_t usage; | ||
157 | struct fscache_object *object; | ||
158 | unsigned long flags; | ||
159 | #define FSCACHE_OP_EXCLUSIVE | ||
160 | void (*processor)(struct fscache_operation *op); | ||
161 | void (*release)(struct fscache_operation *op); | ||
162 | ... | ||
163 | }; | ||
164 | |||
165 | FS-Cache has a pool of threads that it uses to give CPU time to the | ||
166 | various asynchronous operations that need to be done as part of driving | ||
167 | the cache. These are represented by the above structure. The processor | ||
168 | method is called to give the op CPU time, and the release method to get | ||
169 | rid of it when its usage count reaches 0. | ||
170 | |||
171 | An operation can be made exclusive upon an object by setting the | ||
172 | appropriate flag before enqueuing it with fscache_enqueue_operation(). If | ||
173 | an operation needs more processing time, it should be enqueued again. | ||
174 | |||
175 | |||
176 | (*) FS-Cache retrieval operation record: | ||
177 | |||
178 | struct fscache_retrieval { | ||
179 | struct fscache_operation op; | ||
180 | struct address_space *mapping; | ||
181 | struct list_head *to_do; | ||
182 | ... | ||
183 | }; | ||
184 | |||
185 | A structure of this type is allocated by FS-Cache to record retrieval and | ||
186 | allocation requests made by the netfs. This struct is then passed to the | ||
187 | backend to do the operation. The backend may get extra refs to it by | ||
188 | calling fscache_get_retrieval() and refs may be discarded by calling | ||
189 | fscache_put_retrieval(). | ||
190 | |||
191 | A retrieval operation can be used by the backend to do retrieval work. To | ||
192 | do this, the retrieval->op.processor method pointer should be set | ||
193 | appropriately by the backend and fscache_enqueue_retrieval() called to | ||
194 | submit it to the thread pool. CacheFiles, for example, uses this to queue | ||
195 | page examination when it detects PG_lock being cleared. | ||
196 | |||
197 | The to_do field is an empty list available for the cache backend to use as | ||
198 | it sees fit. | ||
199 | |||
200 | |||
201 | (*) FS-Cache storage operation record: | ||
202 | |||
203 | struct fscache_storage { | ||
204 | struct fscache_operation op; | ||
205 | pgoff_t store_limit; | ||
206 | ... | ||
207 | }; | ||
208 | |||
209 | A structure of this type is allocated by FS-Cache to record outstanding | ||
210 | writes to be made. FS-Cache itself enqueues this operation and invokes | ||
211 | the write_page() method on the object at appropriate times to effect | ||
212 | storage. | ||
213 | |||
214 | |||
215 | ================ | ||
216 | CACHE OPERATIONS | ||
217 | ================ | ||
218 | |||
219 | The cache backend provides FS-Cache with a table of operations that can be | ||
220 | performed on the denizens of the cache. These are held in a structure of type: | ||
221 | |||
222 | struct fscache_cache_ops | ||
223 | |||
224 | (*) Name of cache provider [mandatory]: | ||
225 | |||
226 | const char *name | ||
227 | |||
228 | This isn't strictly an operation, but should be pointed at a string naming | ||
229 | the backend. | ||
230 | |||
231 | |||
232 | (*) Allocate a new object [mandatory]: | ||
233 | |||
234 | struct fscache_object *(*alloc_object)(struct fscache_cache *cache, | ||
235 | struct fscache_cookie *cookie) | ||
236 | |||
237 | This method is used to allocate a cache object representation to back a | ||
238 | cookie in a particular cache. fscache_object_init() should be called on | ||
239 | the object to initialise it prior to returning. | ||
240 | |||
241 | This function may also be used to parse the index key to be used for | ||
242 | multiple lookup calls to turn it into a more convenient form. FS-Cache | ||
243 | will call the lookup_complete() method to allow the cache to release the | ||
244 | form once lookup is complete or aborted. | ||
245 | |||
246 | |||
247 | (*) Look up and create object [mandatory]: | ||
248 | |||
249 | void (*lookup_object)(struct fscache_object *object) | ||
250 | |||
251 | This method is used to look up an object, given that the object is already | ||
252 | allocated and attached to the cookie. This should instantiate that object | ||
253 | in the cache if it can. | ||
254 | |||
255 | The method should call fscache_object_lookup_negative() as soon as | ||
256 | possible if it determines the object doesn't exist in the cache. If the | ||
257 | object is found to exist and the netfs indicates that it is valid then | ||
258 | fscache_obtained_object() should be called once the object is in a | ||
259 | position to have data stored in it. Similarly, fscache_obtained_object() | ||
260 | should also be called once a non-present object has been created. | ||
261 | |||
262 | If a lookup error occurs, fscache_object_lookup_error() should be called | ||
263 | to abort the lookup of that object. | ||
264 | |||
265 | |||
266 | (*) Release lookup data [mandatory]: | ||
267 | |||
268 | void (*lookup_complete)(struct fscache_object *object) | ||
269 | |||
270 | This method is called to ask the cache to release any resources it was | ||
271 | using to perform a lookup. | ||
272 | |||
273 | |||
274 | (*) Increment object refcount [mandatory]: | ||
275 | |||
276 | struct fscache_object *(*grab_object)(struct fscache_object *object) | ||
277 | |||
278 | This method is called to increment the reference count on an object. It | ||
279 | may fail (for instance if the cache is being withdrawn) by returning NULL. | ||
280 | It should return the object pointer if successful. | ||
281 | |||
282 | |||
283 | (*) Lock/Unlock object [mandatory]: | ||
284 | |||
285 | void (*lock_object)(struct fscache_object *object) | ||
286 | void (*unlock_object)(struct fscache_object *object) | ||
287 | |||
288 | These methods are used to exclusively lock an object. It must be possible | ||
289 | to schedule with the lock held, so a spinlock isn't sufficient. | ||
290 | |||
291 | |||
292 | (*) Pin/Unpin object [optional]: | ||
293 | |||
294 | int (*pin_object)(struct fscache_object *object) | ||
295 | void (*unpin_object)(struct fscache_object *object) | ||
296 | |||
297 | These methods are used to pin an object into the cache. Once pinned an | ||
298 | object cannot be reclaimed to make space. Return -ENOSPC if there's not | ||
299 | enough space in the cache to permit this. | ||
300 | |||
301 | |||
302 | (*) Update object [mandatory]: | ||
303 | |||
304 | int (*update_object)(struct fscache_object *object) | ||
305 | |||
306 | This is called to update the index entry for the specified object. The | ||
307 | new information should be in object->cookie->netfs_data. This can be | ||
308 | obtained by calling object->cookie->def->get_aux()/get_attr(). | ||
309 | |||
310 | |||
311 | (*) Discard object [mandatory]: | ||
312 | |||
313 | void (*drop_object)(struct fscache_object *object) | ||
314 | |||
315 | This method is called to indicate that an object has been unbound from its | ||
316 | cookie, and that the cache should release the object's resources and | ||
317 | retire it if it's in state FSCACHE_OBJECT_RECYCLING. | ||
318 | |||
319 | This method should not attempt to release any references held by the | ||
320 | caller. The caller will invoke the put_object() method as appropriate. | ||
321 | |||
322 | |||
323 | (*) Release object reference [mandatory]: | ||
324 | |||
325 | void (*put_object)(struct fscache_object *object) | ||
326 | |||
327 | This method is used to discard a reference to an object. The object may | ||
328 | be freed when all the references to it are released. | ||
329 | |||
330 | |||
331 | (*) Synchronise a cache [mandatory]: | ||
332 | |||
333 | void (*sync)(struct fscache_cache *cache) | ||
334 | |||
335 | This is called to ask the backend to synchronise a cache with its backing | ||
336 | device. | ||
337 | |||
338 | |||
339 | (*) Dissociate a cache [mandatory]: | ||
340 | |||
341 | void (*dissociate_pages)(struct fscache_cache *cache) | ||
342 | |||
343 | This is called to ask a cache to perform any page dissociations as part of | ||
344 | cache withdrawal. | ||
345 | |||
346 | |||
347 | (*) Notification that the attributes on a netfs file changed [mandatory]: | ||
348 | |||
349 | int (*attr_changed)(struct fscache_object *object); | ||
350 | |||
351 | This is called to indicate to the cache that certain attributes on a netfs | ||
352 | file have changed (for example the maximum size a file may reach). The | ||
353 | cache can read these from the netfs by calling the cookie's get_attr() | ||
354 | method. | ||
355 | |||
356 | The cache may use the file size information to reserve space on the cache. | ||
357 | It should also call fscache_set_store_limit() to indicate to FS-Cache the | ||
358 | highest byte it's willing to store for an object. | ||
359 | |||
360 | This method may return -ve if an error occurred or the cache object cannot | ||
361 | be expanded. In such a case, the object will be withdrawn from service. | ||
362 | |||
363 | This operation is run asynchronously from FS-Cache's thread pool, and | ||
364 | storage and retrieval operations from the netfs are excluded during the | ||
365 | execution of this operation. | ||
366 | |||
367 | |||
368 | (*) Reserve cache space for an object's data [optional]: | ||
369 | |||
370 | int (*reserve_space)(struct fscache_object *object, loff_t size); | ||
371 | |||
372 | This is called to request that cache space be reserved to hold the data | ||
373 | for an object and the metadata used to track it. Zero size should be | ||
374 | taken as request to cancel a reservation. | ||
375 | |||
376 | This should return 0 if successful, -ENOSPC if there isn't enough space | ||
377 | available, or -ENOMEM or -EIO on other errors. | ||
378 | |||
379 | The reservation may exceed the current size of the object, thus permitting | ||
380 | future expansion. If the amount of space consumed by an object would | ||
381 | exceed the reservation, it's permitted to refuse requests to allocate | ||
382 | pages, but not required. An object may be pruned down to its reservation | ||
383 | size if larger than that already. | ||
384 | |||
385 | |||
386 | (*) Request page be read from cache [mandatory]: | ||
387 | |||
388 | int (*read_or_alloc_page)(struct fscache_retrieval *op, | ||
389 | struct page *page, | ||
390 | gfp_t gfp) | ||
391 | |||
392 | This is called to attempt to read a netfs page from the cache, or to | ||
393 | reserve a backing block if not. FS-Cache will have done as much checking | ||
394 | as it can before calling, but most of the work belongs to the backend. | ||
395 | |||
396 | If there's no page in the cache, then -ENODATA should be returned if the | ||
397 | backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it | ||
398 | didn't. | ||
399 | |||
400 | If there is suitable data in the cache, then a read operation should be | ||
401 | queued and 0 returned. When the read finishes, fscache_end_io() should be | ||
402 | called. | ||
403 | |||
404 | The fscache_mark_pages_cached() should be called for the page if any cache | ||
405 | metadata is retained. This will indicate to the netfs that the page needs | ||
406 | explicit uncaching. This operation takes a pagevec, thus allowing several | ||
407 | pages to be marked at once. | ||
408 | |||
409 | The retrieval record pointed to by op should be retained for each page | ||
410 | queued and released when I/O on the page has been formally ended. | ||
411 | fscache_get/put_retrieval() are available for this purpose. | ||
412 | |||
413 | The retrieval record may be used to get CPU time via the FS-Cache thread | ||
414 | pool. If this is desired, the op->op.processor should be set to point to | ||
415 | the appropriate processing routine, and fscache_enqueue_retrieval() should | ||
416 | be called at an appropriate point to request CPU time. For instance, the | ||
417 | retrieval routine could be enqueued upon the completion of a disk read. | ||
418 | The to_do field in the retrieval record is provided to aid in this. | ||
419 | |||
420 | If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS | ||
421 | returned if possible or fscache_end_io() called with a suitable error | ||
422 | code.. | ||
423 | |||
424 | |||
425 | (*) Request pages be read from cache [mandatory]: | ||
426 | |||
427 | int (*read_or_alloc_pages)(struct fscache_retrieval *op, | ||
428 | struct list_head *pages, | ||
429 | unsigned *nr_pages, | ||
430 | gfp_t gfp) | ||
431 | |||
432 | This is like the read_or_alloc_page() method, except it is handed a list | ||
433 | of pages instead of one page. Any pages on which a read operation is | ||
434 | started must be added to the page cache for the specified mapping and also | ||
435 | to the LRU. Such pages must also be removed from the pages list and | ||
436 | *nr_pages decremented per page. | ||
437 | |||
438 | If there was an error such as -ENOMEM, then that should be returned; else | ||
439 | if one or more pages couldn't be read or allocated, then -ENOBUFS should | ||
440 | be returned; else if one or more pages couldn't be read, then -ENODATA | ||
441 | should be returned. If all the pages are dispatched then 0 should be | ||
442 | returned. | ||
443 | |||
444 | |||
445 | (*) Request page be allocated in the cache [mandatory]: | ||
446 | |||
447 | int (*allocate_page)(struct fscache_retrieval *op, | ||
448 | struct page *page, | ||
449 | gfp_t gfp) | ||
450 | |||
451 | This is like the read_or_alloc_page() method, except that it shouldn't | ||
452 | read from the cache, even if there's data there that could be retrieved. | ||
453 | It should, however, set up any internal metadata required such that | ||
454 | the write_page() method can write to the cache. | ||
455 | |||
456 | If there's no backing block available, then -ENOBUFS should be returned | ||
457 | (or -ENOMEM if there were other problems). If a block is successfully | ||
458 | allocated, then the netfs page should be marked and 0 returned. | ||
459 | |||
460 | |||
461 | (*) Request pages be allocated in the cache [mandatory]: | ||
462 | |||
463 | int (*allocate_pages)(struct fscache_retrieval *op, | ||
464 | struct list_head *pages, | ||
465 | unsigned *nr_pages, | ||
466 | gfp_t gfp) | ||
467 | |||
468 | This is an multiple page version of the allocate_page() method. pages and | ||
469 | nr_pages should be treated as for the read_or_alloc_pages() method. | ||
470 | |||
471 | |||
472 | (*) Request page be written to cache [mandatory]: | ||
473 | |||
474 | int (*write_page)(struct fscache_storage *op, | ||
475 | struct page *page); | ||
476 | |||
477 | This is called to write from a page on which there was a previously | ||
478 | successful read_or_alloc_page() call or similar. FS-Cache filters out | ||
479 | pages that don't have mappings. | ||
480 | |||
481 | This method is called asynchronously from the FS-Cache thread pool. It is | ||
482 | not required to actually store anything, provided -ENODATA is then | ||
483 | returned to the next read of this page. | ||
484 | |||
485 | If an error occurred, then a negative error code should be returned, | ||
486 | otherwise zero should be returned. FS-Cache will take appropriate action | ||
487 | in response to an error, such as withdrawing this object. | ||
488 | |||
489 | If this method returns success then FS-Cache will inform the netfs | ||
490 | appropriately. | ||
491 | |||
492 | |||
493 | (*) Discard retained per-page metadata [mandatory]: | ||
494 | |||
495 | void (*uncache_page)(struct fscache_object *object, struct page *page) | ||
496 | |||
497 | This is called when a netfs page is being evicted from the pagecache. The | ||
498 | cache backend should tear down any internal representation or tracking it | ||
499 | maintains for this page. | ||
500 | |||
501 | |||
502 | ================== | ||
503 | FS-CACHE UTILITIES | ||
504 | ================== | ||
505 | |||
506 | FS-Cache provides some utilities that a cache backend may make use of: | ||
507 | |||
508 | (*) Note occurrence of an I/O error in a cache: | ||
509 | |||
510 | void fscache_io_error(struct fscache_cache *cache) | ||
511 | |||
512 | This tells FS-Cache that an I/O error occurred in the cache. After this | ||
513 | has been called, only resource dissociation operations (object and page | ||
514 | release) will be passed from the netfs to the cache backend for the | ||
515 | specified cache. | ||
516 | |||
517 | This does not actually withdraw the cache. That must be done separately. | ||
518 | |||
519 | |||
520 | (*) Invoke the retrieval I/O completion function: | ||
521 | |||
522 | void fscache_end_io(struct fscache_retrieval *op, struct page *page, | ||
523 | int error); | ||
524 | |||
525 | This is called to note the end of an attempt to retrieve a page. The | ||
526 | error value should be 0 if successful and an error otherwise. | ||
527 | |||
528 | |||
529 | (*) Set highest store limit: | ||
530 | |||
531 | void fscache_set_store_limit(struct fscache_object *object, | ||
532 | loff_t i_size); | ||
533 | |||
534 | This sets the limit FS-Cache imposes on the highest byte it's willing to | ||
535 | try and store for a netfs. Any page over this limit is automatically | ||
536 | rejected by fscache_read_alloc_page() and co with -ENOBUFS. | ||
537 | |||
538 | |||
539 | (*) Mark pages as being cached: | ||
540 | |||
541 | void fscache_mark_pages_cached(struct fscache_retrieval *op, | ||
542 | struct pagevec *pagevec); | ||
543 | |||
544 | This marks a set of pages as being cached. After this has been called, | ||
545 | the netfs must call fscache_uncache_page() to unmark the pages. | ||
546 | |||
547 | |||
548 | (*) Perform coherency check on an object: | ||
549 | |||
550 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | ||
551 | const void *data, | ||
552 | uint16_t datalen); | ||
553 | |||
554 | This asks the netfs to perform a coherency check on an object that has | ||
555 | just been looked up. The cookie attached to the object will determine the | ||
556 | netfs to use. data and datalen should specify where the auxiliary data | ||
557 | retrieved from the cache can be found. | ||
558 | |||
559 | One of three values will be returned: | ||
560 | |||
561 | (*) FSCACHE_CHECKAUX_OKAY | ||
562 | |||
563 | The coherency data indicates the object is valid as is. | ||
564 | |||
565 | (*) FSCACHE_CHECKAUX_NEEDS_UPDATE | ||
566 | |||
567 | The coherency data needs updating, but otherwise the object is | ||
568 | valid. | ||
569 | |||
570 | (*) FSCACHE_CHECKAUX_OBSOLETE | ||
571 | |||
572 | The coherency data indicates that the object is obsolete and should | ||
573 | be discarded. | ||
574 | |||
575 | |||
576 | (*) Initialise a freshly allocated object: | ||
577 | |||
578 | void fscache_object_init(struct fscache_object *object); | ||
579 | |||
580 | This initialises all the fields in an object representation. | ||
581 | |||
582 | |||
583 | (*) Indicate the destruction of an object: | ||
584 | |||
585 | void fscache_object_destroyed(struct fscache_cache *cache); | ||
586 | |||
587 | This must be called to inform FS-Cache that an object that belonged to a | ||
588 | cache has been destroyed and deallocated. This will allow continuation | ||
589 | of the cache withdrawal process when it is stopped pending destruction of | ||
590 | all the objects. | ||
591 | |||
592 | |||
593 | (*) Indicate negative lookup on an object: | ||
594 | |||
595 | void fscache_object_lookup_negative(struct fscache_object *object); | ||
596 | |||
597 | This is called to indicate to FS-Cache that a lookup process for an object | ||
598 | found a negative result. | ||
599 | |||
600 | This changes the state of an object to permit reads pending on lookup | ||
601 | completion to go off and start fetching data from the netfs server as it's | ||
602 | known at this point that there can't be any data in the cache. | ||
603 | |||
604 | This may be called multiple times on an object. Only the first call is | ||
605 | significant - all subsequent calls are ignored. | ||
606 | |||
607 | |||
608 | (*) Indicate an object has been obtained: | ||
609 | |||
610 | void fscache_obtained_object(struct fscache_object *object); | ||
611 | |||
612 | This is called to indicate to FS-Cache that a lookup process for an object | ||
613 | produced a positive result, or that an object was created. This should | ||
614 | only be called once for any particular object. | ||
615 | |||
616 | This changes the state of an object to indicate: | ||
617 | |||
618 | (1) if no call to fscache_object_lookup_negative() has been made on | ||
619 | this object, that there may be data available, and that reads can | ||
620 | now go and look for it; and | ||
621 | |||
622 | (2) that writes may now proceed against this object. | ||
623 | |||
624 | |||
625 | (*) Indicate that object lookup failed: | ||
626 | |||
627 | void fscache_object_lookup_error(struct fscache_object *object); | ||
628 | |||
629 | This marks an object as having encountered a fatal error (usually EIO) | ||
630 | and causes it to move into a state whereby it will be withdrawn as soon | ||
631 | as possible. | ||
632 | |||
633 | |||
634 | (*) Get and release references on a retrieval record: | ||
635 | |||
636 | void fscache_get_retrieval(struct fscache_retrieval *op); | ||
637 | void fscache_put_retrieval(struct fscache_retrieval *op); | ||
638 | |||
639 | These two functions are used to retain a retrieval record whilst doing | ||
640 | asynchronous data retrieval and block allocation. | ||
641 | |||
642 | |||
643 | (*) Enqueue a retrieval record for processing. | ||
644 | |||
645 | void fscache_enqueue_retrieval(struct fscache_retrieval *op); | ||
646 | |||
647 | This enqueues a retrieval record for processing by the FS-Cache thread | ||
648 | pool. One of the threads in the pool will invoke the retrieval record's | ||
649 | op->op.processor callback function. This function may be called from | ||
650 | within the callback function. | ||
651 | |||
652 | |||
653 | (*) List of object state names: | ||
654 | |||
655 | const char *fscache_object_states[]; | ||
656 | |||
657 | For debugging purposes, this may be used to turn the state that an object | ||
658 | is in into a text string for display purposes. | ||
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt new file mode 100644 index 000000000000..748a1ae49e12 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.txt | |||
@@ -0,0 +1,501 @@ | |||
1 | =============================================== | ||
2 | CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM | ||
3 | =============================================== | ||
4 | |||
5 | Contents: | ||
6 | |||
7 | (*) Overview. | ||
8 | |||
9 | (*) Requirements. | ||
10 | |||
11 | (*) Configuration. | ||
12 | |||
13 | (*) Starting the cache. | ||
14 | |||
15 | (*) Things to avoid. | ||
16 | |||
17 | (*) Cache culling. | ||
18 | |||
19 | (*) Cache structure. | ||
20 | |||
21 | (*) Security model and SELinux. | ||
22 | |||
23 | (*) A note on security. | ||
24 | |||
25 | (*) Statistical information. | ||
26 | |||
27 | (*) Debugging. | ||
28 | |||
29 | |||
30 | ======== | ||
31 | OVERVIEW | ||
32 | ======== | ||
33 | |||
34 | CacheFiles is a caching backend that's meant to use as a cache a directory on | ||
35 | an already mounted filesystem of a local type (such as Ext3). | ||
36 | |||
37 | CacheFiles uses a userspace daemon to do some of the cache management - such as | ||
38 | reaping stale nodes and culling. This is called cachefilesd and lives in | ||
39 | /sbin. | ||
40 | |||
41 | The filesystem and data integrity of the cache are only as good as those of the | ||
42 | filesystem providing the backing services. Note that CacheFiles does not | ||
43 | attempt to journal anything since the journalling interfaces of the various | ||
44 | filesystems are very specific in nature. | ||
45 | |||
46 | CacheFiles creates a misc character device - "/dev/cachefiles" - that is used | ||
47 | to communication with the daemon. Only one thing may have this open at once, | ||
48 | and whilst it is open, a cache is at least partially in existence. The daemon | ||
49 | opens this and sends commands down it to control the cache. | ||
50 | |||
51 | CacheFiles is currently limited to a single cache. | ||
52 | |||
53 | CacheFiles attempts to maintain at least a certain percentage of free space on | ||
54 | the filesystem, shrinking the cache by culling the objects it contains to make | ||
55 | space if necessary - see the "Cache Culling" section. This means it can be | ||
56 | placed on the same medium as a live set of data, and will expand to make use of | ||
57 | spare space and automatically contract when the set of data requires more | ||
58 | space. | ||
59 | |||
60 | |||
61 | ============ | ||
62 | REQUIREMENTS | ||
63 | ============ | ||
64 | |||
65 | The use of CacheFiles and its daemon requires the following features to be | ||
66 | available in the system and in the cache filesystem: | ||
67 | |||
68 | - dnotify. | ||
69 | |||
70 | - extended attributes (xattrs). | ||
71 | |||
72 | - openat() and friends. | ||
73 | |||
74 | - bmap() support on files in the filesystem (FIBMAP ioctl). | ||
75 | |||
76 | - The use of bmap() to detect a partial page at the end of the file. | ||
77 | |||
78 | It is strongly recommended that the "dir_index" option is enabled on Ext3 | ||
79 | filesystems being used as a cache. | ||
80 | |||
81 | |||
82 | ============= | ||
83 | CONFIGURATION | ||
84 | ============= | ||
85 | |||
86 | The cache is configured by a script in /etc/cachefilesd.conf. These commands | ||
87 | set up cache ready for use. The following script commands are available: | ||
88 | |||
89 | (*) brun <N>% | ||
90 | (*) bcull <N>% | ||
91 | (*) bstop <N>% | ||
92 | (*) frun <N>% | ||
93 | (*) fcull <N>% | ||
94 | (*) fstop <N>% | ||
95 | |||
96 | Configure the culling limits. Optional. See the section on culling | ||
97 | The defaults are 7% (run), 5% (cull) and 1% (stop) respectively. | ||
98 | |||
99 | The commands beginning with a 'b' are file space (block) limits, those | ||
100 | beginning with an 'f' are file count limits. | ||
101 | |||
102 | (*) dir <path> | ||
103 | |||
104 | Specify the directory containing the root of the cache. Mandatory. | ||
105 | |||
106 | (*) tag <name> | ||
107 | |||
108 | Specify a tag to FS-Cache to use in distinguishing multiple caches. | ||
109 | Optional. The default is "CacheFiles". | ||
110 | |||
111 | (*) debug <mask> | ||
112 | |||
113 | Specify a numeric bitmask to control debugging in the kernel module. | ||
114 | Optional. The default is zero (all off). The following values can be | ||
115 | OR'd into the mask to collect various information: | ||
116 | |||
117 | 1 Turn on trace of function entry (_enter() macros) | ||
118 | 2 Turn on trace of function exit (_leave() macros) | ||
119 | 4 Turn on trace of internal debug points (_debug()) | ||
120 | |||
121 | This mask can also be set through sysfs, eg: | ||
122 | |||
123 | echo 5 >/sys/modules/cachefiles/parameters/debug | ||
124 | |||
125 | |||
126 | ================== | ||
127 | STARTING THE CACHE | ||
128 | ================== | ||
129 | |||
130 | The cache is started by running the daemon. The daemon opens the cache device, | ||
131 | configures the cache and tells it to begin caching. At that point the cache | ||
132 | binds to fscache and the cache becomes live. | ||
133 | |||
134 | The daemon is run as follows: | ||
135 | |||
136 | /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>] | ||
137 | |||
138 | The flags are: | ||
139 | |||
140 | (*) -d | ||
141 | |||
142 | Increase the debugging level. This can be specified multiple times and | ||
143 | is cumulative with itself. | ||
144 | |||
145 | (*) -s | ||
146 | |||
147 | Send messages to stderr instead of syslog. | ||
148 | |||
149 | (*) -n | ||
150 | |||
151 | Don't daemonise and go into background. | ||
152 | |||
153 | (*) -f <configfile> | ||
154 | |||
155 | Use an alternative configuration file rather than the default one. | ||
156 | |||
157 | |||
158 | =============== | ||
159 | THINGS TO AVOID | ||
160 | =============== | ||
161 | |||
162 | Do not mount other things within the cache as this will cause problems. The | ||
163 | kernel module contains its own very cut-down path walking facility that ignores | ||
164 | mountpoints, but the daemon can't avoid them. | ||
165 | |||
166 | Do not create, rename or unlink files and directories in the cache whilst the | ||
167 | cache is active, as this may cause the state to become uncertain. | ||
168 | |||
169 | Renaming files in the cache might make objects appear to be other objects (the | ||
170 | filename is part of the lookup key). | ||
171 | |||
172 | Do not change or remove the extended attributes attached to cache files by the | ||
173 | cache as this will cause the cache state management to get confused. | ||
174 | |||
175 | Do not create files or directories in the cache, lest the cache get confused or | ||
176 | serve incorrect data. | ||
177 | |||
178 | Do not chmod files in the cache. The module creates things with minimal | ||
179 | permissions to prevent random users being able to access them directly. | ||
180 | |||
181 | |||
182 | ============= | ||
183 | CACHE CULLING | ||
184 | ============= | ||
185 | |||
186 | The cache may need culling occasionally to make space. This involves | ||
187 | discarding objects from the cache that have been used less recently than | ||
188 | anything else. Culling is based on the access time of data objects. Empty | ||
189 | directories are culled if not in use. | ||
190 | |||
191 | Cache culling is done on the basis of the percentage of blocks and the | ||
192 | percentage of files available in the underlying filesystem. There are six | ||
193 | "limits": | ||
194 | |||
195 | (*) brun | ||
196 | (*) frun | ||
197 | |||
198 | If the amount of free space and the number of available files in the cache | ||
199 | rises above both these limits, then culling is turned off. | ||
200 | |||
201 | (*) bcull | ||
202 | (*) fcull | ||
203 | |||
204 | If the amount of available space or the number of available files in the | ||
205 | cache falls below either of these limits, then culling is started. | ||
206 | |||
207 | (*) bstop | ||
208 | (*) fstop | ||
209 | |||
210 | If the amount of available space or the number of available files in the | ||
211 | cache falls below either of these limits, then no further allocation of | ||
212 | disk space or files is permitted until culling has raised things above | ||
213 | these limits again. | ||
214 | |||
215 | These must be configured thusly: | ||
216 | |||
217 | 0 <= bstop < bcull < brun < 100 | ||
218 | 0 <= fstop < fcull < frun < 100 | ||
219 | |||
220 | Note that these are percentages of available space and available files, and do | ||
221 | _not_ appear as 100 minus the percentage displayed by the "df" program. | ||
222 | |||
223 | The userspace daemon scans the cache to build up a table of cullable objects. | ||
224 | These are then culled in least recently used order. A new scan of the cache is | ||
225 | started as soon as space is made in the table. Objects will be skipped if | ||
226 | their atimes have changed or if the kernel module says it is still using them. | ||
227 | |||
228 | |||
229 | =============== | ||
230 | CACHE STRUCTURE | ||
231 | =============== | ||
232 | |||
233 | The CacheFiles module will create two directories in the directory it was | ||
234 | given: | ||
235 | |||
236 | (*) cache/ | ||
237 | |||
238 | (*) graveyard/ | ||
239 | |||
240 | The active cache objects all reside in the first directory. The CacheFiles | ||
241 | kernel module moves any retired or culled objects that it can't simply unlink | ||
242 | to the graveyard from which the daemon will actually delete them. | ||
243 | |||
244 | The daemon uses dnotify to monitor the graveyard directory, and will delete | ||
245 | anything that appears therein. | ||
246 | |||
247 | |||
248 | The module represents index objects as directories with the filename "I..." or | ||
249 | "J...". Note that the "cache/" directory is itself a special index. | ||
250 | |||
251 | Data objects are represented as files if they have no children, or directories | ||
252 | if they do. Their filenames all begin "D..." or "E...". If represented as a | ||
253 | directory, data objects will have a file in the directory called "data" that | ||
254 | actually holds the data. | ||
255 | |||
256 | Special objects are similar to data objects, except their filenames begin | ||
257 | "S..." or "T...". | ||
258 | |||
259 | |||
260 | If an object has children, then it will be represented as a directory. | ||
261 | Immediately in the representative directory are a collection of directories | ||
262 | named for hash values of the child object keys with an '@' prepended. Into | ||
263 | this directory, if possible, will be placed the representations of the child | ||
264 | objects: | ||
265 | |||
266 | INDEX INDEX INDEX DATA FILES | ||
267 | ========= ========== ================================= ================ | ||
268 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 | ||
269 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry | ||
270 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry | ||
271 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry | ||
272 | |||
273 | |||
274 | If the key is so long that it exceeds NAME_MAX with the decorations added on to | ||
275 | it, then it will be cut into pieces, the first few of which will be used to | ||
276 | make a nest of directories, and the last one of which will be the objects | ||
277 | inside the last directory. The names of the intermediate directories will have | ||
278 | '+' prepended: | ||
279 | |||
280 | J1223/@23/+xy...z/+kl...m/Epqr | ||
281 | |||
282 | |||
283 | Note that keys are raw data, and not only may they exceed NAME_MAX in size, | ||
284 | they may also contain things like '/' and NUL characters, and so they may not | ||
285 | be suitable for turning directly into a filename. | ||
286 | |||
287 | To handle this, CacheFiles will use a suitably printable filename directly and | ||
288 | "base-64" encode ones that aren't directly suitable. The two versions of | ||
289 | object filenames indicate the encoding: | ||
290 | |||
291 | OBJECT TYPE PRINTABLE ENCODED | ||
292 | =============== =============== =============== | ||
293 | Index "I..." "J..." | ||
294 | Data "D..." "E..." | ||
295 | Special "S..." "T..." | ||
296 | |||
297 | Intermediate directories are always "@" or "+" as appropriate. | ||
298 | |||
299 | |||
300 | Each object in the cache has an extended attribute label that holds the object | ||
301 | type ID (required to distinguish special objects) and the auxiliary data from | ||
302 | the netfs. The latter is used to detect stale objects in the cache and update | ||
303 | or retire them. | ||
304 | |||
305 | |||
306 | Note that CacheFiles will erase from the cache any file it doesn't recognise or | ||
307 | any file of an incorrect type (such as a FIFO file or a device file). | ||
308 | |||
309 | |||
310 | ========================== | ||
311 | SECURITY MODEL AND SELINUX | ||
312 | ========================== | ||
313 | |||
314 | CacheFiles is implemented to deal properly with the LSM security features of | ||
315 | the Linux kernel and the SELinux facility. | ||
316 | |||
317 | One of the problems that CacheFiles faces is that it is generally acting on | ||
318 | behalf of a process, and running in that process's context, and that includes a | ||
319 | security context that is not appropriate for accessing the cache - either | ||
320 | because the files in the cache are inaccessible to that process, or because if | ||
321 | the process creates a file in the cache, that file may be inaccessible to other | ||
322 | processes. | ||
323 | |||
324 | The way CacheFiles works is to temporarily change the security context (fsuid, | ||
325 | fsgid and actor security label) that the process acts as - without changing the | ||
326 | security context of the process when it the target of an operation performed by | ||
327 | some other process (so signalling and suchlike still work correctly). | ||
328 | |||
329 | |||
330 | When the CacheFiles module is asked to bind to its cache, it: | ||
331 | |||
332 | (1) Finds the security label attached to the root cache directory and uses | ||
333 | that as the security label with which it will create files. By default, | ||
334 | this is: | ||
335 | |||
336 | cachefiles_var_t | ||
337 | |||
338 | (2) Finds the security label of the process which issued the bind request | ||
339 | (presumed to be the cachefilesd daemon), which by default will be: | ||
340 | |||
341 | cachefilesd_t | ||
342 | |||
343 | and asks LSM to supply a security ID as which it should act given the | ||
344 | daemon's label. By default, this will be: | ||
345 | |||
346 | cachefiles_kernel_t | ||
347 | |||
348 | SELinux transitions the daemon's security ID to the module's security ID | ||
349 | based on a rule of this form in the policy. | ||
350 | |||
351 | type_transition <daemon's-ID> kernel_t : process <module's-ID>; | ||
352 | |||
353 | For instance: | ||
354 | |||
355 | type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t; | ||
356 | |||
357 | |||
358 | The module's security ID gives it permission to create, move and remove files | ||
359 | and directories in the cache, to find and access directories and files in the | ||
360 | cache, to set and access extended attributes on cache objects, and to read and | ||
361 | write files in the cache. | ||
362 | |||
363 | The daemon's security ID gives it only a very restricted set of permissions: it | ||
364 | may scan directories, stat files and erase files and directories. It may | ||
365 | not read or write files in the cache, and so it is precluded from accessing the | ||
366 | data cached therein; nor is it permitted to create new files in the cache. | ||
367 | |||
368 | |||
369 | There are policy source files available in: | ||
370 | |||
371 | http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2 | ||
372 | |||
373 | and later versions. In that tarball, see the files: | ||
374 | |||
375 | cachefilesd.te | ||
376 | cachefilesd.fc | ||
377 | cachefilesd.if | ||
378 | |||
379 | They are built and installed directly by the RPM. | ||
380 | |||
381 | If a non-RPM based system is being used, then copy the above files to their own | ||
382 | directory and run: | ||
383 | |||
384 | make -f /usr/share/selinux/devel/Makefile | ||
385 | semodule -i cachefilesd.pp | ||
386 | |||
387 | You will need checkpolicy and selinux-policy-devel installed prior to the | ||
388 | build. | ||
389 | |||
390 | |||
391 | By default, the cache is located in /var/fscache, but if it is desirable that | ||
392 | it should be elsewhere, than either the above policy files must be altered, or | ||
393 | an auxiliary policy must be installed to label the alternate location of the | ||
394 | cache. | ||
395 | |||
396 | For instructions on how to add an auxiliary policy to enable the cache to be | ||
397 | located elsewhere when SELinux is in enforcing mode, please see: | ||
398 | |||
399 | /usr/share/doc/cachefilesd-*/move-cache.txt | ||
400 | |||
401 | When the cachefilesd rpm is installed; alternatively, the document can be found | ||
402 | in the sources. | ||
403 | |||
404 | |||
405 | ================== | ||
406 | A NOTE ON SECURITY | ||
407 | ================== | ||
408 | |||
409 | CacheFiles makes use of the split security in the task_struct. It allocates | ||
410 | its own task_security structure, and redirects current->cred to point to it | ||
411 | when it acts on behalf of another process, in that process's context. | ||
412 | |||
413 | The reason it does this is that it calls vfs_mkdir() and suchlike rather than | ||
414 | bypassing security and calling inode ops directly. Therefore the VFS and LSM | ||
415 | may deny the CacheFiles access to the cache data because under some | ||
416 | circumstances the caching code is running in the security context of whatever | ||
417 | process issued the original syscall on the netfs. | ||
418 | |||
419 | Furthermore, should CacheFiles create a file or directory, the security | ||
420 | parameters with that object is created (UID, GID, security label) would be | ||
421 | derived from that process that issued the system call, thus potentially | ||
422 | preventing other processes from accessing the cache - including CacheFiles's | ||
423 | cache management daemon (cachefilesd). | ||
424 | |||
425 | What is required is to temporarily override the security of the process that | ||
426 | issued the system call. We can't, however, just do an in-place change of the | ||
427 | security data as that affects the process as an object, not just as a subject. | ||
428 | This means it may lose signals or ptrace events for example, and affects what | ||
429 | the process looks like in /proc. | ||
430 | |||
431 | So CacheFiles makes use of a logical split in the security between the | ||
432 | objective security (task->real_cred) and the subjective security (task->cred). | ||
433 | The objective security holds the intrinsic security properties of a process and | ||
434 | is never overridden. This is what appears in /proc, and is what is used when a | ||
435 | process is the target of an operation by some other process (SIGKILL for | ||
436 | example). | ||
437 | |||
438 | The subjective security holds the active security properties of a process, and | ||
439 | may be overridden. This is not seen externally, and is used whan a process | ||
440 | acts upon another object, for example SIGKILLing another process or opening a | ||
441 | file. | ||
442 | |||
443 | LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request | ||
444 | for CacheFiles to run in a context of a specific security label, or to create | ||
445 | files and directories with another security label. | ||
446 | |||
447 | |||
448 | ======================= | ||
449 | STATISTICAL INFORMATION | ||
450 | ======================= | ||
451 | |||
452 | If FS-Cache is compiled with the following option enabled: | ||
453 | |||
454 | CONFIG_CACHEFILES_HISTOGRAM=y | ||
455 | |||
456 | then it will gather certain statistics and display them through a proc file. | ||
457 | |||
458 | (*) /proc/fs/cachefiles/histogram | ||
459 | |||
460 | cat /proc/fs/cachefiles/histogram | ||
461 | JIFS SECS LOOKUPS MKDIRS CREATES | ||
462 | ===== ===== ========= ========= ========= | ||
463 | |||
464 | This shows the breakdown of the number of times each amount of time | ||
465 | between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The | ||
466 | columns are as follows: | ||
467 | |||
468 | COLUMN TIME MEASUREMENT | ||
469 | ======= ======================================================= | ||
470 | LOOKUPS Length of time to perform a lookup on the backing fs | ||
471 | MKDIRS Length of time to perform a mkdir on the backing fs | ||
472 | CREATES Length of time to perform a create on the backing fs | ||
473 | |||
474 | Each row shows the number of events that took a particular range of times. | ||
475 | Each step is 1 jiffy in size. The JIFS column indicates the particular | ||
476 | jiffy range covered, and the SECS field the equivalent number of seconds. | ||
477 | |||
478 | |||
479 | ========= | ||
480 | DEBUGGING | ||
481 | ========= | ||
482 | |||
483 | If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime | ||
484 | debugging enabled by adjusting the value in: | ||
485 | |||
486 | /sys/module/cachefiles/parameters/debug | ||
487 | |||
488 | This is a bitmask of debugging streams to enable: | ||
489 | |||
490 | BIT VALUE STREAM POINT | ||
491 | ======= ======= =============================== ======================= | ||
492 | 0 1 General Function entry trace | ||
493 | 1 2 Function exit trace | ||
494 | 2 4 General | ||
495 | |||
496 | The appropriate set of values should be OR'd together and the result written to | ||
497 | the control file. For example: | ||
498 | |||
499 | echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug | ||
500 | |||
501 | will turn on all function entry debugging. | ||
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt new file mode 100644 index 000000000000..9e94b9491d89 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -0,0 +1,333 @@ | |||
1 | ========================== | ||
2 | General Filesystem Caching | ||
3 | ========================== | ||
4 | |||
5 | ======== | ||
6 | OVERVIEW | ||
7 | ======== | ||
8 | |||
9 | This facility is a general purpose cache for network filesystems, though it | ||
10 | could be used for caching other things such as ISO9660 filesystems too. | ||
11 | |||
12 | FS-Cache mediates between cache backends (such as CacheFS) and network | ||
13 | filesystems: | ||
14 | |||
15 | +---------+ | ||
16 | | | +--------------+ | ||
17 | | NFS |--+ | | | ||
18 | | | | +-->| CacheFS | | ||
19 | +---------+ | +----------+ | | /dev/hda5 | | ||
20 | | | | | +--------------+ | ||
21 | +---------+ +-->| | | | ||
22 | | | | |--+ | ||
23 | | AFS |----->| FS-Cache | | ||
24 | | | | |--+ | ||
25 | +---------+ +-->| | | | ||
26 | | | | | +--------------+ | ||
27 | +---------+ | +----------+ | | | | ||
28 | | | | +-->| CacheFiles | | ||
29 | | ISOFS |--+ | /var/cache | | ||
30 | | | +--------------+ | ||
31 | +---------+ | ||
32 | |||
33 | Or to look at it another way, FS-Cache is a module that provides a caching | ||
34 | facility to a network filesystem such that the cache is transparent to the | ||
35 | user: | ||
36 | |||
37 | +---------+ | ||
38 | | | | ||
39 | | Server | | ||
40 | | | | ||
41 | +---------+ | ||
42 | | NETWORK | ||
43 | ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
44 | | | ||
45 | | +----------+ | ||
46 | V | | | ||
47 | +---------+ | | | ||
48 | | | | | | ||
49 | | NFS |----->| FS-Cache | | ||
50 | | | | |--+ | ||
51 | +---------+ | | | +--------------+ +--------------+ | ||
52 | | | | | | | | | | ||
53 | V +----------+ +-->| CacheFiles |-->| Ext3 | | ||
54 | +---------+ | /var/cache | | /dev/sda6 | | ||
55 | | | +--------------+ +--------------+ | ||
56 | | VFS | ^ ^ | ||
57 | | | | | | ||
58 | +---------+ +--------------+ | | ||
59 | | KERNEL SPACE | | | ||
60 | ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~ | ||
61 | | USER SPACE | | | ||
62 | V | | | ||
63 | +---------+ +--------------+ | ||
64 | | | | | | ||
65 | | Process | | cachefilesd | | ||
66 | | | | | | ||
67 | +---------+ +--------------+ | ||
68 | |||
69 | |||
70 | FS-Cache does not follow the idea of completely loading every netfs file | ||
71 | opened in its entirety into a cache before permitting it to be accessed and | ||
72 | then serving the pages out of that cache rather than the netfs inode because: | ||
73 | |||
74 | (1) It must be practical to operate without a cache. | ||
75 | |||
76 | (2) The size of any accessible file must not be limited to the size of the | ||
77 | cache. | ||
78 | |||
79 | (3) The combined size of all opened files (this includes mapped libraries) | ||
80 | must not be limited to the size of the cache. | ||
81 | |||
82 | (4) The user should not be forced to download an entire file just to do a | ||
83 | one-off access of a small portion of it (such as might be done with the | ||
84 | "file" program). | ||
85 | |||
86 | It instead serves the cache out in PAGE_SIZE chunks as and when requested by | ||
87 | the netfs('s) using it. | ||
88 | |||
89 | |||
90 | FS-Cache provides the following facilities: | ||
91 | |||
92 | (1) More than one cache can be used at once. Caches can be selected | ||
93 | explicitly by use of tags. | ||
94 | |||
95 | (2) Caches can be added / removed at any time. | ||
96 | |||
97 | (3) The netfs is provided with an interface that allows either party to | ||
98 | withdraw caching facilities from a file (required for (2)). | ||
99 | |||
100 | (4) The interface to the netfs returns as few errors as possible, preferring | ||
101 | rather to let the netfs remain oblivious. | ||
102 | |||
103 | (5) Cookies are used to represent indices, files and other objects to the | ||
104 | netfs. The simplest cookie is just a NULL pointer - indicating nothing | ||
105 | cached there. | ||
106 | |||
107 | (6) The netfs is allowed to propose - dynamically - any index hierarchy it | ||
108 | desires, though it must be aware that the index search function is | ||
109 | recursive, stack space is limited, and indices can only be children of | ||
110 | indices. | ||
111 | |||
112 | (7) Data I/O is done direct to and from the netfs's pages. The netfs | ||
113 | indicates that page A is at index B of the data-file represented by cookie | ||
114 | C, and that it should be read or written. The cache backend may or may | ||
115 | not start I/O on that page, but if it does, a netfs callback will be | ||
116 | invoked to indicate completion. The I/O may be either synchronous or | ||
117 | asynchronous. | ||
118 | |||
119 | (8) Cookies can be "retired" upon release. At this point FS-Cache will mark | ||
120 | them as obsolete and the index hierarchy rooted at that point will get | ||
121 | recycled. | ||
122 | |||
123 | (9) The netfs provides a "match" function for index searches. In addition to | ||
124 | saying whether a match was made or not, this can also specify that an | ||
125 | entry should be updated or deleted. | ||
126 | |||
127 | (10) As much as possible is done asynchronously. | ||
128 | |||
129 | |||
130 | FS-Cache maintains a virtual indexing tree in which all indices, files, objects | ||
131 | and pages are kept. Bits of this tree may actually reside in one or more | ||
132 | caches. | ||
133 | |||
134 | FSDEF | ||
135 | | | ||
136 | +------------------------------------+ | ||
137 | | | | ||
138 | NFS AFS | ||
139 | | | | ||
140 | +--------------------------+ +-----------+ | ||
141 | | | | | | ||
142 | homedir mirror afs.org redhat.com | ||
143 | | | | | ||
144 | +------------+ +---------------+ +----------+ | ||
145 | | | | | | | | ||
146 | 00001 00002 00007 00125 vol00001 vol00002 | ||
147 | | | | | | | ||
148 | +---+---+ +-----+ +---+ +------+------+ +-----+----+ | ||
149 | | | | | | | | | | | | | | | ||
150 | PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak | ||
151 | | | | ||
152 | PG0 +-------+ | ||
153 | | | | ||
154 | 00001 00003 | ||
155 | | | ||
156 | +---+---+ | ||
157 | | | | | ||
158 | PG0 PG1 PG2 | ||
159 | |||
160 | In the example above, you can see two netfs's being backed: NFS and AFS. These | ||
161 | have different index hierarchies: | ||
162 | |||
163 | (*) The NFS primary index contains per-server indices. Each server index is | ||
164 | indexed by NFS file handles to get data file objects. Each data file | ||
165 | objects can have an array of pages, but may also have further child | ||
166 | objects, such as extended attributes and directory entries. Extended | ||
167 | attribute objects themselves have page-array contents. | ||
168 | |||
169 | (*) The AFS primary index contains per-cell indices. Each cell index contains | ||
170 | per-logical-volume indices. Each of volume index contains up to three | ||
171 | indices for the read-write, read-only and backup mirrors of those volumes. | ||
172 | Each of these contains vnode data file objects, each of which contains an | ||
173 | array of pages. | ||
174 | |||
175 | The very top index is the FS-Cache master index in which individual netfs's | ||
176 | have entries. | ||
177 | |||
178 | Any index object may reside in more than one cache, provided it only has index | ||
179 | children. Any index with non-index object children will be assumed to only | ||
180 | reside in one cache. | ||
181 | |||
182 | |||
183 | The netfs API to FS-Cache can be found in: | ||
184 | |||
185 | Documentation/filesystems/caching/netfs-api.txt | ||
186 | |||
187 | The cache backend API to FS-Cache can be found in: | ||
188 | |||
189 | Documentation/filesystems/caching/backend-api.txt | ||
190 | |||
191 | A description of the internal representations and object state machine can be | ||
192 | found in: | ||
193 | |||
194 | Documentation/filesystems/caching/object.txt | ||
195 | |||
196 | |||
197 | ======================= | ||
198 | STATISTICAL INFORMATION | ||
199 | ======================= | ||
200 | |||
201 | If FS-Cache is compiled with the following options enabled: | ||
202 | |||
203 | CONFIG_FSCACHE_STATS=y | ||
204 | CONFIG_FSCACHE_HISTOGRAM=y | ||
205 | |||
206 | then it will gather certain statistics and display them through a number of | ||
207 | proc files. | ||
208 | |||
209 | (*) /proc/fs/fscache/stats | ||
210 | |||
211 | This shows counts of a number of events that can happen in FS-Cache: | ||
212 | |||
213 | CLASS EVENT MEANING | ||
214 | ======= ======= ======================================================= | ||
215 | Cookies idx=N Number of index cookies allocated | ||
216 | dat=N Number of data storage cookies allocated | ||
217 | spc=N Number of special cookies allocated | ||
218 | Objects alc=N Number of objects allocated | ||
219 | nal=N Number of object allocation failures | ||
220 | avl=N Number of objects that reached the available state | ||
221 | ded=N Number of objects that reached the dead state | ||
222 | ChkAux non=N Number of objects that didn't have a coherency check | ||
223 | ok=N Number of objects that passed a coherency check | ||
224 | upd=N Number of objects that needed a coherency data update | ||
225 | obs=N Number of objects that were declared obsolete | ||
226 | Pages mrk=N Number of pages marked as being cached | ||
227 | unc=N Number of uncache page requests seen | ||
228 | Acquire n=N Number of acquire cookie requests seen | ||
229 | nul=N Number of acq reqs given a NULL parent | ||
230 | noc=N Number of acq reqs rejected due to no cache available | ||
231 | ok=N Number of acq reqs succeeded | ||
232 | nbf=N Number of acq reqs rejected due to error | ||
233 | oom=N Number of acq reqs failed on ENOMEM | ||
234 | Lookups n=N Number of lookup calls made on cache backends | ||
235 | neg=N Number of negative lookups made | ||
236 | pos=N Number of positive lookups made | ||
237 | crt=N Number of objects created by lookup | ||
238 | Updates n=N Number of update cookie requests seen | ||
239 | nul=N Number of upd reqs given a NULL parent | ||
240 | run=N Number of upd reqs granted CPU time | ||
241 | Relinqs n=N Number of relinquish cookie requests seen | ||
242 | nul=N Number of rlq reqs given a NULL parent | ||
243 | wcr=N Number of rlq reqs waited on completion of creation | ||
244 | AttrChg n=N Number of attribute changed requests seen | ||
245 | ok=N Number of attr changed requests queued | ||
246 | nbf=N Number of attr changed rejected -ENOBUFS | ||
247 | oom=N Number of attr changed failed -ENOMEM | ||
248 | run=N Number of attr changed ops given CPU time | ||
249 | Allocs n=N Number of allocation requests seen | ||
250 | ok=N Number of successful alloc reqs | ||
251 | wt=N Number of alloc reqs that waited on lookup completion | ||
252 | nbf=N Number of alloc reqs rejected -ENOBUFS | ||
253 | ops=N Number of alloc reqs submitted | ||
254 | owt=N Number of alloc reqs waited for CPU time | ||
255 | Retrvls n=N Number of retrieval (read) requests seen | ||
256 | ok=N Number of successful retr reqs | ||
257 | wt=N Number of retr reqs that waited on lookup completion | ||
258 | nod=N Number of retr reqs returned -ENODATA | ||
259 | nbf=N Number of retr reqs rejected -ENOBUFS | ||
260 | int=N Number of retr reqs aborted -ERESTARTSYS | ||
261 | oom=N Number of retr reqs failed -ENOMEM | ||
262 | ops=N Number of retr reqs submitted | ||
263 | owt=N Number of retr reqs waited for CPU time | ||
264 | Stores n=N Number of storage (write) requests seen | ||
265 | ok=N Number of successful store reqs | ||
266 | agn=N Number of store reqs on a page already pending storage | ||
267 | nbf=N Number of store reqs rejected -ENOBUFS | ||
268 | oom=N Number of store reqs failed -ENOMEM | ||
269 | ops=N Number of store reqs submitted | ||
270 | run=N Number of store reqs granted CPU time | ||
271 | Ops pend=N Number of times async ops added to pending queues | ||
272 | run=N Number of times async ops given CPU time | ||
273 | enq=N Number of times async ops queued for processing | ||
274 | dfr=N Number of async ops queued for deferred release | ||
275 | rel=N Number of async ops released | ||
276 | gc=N Number of deferred-release async ops garbage collected | ||
277 | |||
278 | |||
279 | (*) /proc/fs/fscache/histogram | ||
280 | |||
281 | cat /proc/fs/fscache/histogram | ||
282 | JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS | ||
283 | ===== ===== ========= ========= ========= ========= ========= | ||
284 | |||
285 | This shows the breakdown of the number of times each amount of time | ||
286 | between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The | ||
287 | columns are as follows: | ||
288 | |||
289 | COLUMN TIME MEASUREMENT | ||
290 | ======= ======================================================= | ||
291 | OBJ INST Length of time to instantiate an object | ||
292 | OP RUNS Length of time a call to process an operation took | ||
293 | OBJ RUNS Length of time a call to process an object event took | ||
294 | RETRV DLY Time between an requesting a read and lookup completing | ||
295 | RETRIEVLS Time between beginning and end of a retrieval | ||
296 | |||
297 | Each row shows the number of events that took a particular range of times. | ||
298 | Each step is 1 jiffy in size. The JIFS column indicates the particular | ||
299 | jiffy range covered, and the SECS field the equivalent number of seconds. | ||
300 | |||
301 | |||
302 | ========= | ||
303 | DEBUGGING | ||
304 | ========= | ||
305 | |||
306 | If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime | ||
307 | debugging enabled by adjusting the value in: | ||
308 | |||
309 | /sys/module/fscache/parameters/debug | ||
310 | |||
311 | This is a bitmask of debugging streams to enable: | ||
312 | |||
313 | BIT VALUE STREAM POINT | ||
314 | ======= ======= =============================== ======================= | ||
315 | 0 1 Cache management Function entry trace | ||
316 | 1 2 Function exit trace | ||
317 | 2 4 General | ||
318 | 3 8 Cookie management Function entry trace | ||
319 | 4 16 Function exit trace | ||
320 | 5 32 General | ||
321 | 6 64 Page handling Function entry trace | ||
322 | 7 128 Function exit trace | ||
323 | 8 256 General | ||
324 | 9 512 Operation management Function entry trace | ||
325 | 10 1024 Function exit trace | ||
326 | 11 2048 General | ||
327 | |||
328 | The appropriate set of values should be OR'd together and the result written to | ||
329 | the control file. For example: | ||
330 | |||
331 | echo $((1|8|64)) >/sys/module/fscache/parameters/debug | ||
332 | |||
333 | will turn on all function entry debugging. | ||
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt new file mode 100644 index 000000000000..2666b1ed5e9e --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -0,0 +1,778 @@ | |||
1 | =============================== | ||
2 | FS-CACHE NETWORK FILESYSTEM API | ||
3 | =============================== | ||
4 | |||
5 | There's an API by which a network filesystem can make use of the FS-Cache | ||
6 | facilities. This is based around a number of principles: | ||
7 | |||
8 | (1) Caches can store a number of different object types. There are two main | ||
9 | object types: indices and files. The first is a special type used by | ||
10 | FS-Cache to make finding objects faster and to make retiring of groups of | ||
11 | objects easier. | ||
12 | |||
13 | (2) Every index, file or other object is represented by a cookie. This cookie | ||
14 | may or may not have anything associated with it, but the netfs doesn't | ||
15 | need to care. | ||
16 | |||
17 | (3) Barring the top-level index (one entry per cached netfs), the index | ||
18 | hierarchy for each netfs is structured according the whim of the netfs. | ||
19 | |||
20 | This API is declared in <linux/fscache.h>. | ||
21 | |||
22 | This document contains the following sections: | ||
23 | |||
24 | (1) Network filesystem definition | ||
25 | (2) Index definition | ||
26 | (3) Object definition | ||
27 | (4) Network filesystem (un)registration | ||
28 | (5) Cache tag lookup | ||
29 | (6) Index registration | ||
30 | (7) Data file registration | ||
31 | (8) Miscellaneous object registration | ||
32 | (9) Setting the data file size | ||
33 | (10) Page alloc/read/write | ||
34 | (11) Page uncaching | ||
35 | (12) Index and data file update | ||
36 | (13) Miscellaneous cookie operations | ||
37 | (14) Cookie unregistration | ||
38 | (15) Index and data file invalidation | ||
39 | (16) FS-Cache specific page flags. | ||
40 | |||
41 | |||
42 | ============================= | ||
43 | NETWORK FILESYSTEM DEFINITION | ||
44 | ============================= | ||
45 | |||
46 | FS-Cache needs a description of the network filesystem. This is specified | ||
47 | using a record of the following structure: | ||
48 | |||
49 | struct fscache_netfs { | ||
50 | uint32_t version; | ||
51 | const char *name; | ||
52 | struct fscache_cookie *primary_index; | ||
53 | ... | ||
54 | }; | ||
55 | |||
56 | This first two fields should be filled in before registration, and the third | ||
57 | will be filled in by the registration function; any other fields should just be | ||
58 | ignored and are for internal use only. | ||
59 | |||
60 | The fields are: | ||
61 | |||
62 | (1) The name of the netfs (used as the key in the toplevel index). | ||
63 | |||
64 | (2) The version of the netfs (if the name matches but the version doesn't, the | ||
65 | entire in-cache hierarchy for this netfs will be scrapped and begun | ||
66 | afresh). | ||
67 | |||
68 | (3) The cookie representing the primary index will be allocated according to | ||
69 | another parameter passed into the registration function. | ||
70 | |||
71 | For example, kAFS (linux/fs/afs/) uses the following definitions to describe | ||
72 | itself: | ||
73 | |||
74 | struct fscache_netfs afs_cache_netfs = { | ||
75 | .version = 0, | ||
76 | .name = "afs", | ||
77 | }; | ||
78 | |||
79 | |||
80 | ================ | ||
81 | INDEX DEFINITION | ||
82 | ================ | ||
83 | |||
84 | Indices are used for two purposes: | ||
85 | |||
86 | (1) To aid the finding of a file based on a series of keys (such as AFS's | ||
87 | "cell", "volume ID", "vnode ID"). | ||
88 | |||
89 | (2) To make it easier to discard a subset of all the files cached based around | ||
90 | a particular key - for instance to mirror the removal of an AFS volume. | ||
91 | |||
92 | However, since it's unlikely that any two netfs's are going to want to define | ||
93 | their index hierarchies in quite the same way, FS-Cache tries to impose as few | ||
94 | restraints as possible on how an index is structured and where it is placed in | ||
95 | the tree. The netfs can even mix indices and data files at the same level, but | ||
96 | it's not recommended. | ||
97 | |||
98 | Each index entry consists of a key of indeterminate length plus some auxilliary | ||
99 | data, also of indeterminate length. | ||
100 | |||
101 | There are some limits on indices: | ||
102 | |||
103 | (1) Any index containing non-index objects should be restricted to a single | ||
104 | cache. Any such objects created within an index will be created in the | ||
105 | first cache only. The cache in which an index is created can be | ||
106 | controlled by cache tags (see below). | ||
107 | |||
108 | (2) The entry data must be atomically journallable, so it is limited to about | ||
109 | 400 bytes at present. At least 400 bytes will be available. | ||
110 | |||
111 | (3) The depth of the index tree should be judged with care as the search | ||
112 | function is recursive. Too many layers will run the kernel out of stack. | ||
113 | |||
114 | |||
115 | ================= | ||
116 | OBJECT DEFINITION | ||
117 | ================= | ||
118 | |||
119 | To define an object, a structure of the following type should be filled out: | ||
120 | |||
121 | struct fscache_cookie_def | ||
122 | { | ||
123 | uint8_t name[16]; | ||
124 | uint8_t type; | ||
125 | |||
126 | struct fscache_cache_tag *(*select_cache)( | ||
127 | const void *parent_netfs_data, | ||
128 | const void *cookie_netfs_data); | ||
129 | |||
130 | uint16_t (*get_key)(const void *cookie_netfs_data, | ||
131 | void *buffer, | ||
132 | uint16_t bufmax); | ||
133 | |||
134 | void (*get_attr)(const void *cookie_netfs_data, | ||
135 | uint64_t *size); | ||
136 | |||
137 | uint16_t (*get_aux)(const void *cookie_netfs_data, | ||
138 | void *buffer, | ||
139 | uint16_t bufmax); | ||
140 | |||
141 | enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, | ||
142 | const void *data, | ||
143 | uint16_t datalen); | ||
144 | |||
145 | void (*get_context)(void *cookie_netfs_data, void *context); | ||
146 | |||
147 | void (*put_context)(void *cookie_netfs_data, void *context); | ||
148 | |||
149 | void (*mark_pages_cached)(void *cookie_netfs_data, | ||
150 | struct address_space *mapping, | ||
151 | struct pagevec *cached_pvec); | ||
152 | |||
153 | void (*now_uncached)(void *cookie_netfs_data); | ||
154 | }; | ||
155 | |||
156 | This has the following fields: | ||
157 | |||
158 | (1) The type of the object [mandatory]. | ||
159 | |||
160 | This is one of the following values: | ||
161 | |||
162 | (*) FSCACHE_COOKIE_TYPE_INDEX | ||
163 | |||
164 | This defines an index, which is a special FS-Cache type. | ||
165 | |||
166 | (*) FSCACHE_COOKIE_TYPE_DATAFILE | ||
167 | |||
168 | This defines an ordinary data file. | ||
169 | |||
170 | (*) Any other value between 2 and 255 | ||
171 | |||
172 | This defines an extraordinary object such as an XATTR. | ||
173 | |||
174 | (2) The name of the object type (NUL terminated unless all 16 chars are used) | ||
175 | [optional]. | ||
176 | |||
177 | (3) A function to select the cache in which to store an index [optional]. | ||
178 | |||
179 | This function is invoked when an index needs to be instantiated in a cache | ||
180 | during the instantiation of a non-index object. Only the immediate index | ||
181 | parent for the non-index object will be queried. Any indices above that | ||
182 | in the hierarchy may be stored in multiple caches. This function does not | ||
183 | need to be supplied for any non-index object or any index that will only | ||
184 | have index children. | ||
185 | |||
186 | If this function is not supplied or if it returns NULL then the first | ||
187 | cache in the parent's list will be chosen, or failing that, the first | ||
188 | cache in the master list. | ||
189 | |||
190 | (4) A function to retrieve an object's key from the netfs [mandatory]. | ||
191 | |||
192 | This function will be called with the netfs data that was passed to the | ||
193 | cookie acquisition function and the maximum length of key data that it may | ||
194 | provide. It should write the required key data into the given buffer and | ||
195 | return the quantity it wrote. | ||
196 | |||
197 | (5) A function to retrieve attribute data from the netfs [optional]. | ||
198 | |||
199 | This function will be called with the netfs data that was passed to the | ||
200 | cookie acquisition function. It should return the size of the file if | ||
201 | this is a data file. The size may be used to govern how much cache must | ||
202 | be reserved for this file in the cache. | ||
203 | |||
204 | If the function is absent, a file size of 0 is assumed. | ||
205 | |||
206 | (6) A function to retrieve auxilliary data from the netfs [optional]. | ||
207 | |||
208 | This function will be called with the netfs data that was passed to the | ||
209 | cookie acquisition function and the maximum length of auxilliary data that | ||
210 | it may provide. It should write the auxilliary data into the given buffer | ||
211 | and return the quantity it wrote. | ||
212 | |||
213 | If this function is absent, the auxilliary data length will be set to 0. | ||
214 | |||
215 | The length of the auxilliary data buffer may be dependent on the key | ||
216 | length. A netfs mustn't rely on being able to provide more than 400 bytes | ||
217 | for both. | ||
218 | |||
219 | (7) A function to check the auxilliary data [optional]. | ||
220 | |||
221 | This function will be called to check that a match found in the cache for | ||
222 | this object is valid. For instance with AFS it could check the auxilliary | ||
223 | data against the data version number returned by the server to determine | ||
224 | whether the index entry in a cache is still valid. | ||
225 | |||
226 | If this function is absent, it will be assumed that matching objects in a | ||
227 | cache are always valid. | ||
228 | |||
229 | If present, the function should return one of the following values: | ||
230 | |||
231 | (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is | ||
232 | (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update | ||
233 | (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted | ||
234 | |||
235 | This function can also be used to extract data from the auxilliary data in | ||
236 | the cache and copy it into the netfs's structures. | ||
237 | |||
238 | (8) A pair of functions to manage contexts for the completion callback | ||
239 | [optional]. | ||
240 | |||
241 | The cache read/write functions are passed a context which is then passed | ||
242 | to the I/O completion callback function. To ensure this context remains | ||
243 | valid until after the I/O completion is called, two functions may be | ||
244 | provided: one to get an extra reference on the context, and one to drop a | ||
245 | reference to it. | ||
246 | |||
247 | If the context is not used or is a type of object that won't go out of | ||
248 | scope, then these functions are not required. These functions are not | ||
249 | required for indices as indices may not contain data. These functions may | ||
250 | be called in interrupt context and so may not sleep. | ||
251 | |||
252 | (9) A function to mark a page as retaining cache metadata [optional]. | ||
253 | |||
254 | This is called by the cache to indicate that it is retaining in-memory | ||
255 | information for this page and that the netfs should uncache the page when | ||
256 | it has finished. This does not indicate whether there's data on the disk | ||
257 | or not. Note that several pages at once may be presented for marking. | ||
258 | |||
259 | The PG_fscache bit is set on the pages before this function would be | ||
260 | called, so the function need not be provided if this is sufficient. | ||
261 | |||
262 | This function is not required for indices as they're not permitted data. | ||
263 | |||
264 | (10) A function to unmark all the pages retaining cache metadata [mandatory]. | ||
265 | |||
266 | This is called by FS-Cache to indicate that a backing store is being | ||
267 | unbound from a cookie and that all the marks on the pages should be | ||
268 | cleared to prevent confusion. Note that the cache will have torn down all | ||
269 | its tracking information so that the pages don't need to be explicitly | ||
270 | uncached. | ||
271 | |||
272 | This function is not required for indices as they're not permitted data. | ||
273 | |||
274 | |||
275 | =================================== | ||
276 | NETWORK FILESYSTEM (UN)REGISTRATION | ||
277 | =================================== | ||
278 | |||
279 | The first step is to declare the network filesystem to the cache. This also | ||
280 | involves specifying the layout of the primary index (for AFS, this would be the | ||
281 | "cell" level). | ||
282 | |||
283 | The registration function is: | ||
284 | |||
285 | int fscache_register_netfs(struct fscache_netfs *netfs); | ||
286 | |||
287 | It just takes a pointer to the netfs definition. It returns 0 or an error as | ||
288 | appropriate. | ||
289 | |||
290 | For kAFS, registration is done as follows: | ||
291 | |||
292 | ret = fscache_register_netfs(&afs_cache_netfs); | ||
293 | |||
294 | The last step is, of course, unregistration: | ||
295 | |||
296 | void fscache_unregister_netfs(struct fscache_netfs *netfs); | ||
297 | |||
298 | |||
299 | ================ | ||
300 | CACHE TAG LOOKUP | ||
301 | ================ | ||
302 | |||
303 | FS-Cache permits the use of more than one cache. To permit particular index | ||
304 | subtrees to be bound to particular caches, the second step is to look up cache | ||
305 | representation tags. This step is optional; it can be left entirely up to | ||
306 | FS-Cache as to which cache should be used. The problem with doing that is that | ||
307 | FS-Cache will always pick the first cache that was registered. | ||
308 | |||
309 | To get the representation for a named tag: | ||
310 | |||
311 | struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); | ||
312 | |||
313 | This takes a text string as the name and returns a representation of a tag. It | ||
314 | will never return an error. It may return a dummy tag, however, if it runs out | ||
315 | of memory; this will inhibit caching with this tag. | ||
316 | |||
317 | Any representation so obtained must be released by passing it to this function: | ||
318 | |||
319 | void fscache_release_cache_tag(struct fscache_cache_tag *tag); | ||
320 | |||
321 | The tag will be retrieved by FS-Cache when it calls the object definition | ||
322 | operation select_cache(). | ||
323 | |||
324 | |||
325 | ================== | ||
326 | INDEX REGISTRATION | ||
327 | ================== | ||
328 | |||
329 | The third step is to inform FS-Cache about part of an index hierarchy that can | ||
330 | be used to locate files. This is done by requesting a cookie for each index in | ||
331 | the path to the file: | ||
332 | |||
333 | struct fscache_cookie * | ||
334 | fscache_acquire_cookie(struct fscache_cookie *parent, | ||
335 | const struct fscache_object_def *def, | ||
336 | void *netfs_data); | ||
337 | |||
338 | This function creates an index entry in the index represented by parent, | ||
339 | filling in the index entry by calling the operations pointed to by def. | ||
340 | |||
341 | Note that this function never returns an error - all errors are handled | ||
342 | internally. It may, however, return NULL to indicate no cookie. It is quite | ||
343 | acceptable to pass this token back to this function as the parent to another | ||
344 | acquisition (or even to the relinquish cookie, read page and write page | ||
345 | functions - see below). | ||
346 | |||
347 | Note also that no indices are actually created in a cache until a non-index | ||
348 | object needs to be created somewhere down the hierarchy. Furthermore, an index | ||
349 | may be created in several different caches independently at different times. | ||
350 | This is all handled transparently, and the netfs doesn't see any of it. | ||
351 | |||
352 | For example, with AFS, a cell would be added to the primary index. This index | ||
353 | entry would have a dependent inode containing a volume location index for the | ||
354 | volume mappings within this cell: | ||
355 | |||
356 | cell->cache = | ||
357 | fscache_acquire_cookie(afs_cache_netfs.primary_index, | ||
358 | &afs_cell_cache_index_def, | ||
359 | cell); | ||
360 | |||
361 | Then when a volume location was accessed, it would be entered into the cell's | ||
362 | index and an inode would be allocated that acts as a volume type and hash chain | ||
363 | combination: | ||
364 | |||
365 | vlocation->cache = | ||
366 | fscache_acquire_cookie(cell->cache, | ||
367 | &afs_vlocation_cache_index_def, | ||
368 | vlocation); | ||
369 | |||
370 | And then a particular flavour of volume (R/O for example) could be added to | ||
371 | that index, creating another index for vnodes (AFS inode equivalents): | ||
372 | |||
373 | volume->cache = | ||
374 | fscache_acquire_cookie(vlocation->cache, | ||
375 | &afs_volume_cache_index_def, | ||
376 | volume); | ||
377 | |||
378 | |||
379 | ====================== | ||
380 | DATA FILE REGISTRATION | ||
381 | ====================== | ||
382 | |||
383 | The fourth step is to request a data file be created in the cache. This is | ||
384 | identical to index cookie acquisition. The only difference is that the type in | ||
385 | the object definition should be something other than index type. | ||
386 | |||
387 | vnode->cache = | ||
388 | fscache_acquire_cookie(volume->cache, | ||
389 | &afs_vnode_cache_object_def, | ||
390 | vnode); | ||
391 | |||
392 | |||
393 | ================================= | ||
394 | MISCELLANEOUS OBJECT REGISTRATION | ||
395 | ================================= | ||
396 | |||
397 | An optional step is to request an object of miscellaneous type be created in | ||
398 | the cache. This is almost identical to index cookie acquisition. The only | ||
399 | difference is that the type in the object definition should be something other | ||
400 | than index type. Whilst the parent object could be an index, it's more likely | ||
401 | it would be some other type of object such as a data file. | ||
402 | |||
403 | xattr->cache = | ||
404 | fscache_acquire_cookie(vnode->cache, | ||
405 | &afs_xattr_cache_object_def, | ||
406 | xattr); | ||
407 | |||
408 | Miscellaneous objects might be used to store extended attributes or directory | ||
409 | entries for example. | ||
410 | |||
411 | |||
412 | ========================== | ||
413 | SETTING THE DATA FILE SIZE | ||
414 | ========================== | ||
415 | |||
416 | The fifth step is to set the physical attributes of the file, such as its size. | ||
417 | This doesn't automatically reserve any space in the cache, but permits the | ||
418 | cache to adjust its metadata for data tracking appropriately: | ||
419 | |||
420 | int fscache_attr_changed(struct fscache_cookie *cookie); | ||
421 | |||
422 | The cache will return -ENOBUFS if there is no backing cache or if there is no | ||
423 | space to allocate any extra metadata required in the cache. The attributes | ||
424 | will be accessed with the get_attr() cookie definition operation. | ||
425 | |||
426 | Note that attempts to read or write data pages in the cache over this size may | ||
427 | be rebuffed with -ENOBUFS. | ||
428 | |||
429 | This operation schedules an attribute adjustment to happen asynchronously at | ||
430 | some point in the future, and as such, it may happen after the function returns | ||
431 | to the caller. The attribute adjustment excludes read and write operations. | ||
432 | |||
433 | |||
434 | ===================== | ||
435 | PAGE READ/ALLOC/WRITE | ||
436 | ===================== | ||
437 | |||
438 | And the sixth step is to store and retrieve pages in the cache. There are | ||
439 | three functions that are used to do this. | ||
440 | |||
441 | Note: | ||
442 | |||
443 | (1) A page should not be re-read or re-allocated without uncaching it first. | ||
444 | |||
445 | (2) A read or allocated page must be uncached when the netfs page is released | ||
446 | from the pagecache. | ||
447 | |||
448 | (3) A page should only be written to the cache if previous read or allocated. | ||
449 | |||
450 | This permits the cache to maintain its page tracking in proper order. | ||
451 | |||
452 | |||
453 | PAGE READ | ||
454 | --------- | ||
455 | |||
456 | Firstly, the netfs should ask FS-Cache to examine the caches and read the | ||
457 | contents cached for a particular page of a particular file if present, or else | ||
458 | allocate space to store the contents if not: | ||
459 | |||
460 | typedef | ||
461 | void (*fscache_rw_complete_t)(struct page *page, | ||
462 | void *context, | ||
463 | int error); | ||
464 | |||
465 | int fscache_read_or_alloc_page(struct fscache_cookie *cookie, | ||
466 | struct page *page, | ||
467 | fscache_rw_complete_t end_io_func, | ||
468 | void *context, | ||
469 | gfp_t gfp); | ||
470 | |||
471 | The cookie argument must specify a cookie for an object that isn't an index, | ||
472 | the page specified will have the data loaded into it (and is also used to | ||
473 | specify the page number), and the gfp argument is used to control how any | ||
474 | memory allocations made are satisfied. | ||
475 | |||
476 | If the cookie indicates the inode is not cached: | ||
477 | |||
478 | (1) The function will return -ENOBUFS. | ||
479 | |||
480 | Else if there's a copy of the page resident in the cache: | ||
481 | |||
482 | (1) The mark_pages_cached() cookie operation will be called on that page. | ||
483 | |||
484 | (2) The function will submit a request to read the data from the cache's | ||
485 | backing device directly into the page specified. | ||
486 | |||
487 | (3) The function will return 0. | ||
488 | |||
489 | (4) When the read is complete, end_io_func() will be invoked with: | ||
490 | |||
491 | (*) The netfs data supplied when the cookie was created. | ||
492 | |||
493 | (*) The page descriptor. | ||
494 | |||
495 | (*) The context argument passed to the above function. This will be | ||
496 | maintained with the get_context/put_context functions mentioned above. | ||
497 | |||
498 | (*) An argument that's 0 on success or negative for an error code. | ||
499 | |||
500 | If an error occurs, it should be assumed that the page contains no usable | ||
501 | data. | ||
502 | |||
503 | end_io_func() will be called in process context if the read is results in | ||
504 | an error, but it might be called in interrupt context if the read is | ||
505 | successful. | ||
506 | |||
507 | Otherwise, if there's not a copy available in cache, but the cache may be able | ||
508 | to store the page: | ||
509 | |||
510 | (1) The mark_pages_cached() cookie operation will be called on that page. | ||
511 | |||
512 | (2) A block may be reserved in the cache and attached to the object at the | ||
513 | appropriate place. | ||
514 | |||
515 | (3) The function will return -ENODATA. | ||
516 | |||
517 | This function may also return -ENOMEM or -EINTR, in which case it won't have | ||
518 | read any data from the cache. | ||
519 | |||
520 | |||
521 | PAGE ALLOCATE | ||
522 | ------------- | ||
523 | |||
524 | Alternatively, if there's not expected to be any data in the cache for a page | ||
525 | because the file has been extended, a block can simply be allocated instead: | ||
526 | |||
527 | int fscache_alloc_page(struct fscache_cookie *cookie, | ||
528 | struct page *page, | ||
529 | gfp_t gfp); | ||
530 | |||
531 | This is similar to the fscache_read_or_alloc_page() function, except that it | ||
532 | never reads from the cache. It will return 0 if a block has been allocated, | ||
533 | rather than -ENODATA as the other would. One or the other must be performed | ||
534 | before writing to the cache. | ||
535 | |||
536 | The mark_pages_cached() cookie operation will be called on the page if | ||
537 | successful. | ||
538 | |||
539 | |||
540 | PAGE WRITE | ||
541 | ---------- | ||
542 | |||
543 | Secondly, if the netfs changes the contents of the page (either due to an | ||
544 | initial download or if a user performs a write), then the page should be | ||
545 | written back to the cache: | ||
546 | |||
547 | int fscache_write_page(struct fscache_cookie *cookie, | ||
548 | struct page *page, | ||
549 | gfp_t gfp); | ||
550 | |||
551 | The cookie argument must specify a data file cookie, the page specified should | ||
552 | contain the data to be written (and is also used to specify the page number), | ||
553 | and the gfp argument is used to control how any memory allocations made are | ||
554 | satisfied. | ||
555 | |||
556 | The page must have first been read or allocated successfully and must not have | ||
557 | been uncached before writing is performed. | ||
558 | |||
559 | If the cookie indicates the inode is not cached then: | ||
560 | |||
561 | (1) The function will return -ENOBUFS. | ||
562 | |||
563 | Else if space can be allocated in the cache to hold this page: | ||
564 | |||
565 | (1) PG_fscache_write will be set on the page. | ||
566 | |||
567 | (2) The function will submit a request to write the data to cache's backing | ||
568 | device directly from the page specified. | ||
569 | |||
570 | (3) The function will return 0. | ||
571 | |||
572 | (4) When the write is complete PG_fscache_write is cleared on the page and | ||
573 | anyone waiting for that bit will be woken up. | ||
574 | |||
575 | Else if there's no space available in the cache, -ENOBUFS will be returned. It | ||
576 | is also possible for the PG_fscache_write bit to be cleared when no write took | ||
577 | place if unforeseen circumstances arose (such as a disk error). | ||
578 | |||
579 | Writing takes place asynchronously. | ||
580 | |||
581 | |||
582 | MULTIPLE PAGE READ | ||
583 | ------------------ | ||
584 | |||
585 | A facility is provided to read several pages at once, as requested by the | ||
586 | readpages() address space operation: | ||
587 | |||
588 | int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | ||
589 | struct address_space *mapping, | ||
590 | struct list_head *pages, | ||
591 | int *nr_pages, | ||
592 | fscache_rw_complete_t end_io_func, | ||
593 | void *context, | ||
594 | gfp_t gfp); | ||
595 | |||
596 | This works in a similar way to fscache_read_or_alloc_page(), except: | ||
597 | |||
598 | (1) Any page it can retrieve data for is removed from pages and nr_pages and | ||
599 | dispatched for reading to the disk. Reads of adjacent pages on disk may | ||
600 | be merged for greater efficiency. | ||
601 | |||
602 | (2) The mark_pages_cached() cookie operation will be called on several pages | ||
603 | at once if they're being read or allocated. | ||
604 | |||
605 | (3) If there was an general error, then that error will be returned. | ||
606 | |||
607 | Else if some pages couldn't be allocated or read, then -ENOBUFS will be | ||
608 | returned. | ||
609 | |||
610 | Else if some pages couldn't be read but were allocated, then -ENODATA will | ||
611 | be returned. | ||
612 | |||
613 | Otherwise, if all pages had reads dispatched, then 0 will be returned, the | ||
614 | list will be empty and *nr_pages will be 0. | ||
615 | |||
616 | (4) end_io_func will be called once for each page being read as the reads | ||
617 | complete. It will be called in process context if error != 0, but it may | ||
618 | be called in interrupt context if there is no error. | ||
619 | |||
620 | Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude | ||
621 | some of the pages being read and some being allocated. Those pages will have | ||
622 | been marked appropriately and will need uncaching. | ||
623 | |||
624 | |||
625 | ============== | ||
626 | PAGE UNCACHING | ||
627 | ============== | ||
628 | |||
629 | To uncache a page, this function should be called: | ||
630 | |||
631 | void fscache_uncache_page(struct fscache_cookie *cookie, | ||
632 | struct page *page); | ||
633 | |||
634 | This function permits the cache to release any in-memory representation it | ||
635 | might be holding for this netfs page. This function must be called once for | ||
636 | each page on which the read or write page functions above have been called to | ||
637 | make sure the cache's in-memory tracking information gets torn down. | ||
638 | |||
639 | Note that pages can't be explicitly deleted from the a data file. The whole | ||
640 | data file must be retired (see the relinquish cookie function below). | ||
641 | |||
642 | Furthermore, note that this does not cancel the asynchronous read or write | ||
643 | operation started by the read/alloc and write functions, so the page | ||
644 | invalidation and release functions must use: | ||
645 | |||
646 | bool fscache_check_page_write(struct fscache_cookie *cookie, | ||
647 | struct page *page); | ||
648 | |||
649 | to see if a page is being written to the cache, and: | ||
650 | |||
651 | void fscache_wait_on_page_write(struct fscache_cookie *cookie, | ||
652 | struct page *page); | ||
653 | |||
654 | to wait for it to finish if it is. | ||
655 | |||
656 | |||
657 | ========================== | ||
658 | INDEX AND DATA FILE UPDATE | ||
659 | ========================== | ||
660 | |||
661 | To request an update of the index data for an index or other object, the | ||
662 | following function should be called: | ||
663 | |||
664 | void fscache_update_cookie(struct fscache_cookie *cookie); | ||
665 | |||
666 | This function will refer back to the netfs_data pointer stored in the cookie by | ||
667 | the acquisition function to obtain the data to write into each revised index | ||
668 | entry. The update method in the parent index definition will be called to | ||
669 | transfer the data. | ||
670 | |||
671 | Note that partial updates may happen automatically at other times, such as when | ||
672 | data blocks are added to a data file object. | ||
673 | |||
674 | |||
675 | =============================== | ||
676 | MISCELLANEOUS COOKIE OPERATIONS | ||
677 | =============================== | ||
678 | |||
679 | There are a number of operations that can be used to control cookies: | ||
680 | |||
681 | (*) Cookie pinning: | ||
682 | |||
683 | int fscache_pin_cookie(struct fscache_cookie *cookie); | ||
684 | void fscache_unpin_cookie(struct fscache_cookie *cookie); | ||
685 | |||
686 | These operations permit data cookies to be pinned into the cache and to | ||
687 | have the pinning removed. They are not permitted on index cookies. | ||
688 | |||
689 | The pinning function will return 0 if successful, -ENOBUFS in the cookie | ||
690 | isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, | ||
691 | -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or | ||
692 | -EIO if there's any other problem. | ||
693 | |||
694 | (*) Data space reservation: | ||
695 | |||
696 | int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); | ||
697 | |||
698 | This permits a netfs to request cache space be reserved to store up to the | ||
699 | given amount of a file. It is permitted to ask for more than the current | ||
700 | size of the file to allow for future file expansion. | ||
701 | |||
702 | If size is given as zero then the reservation will be cancelled. | ||
703 | |||
704 | The function will return 0 if successful, -ENOBUFS in the cookie isn't | ||
705 | backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, | ||
706 | -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or | ||
707 | -EIO if there's any other problem. | ||
708 | |||
709 | Note that this doesn't pin an object in a cache; it can still be culled to | ||
710 | make space if it's not in use. | ||
711 | |||
712 | |||
713 | ===================== | ||
714 | COOKIE UNREGISTRATION | ||
715 | ===================== | ||
716 | |||
717 | To get rid of a cookie, this function should be called. | ||
718 | |||
719 | void fscache_relinquish_cookie(struct fscache_cookie *cookie, | ||
720 | int retire); | ||
721 | |||
722 | If retire is non-zero, then the object will be marked for recycling, and all | ||
723 | copies of it will be removed from all active caches in which it is present. | ||
724 | Not only that but all child objects will also be retired. | ||
725 | |||
726 | If retire is zero, then the object may be available again when next the | ||
727 | acquisition function is called. Retirement here will overrule the pinning on a | ||
728 | cookie. | ||
729 | |||
730 | One very important note - relinquish must NOT be called for a cookie unless all | ||
731 | the cookies for "child" indices, objects and pages have been relinquished | ||
732 | first. | ||
733 | |||
734 | |||
735 | ================================ | ||
736 | INDEX AND DATA FILE INVALIDATION | ||
737 | ================================ | ||
738 | |||
739 | There is no direct way to invalidate an index subtree or a data file. To do | ||
740 | this, the caller should relinquish and retire the cookie they have, and then | ||
741 | acquire a new one. | ||
742 | |||
743 | |||
744 | =========================== | ||
745 | FS-CACHE SPECIFIC PAGE FLAG | ||
746 | =========================== | ||
747 | |||
748 | FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is | ||
749 | given the alternative name PG_fscache. | ||
750 | |||
751 | PG_fscache is used to indicate that the page is known by the cache, and that | ||
752 | the cache must be informed if the page is going to go away. It's an indication | ||
753 | to the netfs that the cache has an interest in this page, where an interest may | ||
754 | be a pointer to it, resources allocated or reserved for it, or I/O in progress | ||
755 | upon it. | ||
756 | |||
757 | The netfs can use this information in methods such as releasepage() to | ||
758 | determine whether it needs to uncache a page or update it. | ||
759 | |||
760 | Furthermore, if this bit is set, releasepage() and invalidatepage() operations | ||
761 | will be called on a page to get rid of it, even if PG_private is not set. This | ||
762 | allows caching to attempted on a page before read_cache_pages() to be called | ||
763 | after fscache_read_or_alloc_pages() as the former will try and release pages it | ||
764 | was given under certain circumstances. | ||
765 | |||
766 | This bit does not overlap with such as PG_private. This means that FS-Cache | ||
767 | can be used with a filesystem that uses the block buffering code. | ||
768 | |||
769 | There are a number of operations defined on this flag: | ||
770 | |||
771 | int PageFsCache(struct page *page); | ||
772 | void SetPageFsCache(struct page *page) | ||
773 | void ClearPageFsCache(struct page *page) | ||
774 | int TestSetPageFsCache(struct page *page) | ||
775 | int TestClearPageFsCache(struct page *page) | ||
776 | |||
777 | These functions are bit test, bit set, bit clear, bit test and set and bit | ||
778 | test and clear operations on PG_fscache. | ||
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt new file mode 100644 index 000000000000..e8b0a35d8fe5 --- /dev/null +++ b/Documentation/filesystems/caching/object.txt | |||
@@ -0,0 +1,313 @@ | |||
1 | ==================================================== | ||
2 | IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT | ||
3 | ==================================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | Contents: | ||
8 | |||
9 | (*) Representation | ||
10 | |||
11 | (*) Object management state machine. | ||
12 | |||
13 | - Provision of cpu time. | ||
14 | - Locking simplification. | ||
15 | |||
16 | (*) The set of states. | ||
17 | |||
18 | (*) The set of events. | ||
19 | |||
20 | |||
21 | ============== | ||
22 | REPRESENTATION | ||
23 | ============== | ||
24 | |||
25 | FS-Cache maintains an in-kernel representation of each object that a netfs is | ||
26 | currently interested in. Such objects are represented by the fscache_cookie | ||
27 | struct and are referred to as cookies. | ||
28 | |||
29 | FS-Cache also maintains a separate in-kernel representation of the objects that | ||
30 | a cache backend is currently actively caching. Such objects are represented by | ||
31 | the fscache_object struct. The cache backends allocate these upon request, and | ||
32 | are expected to embed them in their own representations. These are referred to | ||
33 | as objects. | ||
34 | |||
35 | There is a 1:N relationship between cookies and objects. A cookie may be | ||
36 | represented by multiple objects - an index may exist in more than one cache - | ||
37 | or even by no objects (it may not be cached). | ||
38 | |||
39 | Furthermore, both cookies and objects are hierarchical. The two hierarchies | ||
40 | correspond, but the cookies tree is a superset of the union of the object trees | ||
41 | of multiple caches: | ||
42 | |||
43 | NETFS INDEX TREE : CACHE 1 : CACHE 2 | ||
44 | : : | ||
45 | : +-----------+ : | ||
46 | +----------->| IObject | : | ||
47 | +-----------+ | : +-----------+ : | ||
48 | | ICookie |-------+ : | : | ||
49 | +-----------+ | : | : +-----------+ | ||
50 | | +------------------------------>| IObject | | ||
51 | | : | : +-----------+ | ||
52 | | : V : | | ||
53 | | : +-----------+ : | | ||
54 | V +----------->| IObject | : | | ||
55 | +-----------+ | : +-----------+ : | | ||
56 | | ICookie |-------+ : | : V | ||
57 | +-----------+ | : | : +-----------+ | ||
58 | | +------------------------------>| IObject | | ||
59 | +-----+-----+ : | : +-----------+ | ||
60 | | | : | : | | ||
61 | V | : V : | | ||
62 | +-----------+ | : +-----------+ : | | ||
63 | | ICookie |------------------------->| IObject | : | | ||
64 | +-----------+ | : +-----------+ : | | ||
65 | | V : | : V | ||
66 | | +-----------+ : | : +-----------+ | ||
67 | | | ICookie |-------------------------------->| IObject | | ||
68 | | +-----------+ : | : +-----------+ | ||
69 | V | : V : | | ||
70 | +-----------+ | : +-----------+ : | | ||
71 | | DCookie |------------------------->| DObject | : | | ||
72 | +-----------+ | : +-----------+ : | | ||
73 | | : : | | ||
74 | +-------+-------+ : : | | ||
75 | | | : : | | ||
76 | V V : : V | ||
77 | +-----------+ +-----------+ : : +-----------+ | ||
78 | | DCookie | | DCookie |------------------------>| DObject | | ||
79 | +-----------+ +-----------+ : : +-----------+ | ||
80 | : : | ||
81 | |||
82 | In the above illustration, ICookie and IObject represent indices and DCookie | ||
83 | and DObject represent data storage objects. Indices may have representation in | ||
84 | multiple caches, but currently, non-index objects may not. Objects of any type | ||
85 | may also be entirely unrepresented. | ||
86 | |||
87 | As far as the netfs API goes, the netfs is only actually permitted to see | ||
88 | pointers to the cookies. The cookies themselves and any objects attached to | ||
89 | those cookies are hidden from it. | ||
90 | |||
91 | |||
92 | =============================== | ||
93 | OBJECT MANAGEMENT STATE MACHINE | ||
94 | =============================== | ||
95 | |||
96 | Within FS-Cache, each active object is managed by its own individual state | ||
97 | machine. The state for an object is kept in the fscache_object struct, in | ||
98 | object->state. A cookie may point to a set of objects that are in different | ||
99 | states. | ||
100 | |||
101 | Each state has an action associated with it that is invoked when the machine | ||
102 | wakes up in that state. There are four logical sets of states: | ||
103 | |||
104 | (1) Preparation: states that wait for the parent objects to become ready. The | ||
105 | representations are hierarchical, and it is expected that an object must | ||
106 | be created or accessed with respect to its parent object. | ||
107 | |||
108 | (2) Initialisation: states that perform lookups in the cache and validate | ||
109 | what's found and that create on disk any missing metadata. | ||
110 | |||
111 | (3) Normal running: states that allow netfs operations on objects to proceed | ||
112 | and that update the state of objects. | ||
113 | |||
114 | (4) Termination: states that detach objects from their netfs cookies, that | ||
115 | delete objects from disk, that handle disk and system errors and that free | ||
116 | up in-memory resources. | ||
117 | |||
118 | |||
119 | In most cases, transitioning between states is in response to signalled events. | ||
120 | When a state has finished processing, it will usually set the mask of events in | ||
121 | which it is interested (object->event_mask) and relinquish the worker thread. | ||
122 | Then when an event is raised (by calling fscache_raise_event()), if the event | ||
123 | is not masked, the object will be queued for processing (by calling | ||
124 | fscache_enqueue_object()). | ||
125 | |||
126 | |||
127 | PROVISION OF CPU TIME | ||
128 | --------------------- | ||
129 | |||
130 | The work to be done by the various states is given CPU time by the threads of | ||
131 | the slow work facility (see Documentation/slow-work.txt). This is used in | ||
132 | preference to the workqueue facility because: | ||
133 | |||
134 | (1) Threads may be completely occupied for very long periods of time by a | ||
135 | particular work item. These state actions may be doing sequences of | ||
136 | synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, | ||
137 | getxattr, truncate, unlink, rmdir, rename). | ||
138 | |||
139 | (2) Threads may do little actual work, but may rather spend a lot of time | ||
140 | sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded | ||
141 | workqueues don't necessarily have the right numbers of threads. | ||
142 | |||
143 | |||
144 | LOCKING SIMPLIFICATION | ||
145 | ---------------------- | ||
146 | |||
147 | Because only one worker thread may be operating on any particular object's | ||
148 | state machine at once, this simplifies the locking, particularly with respect | ||
149 | to disconnecting the netfs's representation of a cache object (fscache_cookie) | ||
150 | from the cache backend's representation (fscache_object) - which may be | ||
151 | requested from either end. | ||
152 | |||
153 | |||
154 | ================= | ||
155 | THE SET OF STATES | ||
156 | ================= | ||
157 | |||
158 | The object state machine has a set of states that it can be in. There are | ||
159 | preparation states in which the object sets itself up and waits for its parent | ||
160 | object to transit to a state that allows access to its children: | ||
161 | |||
162 | (1) State FSCACHE_OBJECT_INIT. | ||
163 | |||
164 | Initialise the object and wait for the parent object to become active. In | ||
165 | the cache, it is expected that it will not be possible to look an object | ||
166 | up from the parent object, until that parent object itself has been looked | ||
167 | up. | ||
168 | |||
169 | There are initialisation states in which the object sets itself up and accesses | ||
170 | disk for the object metadata: | ||
171 | |||
172 | (2) State FSCACHE_OBJECT_LOOKING_UP. | ||
173 | |||
174 | Look up the object on disk, using the parent as a starting point. | ||
175 | FS-Cache expects the cache backend to probe the cache to see whether this | ||
176 | object is represented there, and if it is, to see if it's valid (coherency | ||
177 | management). | ||
178 | |||
179 | The cache should call fscache_object_lookup_negative() to indicate lookup | ||
180 | failure for whatever reason, and should call fscache_obtained_object() to | ||
181 | indicate success. | ||
182 | |||
183 | At the completion of lookup, FS-Cache will let the netfs go ahead with | ||
184 | read operations, no matter whether the file is yet cached. If not yet | ||
185 | cached, read operations will be immediately rejected with ENODATA until | ||
186 | the first known page is uncached - as to that point there can be no data | ||
187 | to be read out of the cache for that file that isn't currently also held | ||
188 | in the pagecache. | ||
189 | |||
190 | (3) State FSCACHE_OBJECT_CREATING. | ||
191 | |||
192 | Create an object on disk, using the parent as a starting point. This | ||
193 | happens if the lookup failed to find the object, or if the object's | ||
194 | coherency data indicated what's on disk is out of date. In this state, | ||
195 | FS-Cache expects the cache to create | ||
196 | |||
197 | The cache should call fscache_obtained_object() if creation completes | ||
198 | successfully, fscache_object_lookup_negative() otherwise. | ||
199 | |||
200 | At the completion of creation, FS-Cache will start processing write | ||
201 | operations the netfs has queued for an object. If creation failed, the | ||
202 | write ops will be transparently discarded, and nothing recorded in the | ||
203 | cache. | ||
204 | |||
205 | There are some normal running states in which the object spends its time | ||
206 | servicing netfs requests: | ||
207 | |||
208 | (4) State FSCACHE_OBJECT_AVAILABLE. | ||
209 | |||
210 | A transient state in which pending operations are started, child objects | ||
211 | are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary | ||
212 | lookup data is freed. | ||
213 | |||
214 | (5) State FSCACHE_OBJECT_ACTIVE. | ||
215 | |||
216 | The normal running state. In this state, requests the netfs makes will be | ||
217 | passed on to the cache. | ||
218 | |||
219 | (6) State FSCACHE_OBJECT_UPDATING. | ||
220 | |||
221 | The state machine comes here to update the object in the cache from the | ||
222 | netfs's records. This involves updating the auxiliary data that is used | ||
223 | to maintain coherency. | ||
224 | |||
225 | And there are terminal states in which an object cleans itself up, deallocates | ||
226 | memory and potentially deletes stuff from disk: | ||
227 | |||
228 | (7) State FSCACHE_OBJECT_LC_DYING. | ||
229 | |||
230 | The object comes here if it is dying because of a lookup or creation | ||
231 | error. This would be due to a disk error or system error of some sort. | ||
232 | Temporary data is cleaned up, and the parent is released. | ||
233 | |||
234 | (8) State FSCACHE_OBJECT_DYING. | ||
235 | |||
236 | The object comes here if it is dying due to an error, because its parent | ||
237 | cookie has been relinquished by the netfs or because the cache is being | ||
238 | withdrawn. | ||
239 | |||
240 | Any child objects waiting on this one are given CPU time so that they too | ||
241 | can destroy themselves. This object waits for all its children to go away | ||
242 | before advancing to the next state. | ||
243 | |||
244 | (9) State FSCACHE_OBJECT_ABORT_INIT. | ||
245 | |||
246 | The object comes to this state if it was waiting on its parent in | ||
247 | FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself | ||
248 | so that the parent may proceed from the FSCACHE_OBJECT_DYING state. | ||
249 | |||
250 | (10) State FSCACHE_OBJECT_RELEASING. | ||
251 | (11) State FSCACHE_OBJECT_RECYCLING. | ||
252 | |||
253 | The object comes to one of these two states when dying once it is rid of | ||
254 | all its children, if it is dying because the netfs relinquished its | ||
255 | cookie. In the first state, the cached data is expected to persist, and | ||
256 | in the second it will be deleted. | ||
257 | |||
258 | (12) State FSCACHE_OBJECT_WITHDRAWING. | ||
259 | |||
260 | The object transits to this state if the cache decides it wants to | ||
261 | withdraw the object from service, perhaps to make space, but also due to | ||
262 | error or just because the whole cache is being withdrawn. | ||
263 | |||
264 | (13) State FSCACHE_OBJECT_DEAD. | ||
265 | |||
266 | The object transits to this state when the in-memory object record is | ||
267 | ready to be deleted. The object processor shouldn't ever see an object in | ||
268 | this state. | ||
269 | |||
270 | |||
271 | THE SET OF EVENTS | ||
272 | ----------------- | ||
273 | |||
274 | There are a number of events that can be raised to an object state machine: | ||
275 | |||
276 | (*) FSCACHE_OBJECT_EV_UPDATE | ||
277 | |||
278 | The netfs requested that an object be updated. The state machine will ask | ||
279 | the cache backend to update the object, and the cache backend will ask the | ||
280 | netfs for details of the change through its cookie definition ops. | ||
281 | |||
282 | (*) FSCACHE_OBJECT_EV_CLEARED | ||
283 | |||
284 | This is signalled in two circumstances: | ||
285 | |||
286 | (a) when an object's last child object is dropped and | ||
287 | |||
288 | (b) when the last operation outstanding on an object is completed. | ||
289 | |||
290 | This is used to proceed from the dying state. | ||
291 | |||
292 | (*) FSCACHE_OBJECT_EV_ERROR | ||
293 | |||
294 | This is signalled when an I/O error occurs during the processing of some | ||
295 | object. | ||
296 | |||
297 | (*) FSCACHE_OBJECT_EV_RELEASE | ||
298 | (*) FSCACHE_OBJECT_EV_RETIRE | ||
299 | |||
300 | These are signalled when the netfs relinquishes a cookie it was using. | ||
301 | The event selected depends on whether the netfs asks for the backing | ||
302 | object to be retired (deleted) or retained. | ||
303 | |||
304 | (*) FSCACHE_OBJECT_EV_WITHDRAW | ||
305 | |||
306 | This is signalled when the cache backend wants to withdraw an object. | ||
307 | This means that the object will have to be detached from the netfs's | ||
308 | cookie. | ||
309 | |||
310 | Because the withdrawing releasing/retiring events are all handled by the object | ||
311 | state machine, it doesn't matter if there's a collision with both ends trying | ||
312 | to sever the connection at the same time. The state machine can just pick | ||
313 | which one it wants to honour, and that effects the other. | ||
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt new file mode 100644 index 000000000000..b6b070c57cbf --- /dev/null +++ b/Documentation/filesystems/caching/operations.txt | |||
@@ -0,0 +1,213 @@ | |||
1 | ================================ | ||
2 | ASYNCHRONOUS OPERATIONS HANDLING | ||
3 | ================================ | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | Contents: | ||
8 | |||
9 | (*) Overview. | ||
10 | |||
11 | (*) Operation record initialisation. | ||
12 | |||
13 | (*) Parameters. | ||
14 | |||
15 | (*) Procedure. | ||
16 | |||
17 | (*) Asynchronous callback. | ||
18 | |||
19 | |||
20 | ======== | ||
21 | OVERVIEW | ||
22 | ======== | ||
23 | |||
24 | FS-Cache has an asynchronous operations handling facility that it uses for its | ||
25 | data storage and retrieval routines. Its operations are represented by | ||
26 | fscache_operation structs, though these are usually embedded into some other | ||
27 | structure. | ||
28 | |||
29 | This facility is available to and expected to be be used by the cache backends, | ||
30 | and FS-Cache will create operations and pass them off to the appropriate cache | ||
31 | backend for completion. | ||
32 | |||
33 | To make use of this facility, <linux/fscache-cache.h> should be #included. | ||
34 | |||
35 | |||
36 | =============================== | ||
37 | OPERATION RECORD INITIALISATION | ||
38 | =============================== | ||
39 | |||
40 | An operation is recorded in an fscache_operation struct: | ||
41 | |||
42 | struct fscache_operation { | ||
43 | union { | ||
44 | struct work_struct fast_work; | ||
45 | struct slow_work slow_work; | ||
46 | }; | ||
47 | unsigned long flags; | ||
48 | fscache_operation_processor_t processor; | ||
49 | ... | ||
50 | }; | ||
51 | |||
52 | Someone wanting to issue an operation should allocate something with this | ||
53 | struct embedded in it. They should initialise it by calling: | ||
54 | |||
55 | void fscache_operation_init(struct fscache_operation *op, | ||
56 | fscache_operation_release_t release); | ||
57 | |||
58 | with the operation to be initialised and the release function to use. | ||
59 | |||
60 | The op->flags parameter should be set to indicate the CPU time provision and | ||
61 | the exclusivity (see the Parameters section). | ||
62 | |||
63 | The op->fast_work, op->slow_work and op->processor flags should be set as | ||
64 | appropriate for the CPU time provision (see the Parameters section). | ||
65 | |||
66 | FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the | ||
67 | operation and waited for afterwards. | ||
68 | |||
69 | |||
70 | ========== | ||
71 | PARAMETERS | ||
72 | ========== | ||
73 | |||
74 | There are a number of parameters that can be set in the operation record's flag | ||
75 | parameter. There are three options for the provision of CPU time in these | ||
76 | operations: | ||
77 | |||
78 | (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread | ||
79 | may decide it wants to handle an operation itself without deferring it to | ||
80 | another thread. | ||
81 | |||
82 | This is, for example, used in read operations for calling readpages() on | ||
83 | the backing filesystem in CacheFiles. Although readpages() does an | ||
84 | asynchronous data fetch, the determination of whether pages exist is done | ||
85 | synchronously - and the netfs does not proceed until this has been | ||
86 | determined. | ||
87 | |||
88 | If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags | ||
89 | before submitting the operation, and the operating thread must wait for it | ||
90 | to be cleared before proceeding: | ||
91 | |||
92 | wait_on_bit(&op->flags, FSCACHE_OP_WAITING, | ||
93 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
94 | |||
95 | |||
96 | (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it | ||
97 | will be given to keventd to process. Such an operation is not permitted | ||
98 | to sleep on I/O. | ||
99 | |||
100 | This is, for example, used by CacheFiles to copy data from a backing fs | ||
101 | page to a netfs page after the backing fs has read the page in. | ||
102 | |||
103 | If this option is used, op->fast_work and op->processor must be | ||
104 | initialised before submitting the operation: | ||
105 | |||
106 | INIT_WORK(&op->fast_work, do_some_work); | ||
107 | |||
108 | |||
109 | (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it | ||
110 | will be given to the slow work facility to process. Such an operation is | ||
111 | permitted to sleep on I/O. | ||
112 | |||
113 | This is, for example, used by FS-Cache to handle background writes of | ||
114 | pages that have just been fetched from a remote server. | ||
115 | |||
116 | If this option is used, op->slow_work and op->processor must be | ||
117 | initialised before submitting the operation: | ||
118 | |||
119 | fscache_operation_init_slow(op, processor) | ||
120 | |||
121 | |||
122 | Furthermore, operations may be one of two types: | ||
123 | |||
124 | (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in | ||
125 | conjunction with any other operation on the object being operated upon. | ||
126 | |||
127 | An example of this is the attribute change operation, in which the file | ||
128 | being written to may need truncation. | ||
129 | |||
130 | (2) Shareable. Operations of this type may be running simultaneously. It's | ||
131 | up to the operation implementation to prevent interference between other | ||
132 | operations running at the same time. | ||
133 | |||
134 | |||
135 | ========= | ||
136 | PROCEDURE | ||
137 | ========= | ||
138 | |||
139 | Operations are used through the following procedure: | ||
140 | |||
141 | (1) The submitting thread must allocate the operation and initialise it | ||
142 | itself. Normally this would be part of a more specific structure with the | ||
143 | generic op embedded within. | ||
144 | |||
145 | (2) The submitting thread must then submit the operation for processing using | ||
146 | one of the following two functions: | ||
147 | |||
148 | int fscache_submit_op(struct fscache_object *object, | ||
149 | struct fscache_operation *op); | ||
150 | |||
151 | int fscache_submit_exclusive_op(struct fscache_object *object, | ||
152 | struct fscache_operation *op); | ||
153 | |||
154 | The first function should be used to submit non-exclusive ops and the | ||
155 | second to submit exclusive ones. The caller must still set the | ||
156 | FSCACHE_OP_EXCLUSIVE flag. | ||
157 | |||
158 | If successful, both functions will assign the operation to the specified | ||
159 | object and return 0. -ENOBUFS will be returned if the object specified is | ||
160 | permanently unavailable. | ||
161 | |||
162 | The operation manager will defer operations on an object that is still | ||
163 | undergoing lookup or creation. The operation will also be deferred if an | ||
164 | operation of conflicting exclusivity is in progress on the object. | ||
165 | |||
166 | If the operation is asynchronous, the manager will retain a reference to | ||
167 | it, so the caller should put their reference to it by passing it to: | ||
168 | |||
169 | void fscache_put_operation(struct fscache_operation *op); | ||
170 | |||
171 | (3) If the submitting thread wants to do the work itself, and has marked the | ||
172 | operation with FSCACHE_OP_MYTHREAD, then it should monitor | ||
173 | FSCACHE_OP_WAITING as described above and check the state of the object if | ||
174 | necessary (the object might have died whilst the thread was waiting). | ||
175 | |||
176 | When it has finished doing its processing, it should call | ||
177 | fscache_put_operation() on it. | ||
178 | |||
179 | (4) The operation holds an effective lock upon the object, preventing other | ||
180 | exclusive ops conflicting until it is released. The operation can be | ||
181 | enqueued for further immediate asynchronous processing by adjusting the | ||
182 | CPU time provisioning option if necessary, eg: | ||
183 | |||
184 | op->flags &= ~FSCACHE_OP_TYPE; | ||
185 | op->flags |= ~FSCACHE_OP_FAST; | ||
186 | |||
187 | and calling: | ||
188 | |||
189 | void fscache_enqueue_operation(struct fscache_operation *op) | ||
190 | |||
191 | This can be used to allow other things to have use of the worker thread | ||
192 | pools. | ||
193 | |||
194 | |||
195 | ===================== | ||
196 | ASYNCHRONOUS CALLBACK | ||
197 | ===================== | ||
198 | |||
199 | When used in asynchronous mode, the worker thread pool will invoke the | ||
200 | processor method with a pointer to the operation. This should then get at the | ||
201 | container struct by using container_of(): | ||
202 | |||
203 | static void fscache_write_op(struct fscache_operation *_op) | ||
204 | { | ||
205 | struct fscache_storage *op = | ||
206 | container_of(_op, struct fscache_storage, op); | ||
207 | ... | ||
208 | } | ||
209 | |||
210 | The caller holds a reference on the operation, and will invoke | ||
211 | fscache_put_operation() when the processor function returns. The processor | ||
212 | function is at liberty to call fscache_enqueue_operation() or to take extra | ||
213 | references. | ||
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt new file mode 100644 index 000000000000..ed52af60c2d8 --- /dev/null +++ b/Documentation/filesystems/debugfs.txt | |||
@@ -0,0 +1,158 @@ | |||
1 | Copyright 2009 Jonathan Corbet <corbet@lwn.net> | ||
2 | |||
3 | Debugfs exists as a simple way for kernel developers to make information | ||
4 | available to user space. Unlike /proc, which is only meant for information | ||
5 | about a process, or sysfs, which has strict one-value-per-file rules, | ||
6 | debugfs has no rules at all. Developers can put any information they want | ||
7 | there. The debugfs filesystem is also intended to not serve as a stable | ||
8 | ABI to user space; in theory, there are no stability constraints placed on | ||
9 | files exported there. The real world is not always so simple, though [1]; | ||
10 | even debugfs interfaces are best designed with the idea that they will need | ||
11 | to be maintained forever. | ||
12 | |||
13 | Debugfs is typically mounted with a command like: | ||
14 | |||
15 | mount -t debugfs none /sys/kernel/debug | ||
16 | |||
17 | (Or an equivalent /etc/fstab line). | ||
18 | |||
19 | Note that the debugfs API is exported GPL-only to modules. | ||
20 | |||
21 | Code using debugfs should include <linux/debugfs.h>. Then, the first order | ||
22 | of business will be to create at least one directory to hold a set of | ||
23 | debugfs files: | ||
24 | |||
25 | struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); | ||
26 | |||
27 | This call, if successful, will make a directory called name underneath the | ||
28 | indicated parent directory. If parent is NULL, the directory will be | ||
29 | created in the debugfs root. On success, the return value is a struct | ||
30 | dentry pointer which can be used to create files in the directory (and to | ||
31 | clean it up at the end). A NULL return value indicates that something went | ||
32 | wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the | ||
33 | kernel has been built without debugfs support and none of the functions | ||
34 | described below will work. | ||
35 | |||
36 | The most general way to create a file within a debugfs directory is with: | ||
37 | |||
38 | struct dentry *debugfs_create_file(const char *name, mode_t mode, | ||
39 | struct dentry *parent, void *data, | ||
40 | const struct file_operations *fops); | ||
41 | |||
42 | Here, name is the name of the file to create, mode describes the access | ||
43 | permissions the file should have, parent indicates the directory which | ||
44 | should hold the file, data will be stored in the i_private field of the | ||
45 | resulting inode structure, and fops is a set of file operations which | ||
46 | implement the file's behavior. At a minimum, the read() and/or write() | ||
47 | operations should be provided; others can be included as needed. Again, | ||
48 | the return value will be a dentry pointer to the created file, NULL for | ||
49 | error, or ERR_PTR(-ENODEV) if debugfs support is missing. | ||
50 | |||
51 | In a number of cases, the creation of a set of file operations is not | ||
52 | actually necessary; the debugfs code provides a number of helper functions | ||
53 | for simple situations. Files containing a single integer value can be | ||
54 | created with any of: | ||
55 | |||
56 | struct dentry *debugfs_create_u8(const char *name, mode_t mode, | ||
57 | struct dentry *parent, u8 *value); | ||
58 | struct dentry *debugfs_create_u16(const char *name, mode_t mode, | ||
59 | struct dentry *parent, u16 *value); | ||
60 | struct dentry *debugfs_create_u32(const char *name, mode_t mode, | ||
61 | struct dentry *parent, u32 *value); | ||
62 | struct dentry *debugfs_create_u64(const char *name, mode_t mode, | ||
63 | struct dentry *parent, u64 *value); | ||
64 | |||
65 | These files support both reading and writing the given value; if a specific | ||
66 | file should not be written to, simply set the mode bits accordingly. The | ||
67 | values in these files are in decimal; if hexadecimal is more appropriate, | ||
68 | the following functions can be used instead: | ||
69 | |||
70 | struct dentry *debugfs_create_x8(const char *name, mode_t mode, | ||
71 | struct dentry *parent, u8 *value); | ||
72 | struct dentry *debugfs_create_x16(const char *name, mode_t mode, | ||
73 | struct dentry *parent, u16 *value); | ||
74 | struct dentry *debugfs_create_x32(const char *name, mode_t mode, | ||
75 | struct dentry *parent, u32 *value); | ||
76 | |||
77 | Note that there is no debugfs_create_x64(). | ||
78 | |||
79 | These functions are useful as long as the developer knows the size of the | ||
80 | value to be exported. Some types can have different widths on different | ||
81 | architectures, though, complicating the situation somewhat. There is a | ||
82 | function meant to help out in one special case: | ||
83 | |||
84 | struct dentry *debugfs_create_size_t(const char *name, mode_t mode, | ||
85 | struct dentry *parent, | ||
86 | size_t *value); | ||
87 | |||
88 | As might be expected, this function will create a debugfs file to represent | ||
89 | a variable of type size_t. | ||
90 | |||
91 | Boolean values can be placed in debugfs with: | ||
92 | |||
93 | struct dentry *debugfs_create_bool(const char *name, mode_t mode, | ||
94 | struct dentry *parent, u32 *value); | ||
95 | |||
96 | A read on the resulting file will yield either Y (for non-zero values) or | ||
97 | N, followed by a newline. If written to, it will accept either upper- or | ||
98 | lower-case values, or 1 or 0. Any other input will be silently ignored. | ||
99 | |||
100 | Finally, a block of arbitrary binary data can be exported with: | ||
101 | |||
102 | struct debugfs_blob_wrapper { | ||
103 | void *data; | ||
104 | unsigned long size; | ||
105 | }; | ||
106 | |||
107 | struct dentry *debugfs_create_blob(const char *name, mode_t mode, | ||
108 | struct dentry *parent, | ||
109 | struct debugfs_blob_wrapper *blob); | ||
110 | |||
111 | A read of this file will return the data pointed to by the | ||
112 | debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way | ||
113 | to return several lines of (static) formatted text output. This function | ||
114 | can be used to export binary information, but there does not appear to be | ||
115 | any code which does so in the mainline. Note that all files created with | ||
116 | debugfs_create_blob() are read-only. | ||
117 | |||
118 | There are a couple of other directory-oriented helper functions: | ||
119 | |||
120 | struct dentry *debugfs_rename(struct dentry *old_dir, | ||
121 | struct dentry *old_dentry, | ||
122 | struct dentry *new_dir, | ||
123 | const char *new_name); | ||
124 | |||
125 | struct dentry *debugfs_create_symlink(const char *name, | ||
126 | struct dentry *parent, | ||
127 | const char *target); | ||
128 | |||
129 | A call to debugfs_rename() will give a new name to an existing debugfs | ||
130 | file, possibly in a different directory. The new_name must not exist prior | ||
131 | to the call; the return value is old_dentry with updated information. | ||
132 | Symbolic links can be created with debugfs_create_symlink(). | ||
133 | |||
134 | There is one important thing that all debugfs users must take into account: | ||
135 | there is no automatic cleanup of any directories created in debugfs. If a | ||
136 | module is unloaded without explicitly removing debugfs entries, the result | ||
137 | will be a lot of stale pointers and no end of highly antisocial behavior. | ||
138 | So all debugfs users - at least those which can be built as modules - must | ||
139 | be prepared to remove all files and directories they create there. A file | ||
140 | can be removed with: | ||
141 | |||
142 | void debugfs_remove(struct dentry *dentry); | ||
143 | |||
144 | The dentry value can be NULL, in which case nothing will be removed. | ||
145 | |||
146 | Once upon a time, debugfs users were required to remember the dentry | ||
147 | pointer for every debugfs file they created so that all files could be | ||
148 | cleaned up. We live in more civilized times now, though, and debugfs users | ||
149 | can call: | ||
150 | |||
151 | void debugfs_remove_recursive(struct dentry *dentry); | ||
152 | |||
153 | If this function is passed a pointer for the dentry corresponding to the | ||
154 | top-level directory, the entire hierarchy below that directory will be | ||
155 | removed. | ||
156 | |||
157 | Notes: | ||
158 | [1] http://lwn.net/Articles/309298/ | ||
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt new file mode 100644 index 000000000000..0ced74c2f73c --- /dev/null +++ b/Documentation/filesystems/exofs.txt | |||
@@ -0,0 +1,176 @@ | |||
1 | =============================================================================== | ||
2 | WHAT IS EXOFS? | ||
3 | =============================================================================== | ||
4 | |||
5 | exofs is a file system that uses an OSD and exports the API of a normal Linux | ||
6 | file system. Users access exofs like any other local file system, and exofs | ||
7 | will in turn issue commands to the local OSD initiator. | ||
8 | |||
9 | OSD is a new T10 command set that views storage devices not as a large/flat | ||
10 | array of sectors but as a container of objects, each having a length, quota, | ||
11 | time attributes and more. Each object is addressed by a 64bit ID, and is | ||
12 | contained in a 64bit ID partition. Each object has associated attributes | ||
13 | attached to it, which are integral part of the object and provide metadata about | ||
14 | the object. The standard defines some common obligatory attributes, but user | ||
15 | attributes can be added as needed. | ||
16 | |||
17 | =============================================================================== | ||
18 | ENVIRONMENT | ||
19 | =============================================================================== | ||
20 | |||
21 | To use this file system, you need to have an object store to run it on. You | ||
22 | may download a target from: | ||
23 | http://open-osd.org | ||
24 | |||
25 | See Documentation/scsi/osd.txt for how to setup a working osd environment. | ||
26 | |||
27 | =============================================================================== | ||
28 | USAGE | ||
29 | =============================================================================== | ||
30 | |||
31 | 1. Download and compile exofs and open-osd initiator: | ||
32 | You need an external Kernel source tree or kernel headers from your | ||
33 | distribution. (anything based on 2.6.26 or later). | ||
34 | |||
35 | a. download open-osd including exofs source using: | ||
36 | [parent-directory]$ git clone git://git.open-osd.org/open-osd.git | ||
37 | |||
38 | b. Build the library module like this: | ||
39 | [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd | ||
40 | |||
41 | This will build both the open-osd initiator as well as the exofs kernel | ||
42 | module. Use whatever parameters you compiled your Kernel with and | ||
43 | $(KER_DIR) above pointing to the Kernel you compile against. See the file | ||
44 | open-osd/top-level-Makefile for an example. | ||
45 | |||
46 | 2. Get the OSD initiator and target set up properly, and login to the target. | ||
47 | See Documentation/scsi/osd.txt for farther instructions. Also see ./do-osd | ||
48 | for example script that does all these steps. | ||
49 | |||
50 | 3. Insmod the exofs.ko module: | ||
51 | [exofs]$ insmod exofs.ko | ||
52 | |||
53 | 4. Make sure the directory where you want to mount exists. If not, create it. | ||
54 | (For example, mkdir /mnt/exofs) | ||
55 | |||
56 | 5. At first run you will need to invoke the mkfs.exofs application | ||
57 | |||
58 | As an example, this will create the file system on: | ||
59 | /dev/osd0 partition ID 65536 | ||
60 | |||
61 | mkfs.exofs --pid=65536 --format /dev/osd0 | ||
62 | |||
63 | The --format is optional if not specified no OSD_FORMAT will be | ||
64 | preformed and a clean file system will be created in the specified pid, | ||
65 | in the available space of the target. (Use --format=size_in_meg to limit | ||
66 | the total LUN space available) | ||
67 | |||
68 | If pid already exist it will be deleted and a new one will be created in it's | ||
69 | place. Be careful. | ||
70 | |||
71 | An exofs lives inside a single OSD partition. You can create multiple exofs | ||
72 | filesystems on the same device using multiple pids. | ||
73 | |||
74 | (run mkfs.exofs without any parameters for usage help message) | ||
75 | |||
76 | 6. Mount the file system. | ||
77 | |||
78 | For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs: | ||
79 | |||
80 | mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/ | ||
81 | |||
82 | 7. For reference (See do-exofs example script): | ||
83 | do-exofs start - an example of how to perform the above steps. | ||
84 | do-exofs stop - an example of how to unmount the file system. | ||
85 | do-exofs format - an example of how to format and mkfs a new exofs. | ||
86 | |||
87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): | ||
88 | CONFIG_EXOFS_DEBUG - for debug messages and extra checks. | ||
89 | |||
90 | =============================================================================== | ||
91 | exofs mount options | ||
92 | =============================================================================== | ||
93 | Similar to any mount command: | ||
94 | mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory | ||
95 | |||
96 | Where: | ||
97 | -t exofs: specifies the exofs file system | ||
98 | |||
99 | /dev/osdX: X is a decimal number. /dev/osdX was created after a successful | ||
100 | login into an OSD target. | ||
101 | |||
102 | mount_exofs_directory: The directory to mount the file system on | ||
103 | |||
104 | exofs specific options: Options are separated by commas (,) | ||
105 | pid=<integer> - The partition number to mount/create as | ||
106 | container of the filesystem. | ||
107 | This option is mandatory | ||
108 | to=<integer> - Timeout in ticks for a single command | ||
109 | default is (60 * HZ) [for debugging only] | ||
110 | |||
111 | =============================================================================== | ||
112 | DESIGN | ||
113 | =============================================================================== | ||
114 | |||
115 | * The file system control block (AKA on-disk superblock) resides in an object | ||
116 | with a special ID (defined in common.h). | ||
117 | Information included in the file system control block is used to fill the | ||
118 | in-memory superblock structure at mount time. This object is created before | ||
119 | the file system is used by mkexofs.c It contains information such as: | ||
120 | - The file system's magic number | ||
121 | - The next inode number to be allocated | ||
122 | |||
123 | * Each file resides in its own object and contains the data (and it will be | ||
124 | possible to extend the file over multiple objects, though this has not been | ||
125 | implemented yet). | ||
126 | |||
127 | * A directory is treated as a file, and essentially contains a list of <file | ||
128 | name, inode #> pairs for files that are found in that directory. The object | ||
129 | IDs correspond to the files' inode numbers and will be allocated according to | ||
130 | a bitmap (stored in a separate object). Now they are allocated using a | ||
131 | counter. | ||
132 | |||
133 | * Each file's control block (AKA on-disk inode) is stored in its object's | ||
134 | attributes. This applies to both regular files and other types (directories, | ||
135 | device files, symlinks, etc.). | ||
136 | |||
137 | * Credentials are generated per object (inode and superblock) when they is | ||
138 | created in memory (read off disk or created). The credential works for all | ||
139 | operations and is used as long as the object remains in memory. | ||
140 | |||
141 | * Async OSD operations are used whenever possible, but the target may execute | ||
142 | them out of order. The operations that concern us are create, delete, | ||
143 | readpage, writepage, update_inode, and truncate. The following pairs of | ||
144 | operations should execute in the order written, and we need to prevent them | ||
145 | from executing in reverse order: | ||
146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED | ||
147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - | ||
148 | in create's callback function, and when we successfully do a read_inode. | ||
149 | OBJ_2BCREATED is set in the beginning of the create function, so we | ||
150 | know that we should wait. | ||
151 | - create/delete: delete should wait until the object is created | ||
152 | on the OSD. | ||
153 | - create/readpage: readpage should be able to return a page | ||
154 | full of zeroes in this case. If there was a write already | ||
155 | en-route (i.e. create, writepage, readpage) then the page | ||
156 | would be locked, and so it would really be the same as | ||
157 | create/writepage. | ||
158 | - create/writepage: if writepage is called for a sync write, it | ||
159 | should wait until the object is created on the OSD. | ||
160 | Otherwise, it should just return. | ||
161 | - create/truncate: truncate should wait until the object is | ||
162 | created on the OSD. | ||
163 | - create/update_inode: update_inode should wait until the | ||
164 | object is created on the OSD. | ||
165 | - Handled by VFS locks: | ||
166 | - readpage/delete: shouldn't happen because of page lock. | ||
167 | - writepage/delete: shouldn't happen because of page lock. | ||
168 | - readpage/writepage: shouldn't happen because of page lock. | ||
169 | |||
170 | =============================================================================== | ||
171 | LICENSE/COPYRIGHT | ||
172 | =============================================================================== | ||
173 | The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel | ||
174 | version 2.6.10). All files include the original copyrights, and the license | ||
175 | is GPL version 2 (only version 2, as is true for the Linux kernel). The | ||
176 | Linux kernel can be downloaded from www.kernel.org. | ||
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt index e055acb6b2d4..67639f905f10 100644 --- a/Documentation/filesystems/ext2.txt +++ b/Documentation/filesystems/ext2.txt | |||
@@ -322,7 +322,7 @@ an upper limit on the block size imposed by the page size of the kernel, | |||
322 | so 8kB blocks are only allowed on Alpha systems (and other architectures | 322 | so 8kB blocks are only allowed on Alpha systems (and other architectures |
323 | which support larger pages). | 323 | which support larger pages). |
324 | 324 | ||
325 | There is an upper limit of 32768 subdirectories in a single directory. | 325 | There is an upper limit of 32000 subdirectories in a single directory. |
326 | 326 | ||
327 | There is a "soft" upper limit of about 10-15k files in a single directory | 327 | There is a "soft" upper limit of about 10-15k files in a single directory |
328 | with the current linear linked-list directory implementation. This limit | 328 | with the current linear linked-list directory implementation. This limit |
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index e5f3833a6ef8..570f9bd9be2b 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -14,6 +14,11 @@ Options | |||
14 | When mounting an ext3 filesystem, the following option are accepted: | 14 | When mounting an ext3 filesystem, the following option are accepted: |
15 | (*) == default | 15 | (*) == default |
16 | 16 | ||
17 | ro Mount filesystem read only. Note that ext3 will replay | ||
18 | the journal (and thus write to the partition) even when | ||
19 | mounted "read only". Mount options "ro,noload" can be | ||
20 | used to prevent writes to the filesystem. | ||
21 | |||
17 | journal=update Update the ext3 file system's journal to the current | 22 | journal=update Update the ext3 file system's journal to the current |
18 | format. | 23 | format. |
19 | 24 | ||
@@ -27,7 +32,9 @@ journal_dev=devnum When the external journal device's major/minor numbers | |||
27 | identified through its new major/minor numbers encoded | 32 | identified through its new major/minor numbers encoded |
28 | in devnum. | 33 | in devnum. |
29 | 34 | ||
30 | noload Don't load the journal on mounting. | 35 | noload Don't load the journal on mounting. Note that this forces |
36 | mount of inconsistent filesystem, which can lead to | ||
37 | various problems. | ||
31 | 38 | ||
32 | data=journal All data are committed into the journal prior to being | 39 | data=journal All data are committed into the journal prior to being |
33 | written into the main file system. | 40 | written into the main file system. |
@@ -92,9 +99,12 @@ nocheck | |||
92 | 99 | ||
93 | debug Extra debugging information is sent to syslog. | 100 | debug Extra debugging information is sent to syslog. |
94 | 101 | ||
95 | errors=remount-ro(*) Remount the filesystem read-only on an error. | 102 | errors=remount-ro Remount the filesystem read-only on an error. |
96 | errors=continue Keep going on a filesystem error. | 103 | errors=continue Keep going on a filesystem error. |
97 | errors=panic Panic and halt the machine if an error occurs. | 104 | errors=panic Panic and halt the machine if an error occurs. |
105 | (These mount options override the errors behavior | ||
106 | specified in the superblock, which can be | ||
107 | configured using tune2fs.) | ||
98 | 108 | ||
99 | data_err=ignore(*) Just print an error message if an error occurs | 109 | data_err=ignore(*) Just print an error message if an error occurs |
100 | in a file data buffer in ordered mode. | 110 | in a file data buffer in ordered mode. |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index cec829bc7291..7be02ac5fa36 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -85,7 +85,7 @@ Note: More extensive information for getting started with ext4 can be | |||
85 | * extent format more robust in face of on-disk corruption due to magics, | 85 | * extent format more robust in face of on-disk corruption due to magics, |
86 | * internal redundancy in tree | 86 | * internal redundancy in tree |
87 | * improved file allocation (multi-block alloc) | 87 | * improved file allocation (multi-block alloc) |
88 | * fix 32000 subdirectory limit | 88 | * lift 32000 subdirectory limit imposed by i_links_count[1] |
89 | * nsec timestamps for mtime, atime, ctime, create time | 89 | * nsec timestamps for mtime, atime, ctime, create time |
90 | * inode version field on disk (NFSv4, Lustre) | 90 | * inode version field on disk (NFSv4, Lustre) |
91 | * reduced e2fsck time via uninit_bg feature | 91 | * reduced e2fsck time via uninit_bg feature |
@@ -100,6 +100,9 @@ Note: More extensive information for getting started with ext4 can be | |||
100 | * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force | 100 | * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force |
101 | the ordering) | 101 | the ordering) |
102 | 102 | ||
103 | [1] Filesystems with a block size of 1k may see a limit imposed by the | ||
104 | directory hash tree having a maximum depth of two. | ||
105 | |||
103 | 2.2 Candidate features for future inclusion | 106 | 2.2 Candidate features for future inclusion |
104 | 107 | ||
105 | * Online defrag (patches available but not well tested) | 108 | * Online defrag (patches available but not well tested) |
@@ -180,8 +183,8 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata | |||
180 | performance. | 183 | performance. |
181 | 184 | ||
182 | barrier=<0|1(*)> This enables/disables the use of write barriers in | 185 | barrier=<0|1(*)> This enables/disables the use of write barriers in |
183 | the jbd code. barrier=0 disables, barrier=1 enables. | 186 | barrier(*) the jbd code. barrier=0 disables, barrier=1 enables. |
184 | This also requires an IO stack which can support | 187 | nobarrier This also requires an IO stack which can support |
185 | barriers, and if jbd gets an error on a barrier | 188 | barriers, and if jbd gets an error on a barrier |
186 | write, it will disable again with a warning. | 189 | write, it will disable again with a warning. |
187 | Write barriers enforce proper on-disk ordering | 190 | Write barriers enforce proper on-disk ordering |
@@ -189,6 +192,9 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in | |||
189 | safe to use, at some performance penalty. If | 192 | safe to use, at some performance penalty. If |
190 | your disks are battery-backed in one way or another, | 193 | your disks are battery-backed in one way or another, |
191 | disabling barriers may safely improve performance. | 194 | disabling barriers may safely improve performance. |
195 | The mount options "barrier" and "nobarrier" can | ||
196 | also be used to enable or disable barriers, for | ||
197 | consistency with other ext4 mount options. | ||
192 | 198 | ||
193 | inode_readahead=n This tuning parameter controls the maximum | 199 | inode_readahead=n This tuning parameter controls the maximum |
194 | number of inode table blocks that ext4's inode | 200 | number of inode table blocks that ext4's inode |
@@ -229,6 +235,10 @@ minixdf Make 'df' act like Minix. | |||
229 | 235 | ||
230 | debug Extra debugging information is sent to syslog. | 236 | debug Extra debugging information is sent to syslog. |
231 | 237 | ||
238 | abort Simulate the effects of calling ext4_abort() for | ||
239 | debugging purposes. This is normally used while | ||
240 | remounting a filesystem which is already mounted. | ||
241 | |||
232 | errors=remount-ro Remount the filesystem read-only on an error. | 242 | errors=remount-ro Remount the filesystem read-only on an error. |
233 | errors=continue Keep going on a filesystem error. | 243 | errors=continue Keep going on a filesystem error. |
234 | errors=panic Panic and halt the machine if an error occurs. | 244 | errors=panic Panic and halt the machine if an error occurs. |
@@ -288,7 +298,7 @@ max_batch_time=usec Maximum amount of time ext4 should wait for | |||
288 | amount of time (on average) that it takes to | 298 | amount of time (on average) that it takes to |
289 | finish committing a transaction. Call this time | 299 | finish committing a transaction. Call this time |
290 | the "commit time". If the time that the | 300 | the "commit time". If the time that the |
291 | transactoin has been running is less than the | 301 | transaction has been running is less than the |
292 | commit time, ext4 will try sleeping for the | 302 | commit time, ext4 will try sleeping for the |
293 | commit time to see if other operations will join | 303 | commit time to see if other operations will join |
294 | the transaction. The commit time is capped by | 304 | the transaction. The commit time is capped by |
@@ -310,6 +320,24 @@ journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the | |||
310 | a slightly higher priority than the default I/O | 320 | a slightly higher priority than the default I/O |
311 | priority. | 321 | priority. |
312 | 322 | ||
323 | auto_da_alloc(*) Many broken applications don't use fsync() when | ||
324 | noauto_da_alloc replacing existing files via patterns such as | ||
325 | fd = open("foo.new")/write(fd,..)/close(fd)/ | ||
326 | rename("foo.new", "foo"), or worse yet, | ||
327 | fd = open("foo", O_TRUNC)/write(fd,..)/close(fd). | ||
328 | If auto_da_alloc is enabled, ext4 will detect | ||
329 | the replace-via-rename and replace-via-truncate | ||
330 | patterns and force that any delayed allocation | ||
331 | blocks are allocated such that at the next | ||
332 | journal commit, in the default data=ordered | ||
333 | mode, the data blocks of the new file are forced | ||
334 | to disk before the rename() operation is | ||
335 | committed. This provides roughly the same level | ||
336 | of guarantees as ext3, and avoids the | ||
337 | "zero-length" problem that can happen when a | ||
338 | system crashes before the delayed allocation | ||
339 | blocks are forced to disk. | ||
340 | |||
313 | Data Mode | 341 | Data Mode |
314 | ========= | 342 | ========= |
315 | There are 3 different data modes: | 343 | There are 3 different data modes: |
@@ -334,7 +362,7 @@ written to the journal first, and then to its final location. | |||
334 | In the event of a crash, the journal can be replayed, bringing both data and | 362 | In the event of a crash, the journal can be replayed, bringing both data and |
335 | metadata into a consistent state. This mode is the slowest except when data | 363 | metadata into a consistent state. This mode is the slowest except when data |
336 | needs to be read from and written to disk at the same time where it | 364 | needs to be read from and written to disk at the same time where it |
337 | outperforms all others modes. Curently ext4 does not have delayed | 365 | outperforms all others modes. Currently ext4 does not have delayed |
338 | allocation support if this data journalling mode is selected. | 366 | allocation support if this data journalling mode is selected. |
339 | 367 | ||
340 | References | 368 | References |
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt index 1e3defcfe50b..606233cd4618 100644 --- a/Documentation/filesystems/fiemap.txt +++ b/Documentation/filesystems/fiemap.txt | |||
@@ -204,7 +204,7 @@ fiemap_check_flags() helper: | |||
204 | 204 | ||
205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | 205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); |
206 | 206 | ||
207 | The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The | 207 | The struct fieinfo should be passed in as received from ioctl_fiemap(). The |
208 | set of fiemap flags which the fs understands should be passed via fs_flags. If | 208 | set of fiemap flags which the fs understands should be passed via fs_flags. If |
209 | fiemap_check_flags finds invalid user flags, it will place the bad values in | 209 | fiemap_check_flags finds invalid user flags, it will place the bad values in |
210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from | 210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from |
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt index 4dae9a3840bf..0494f78d87e4 100644 --- a/Documentation/filesystems/gfs2-glocks.txt +++ b/Documentation/filesystems/gfs2-glocks.txt | |||
@@ -60,7 +60,7 @@ go_lock | Called for the first local holder of a lock | |||
60 | go_unlock | Called on the final local unlock of a lock | 60 | go_unlock | Called on the final local unlock of a lock |
61 | go_dump | Called to print content of object for debugfs file, or on | 61 | go_dump | Called to print content of object for debugfs file, or on |
62 | | error to dump glock to the log. | 62 | | error to dump glock to the log. |
63 | go_type; | The type of the glock, LM_TYPE_..... | 63 | go_type | The type of the glock, LM_TYPE_..... |
64 | go_min_hold_time | The minimum hold time | 64 | go_min_hold_time | The minimum hold time |
65 | 65 | ||
66 | The minimum hold time for each lock is the time after a remote lock | 66 | The minimum hold time for each lock is the time after a remote lock |
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt index 593004b6bbab..5e3ab8f3beff 100644 --- a/Documentation/filesystems/gfs2.txt +++ b/Documentation/filesystems/gfs2.txt | |||
@@ -11,18 +11,15 @@ their I/O so file system consistency is maintained. One of the nifty | |||
11 | features of GFS is perfect consistency -- changes made to the file system | 11 | features of GFS is perfect consistency -- changes made to the file system |
12 | on one machine show up immediately on all other machines in the cluster. | 12 | on one machine show up immediately on all other machines in the cluster. |
13 | 13 | ||
14 | GFS uses interchangable inter-node locking mechanisms. Different lock | 14 | GFS uses interchangable inter-node locking mechanisms, the currently |
15 | modules can plug into GFS and each file system selects the appropriate | 15 | supported mechanisms are: |
16 | lock module at mount time. Lock modules include: | ||
17 | 16 | ||
18 | lock_nolock -- allows gfs to be used as a local file system | 17 | lock_nolock -- allows gfs to be used as a local file system |
19 | 18 | ||
20 | lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking | 19 | lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking |
21 | The dlm is found at linux/fs/dlm/ | 20 | The dlm is found at linux/fs/dlm/ |
22 | 21 | ||
23 | In addition to interfacing with an external locking manager, a gfs lock | 22 | Lock_dlm depends on user space cluster management systems found |
24 | module is responsible for interacting with external cluster management | ||
25 | systems. Lock_dlm depends on user space cluster management systems found | ||
26 | at the URL above. | 23 | at the URL above. |
27 | 24 | ||
28 | To use gfs as a local file system, no external clustering systems are | 25 | To use gfs as a local file system, no external clustering systems are |
@@ -31,13 +28,19 @@ needed, simply: | |||
31 | $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device | 28 | $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device |
32 | $ mount -t gfs2 /dev/block_device /dir | 29 | $ mount -t gfs2 /dev/block_device /dir |
33 | 30 | ||
34 | GFS2 is not on-disk compatible with previous versions of GFS. | 31 | If you are using Fedora, you need to install the gfs2-utils package |
32 | and, for lock_dlm, you will also need to install the cman package | ||
33 | and write a cluster.conf as per the documentation. | ||
34 | |||
35 | GFS2 is not on-disk compatible with previous versions of GFS, but it | ||
36 | is pretty close. | ||
35 | 37 | ||
36 | The following man pages can be found at the URL above: | 38 | The following man pages can be found at the URL above: |
37 | gfs2_fsck to repair a filesystem | 39 | fsck.gfs2 to repair a filesystem |
38 | gfs2_grow to expand a filesystem online | 40 | gfs2_grow to expand a filesystem online |
39 | gfs2_jadd to add journals to a filesystem online | 41 | gfs2_jadd to add journals to a filesystem online |
40 | gfs2_tool to manipulate, examine and tune a filesystem | 42 | gfs2_tool to manipulate, examine and tune a filesystem |
41 | gfs2_quota to examine and change quota values in a filesystem | 43 | gfs2_quota to examine and change quota values in a filesystem |
44 | gfs2_convert to convert a gfs filesystem to gfs2 in-place | ||
42 | mount.gfs2 to help mount(8) mount a filesystem | 45 | mount.gfs2 to help mount(8) mount a filesystem |
43 | mkfs.gfs2 to make a filesystem | 46 | mkfs.gfs2 to make a filesystem |
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt index 6973b980ca2a..3c367c3b3608 100644 --- a/Documentation/filesystems/isofs.txt +++ b/Documentation/filesystems/isofs.txt | |||
@@ -23,8 +23,13 @@ Mount options unique to the isofs filesystem. | |||
23 | map=off Do not map non-Rock Ridge filenames to lower case | 23 | map=off Do not map non-Rock Ridge filenames to lower case |
24 | map=normal Map non-Rock Ridge filenames to lower case | 24 | map=normal Map non-Rock Ridge filenames to lower case |
25 | map=acorn As map=normal but also apply Acorn extensions if present | 25 | map=acorn As map=normal but also apply Acorn extensions if present |
26 | mode=xxx Sets the permissions on files to xxx | 26 | mode=xxx Sets the permissions on files to xxx unless Rock Ridge |
27 | dmode=xxx Sets the permissions on directories to xxx | 27 | extensions set the permissions otherwise |
28 | dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge | ||
29 | extensions set the permissions otherwise | ||
30 | overriderockperm Set permissions on files and directories according to | ||
31 | 'mode' and 'dmode' even though Rock Ridge extensions are | ||
32 | present. | ||
28 | nojoliet Ignore Joliet extensions if they are present. | 33 | nojoliet Ignore Joliet extensions if they are present. |
29 | norock Ignore Rock Ridge extensions if they are present. | 34 | norock Ignore Rock Ridge extensions if they are present. |
30 | hide Completely strip hidden files from the file system. | 35 | hide Completely strip hidden files from the file system. |
diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/knfsd-stats.txt new file mode 100644 index 000000000000..64ced5149d37 --- /dev/null +++ b/Documentation/filesystems/knfsd-stats.txt | |||
@@ -0,0 +1,159 @@ | |||
1 | |||
2 | Kernel NFS Server Statistics | ||
3 | ============================ | ||
4 | |||
5 | This document describes the format and semantics of the statistics | ||
6 | which the kernel NFS server makes available to userspace. These | ||
7 | statistics are available in several text form pseudo files, each of | ||
8 | which is described separately below. | ||
9 | |||
10 | In most cases you don't need to know these formats, as the nfsstat(8) | ||
11 | program from the nfs-utils distribution provides a helpful command-line | ||
12 | interface for extracting and printing them. | ||
13 | |||
14 | All the files described here are formatted as a sequence of text lines, | ||
15 | separated by newline '\n' characters. Lines beginning with a hash | ||
16 | '#' character are comments intended for humans and should be ignored | ||
17 | by parsing routines. All other lines contain a sequence of fields | ||
18 | separated by whitespace. | ||
19 | |||
20 | /proc/fs/nfsd/pool_stats | ||
21 | ------------------------ | ||
22 | |||
23 | This file is available in kernels from 2.6.30 onwards, if the | ||
24 | /proc/fs/nfsd filesystem is mounted (it almost always should be). | ||
25 | |||
26 | The first line is a comment which describes the fields present in | ||
27 | all the other lines. The other lines present the following data as | ||
28 | a sequence of unsigned decimal numeric fields. One line is shown | ||
29 | for each NFS thread pool. | ||
30 | |||
31 | All counters are 64 bits wide and wrap naturally. There is no way | ||
32 | to zero these counters, instead applications should do their own | ||
33 | rate conversion. | ||
34 | |||
35 | pool | ||
36 | The id number of the NFS thread pool to which this line applies. | ||
37 | This number does not change. | ||
38 | |||
39 | Thread pool ids are a contiguous set of small integers starting | ||
40 | at zero. The maximum value depends on the thread pool mode, but | ||
41 | currently cannot be larger than the number of CPUs in the system. | ||
42 | Note that in the default case there will be a single thread pool | ||
43 | which contains all the nfsd threads and all the CPUs in the system, | ||
44 | and thus this file will have a single line with a pool id of "0". | ||
45 | |||
46 | packets-arrived | ||
47 | Counts how many NFS packets have arrived. More precisely, this | ||
48 | is the number of times that the network stack has notified the | ||
49 | sunrpc server layer that new data may be available on a transport | ||
50 | (e.g. an NFS or UDP socket or an NFS/RDMA endpoint). | ||
51 | |||
52 | Depending on the NFS workload patterns and various network stack | ||
53 | effects (such as Large Receive Offload) which can combine packets | ||
54 | on the wire, this may be either more or less than the number | ||
55 | of NFS calls received (which statistic is available elsewhere). | ||
56 | However this is a more accurate and less workload-dependent measure | ||
57 | of how much CPU load is being placed on the sunrpc server layer | ||
58 | due to NFS network traffic. | ||
59 | |||
60 | sockets-enqueued | ||
61 | Counts how many times an NFS transport is enqueued to wait for | ||
62 | an nfsd thread to service it, i.e. no nfsd thread was considered | ||
63 | available. | ||
64 | |||
65 | The circumstance this statistic tracks indicates that there was NFS | ||
66 | network-facing work to be done but it couldn't be done immediately, | ||
67 | thus introducing a small delay in servicing NFS calls. The ideal | ||
68 | rate of change for this counter is zero; significantly non-zero | ||
69 | values may indicate a performance limitation. | ||
70 | |||
71 | This can happen either because there are too few nfsd threads in the | ||
72 | thread pool for the NFS workload (the workload is thread-limited), | ||
73 | or because the NFS workload needs more CPU time than is available in | ||
74 | the thread pool (the workload is CPU-limited). In the former case, | ||
75 | configuring more nfsd threads will probably improve the performance | ||
76 | of the NFS workload. In the latter case, the sunrpc server layer is | ||
77 | already choosing not to wake idle nfsd threads because there are too | ||
78 | many nfsd threads which want to run but cannot, so configuring more | ||
79 | nfsd threads will make no difference whatsoever. The overloads-avoided | ||
80 | statistic (see below) can be used to distinguish these cases. | ||
81 | |||
82 | threads-woken | ||
83 | Counts how many times an idle nfsd thread is woken to try to | ||
84 | receive some data from an NFS transport. | ||
85 | |||
86 | This statistic tracks the circumstance where incoming | ||
87 | network-facing NFS work is being handled quickly, which is a good | ||
88 | thing. The ideal rate of change for this counter will be close | ||
89 | to but less than the rate of change of the packets-arrived counter. | ||
90 | |||
91 | overloads-avoided | ||
92 | Counts how many times the sunrpc server layer chose not to wake an | ||
93 | nfsd thread, despite the presence of idle nfsd threads, because | ||
94 | too many nfsd threads had been recently woken but could not get | ||
95 | enough CPU time to actually run. | ||
96 | |||
97 | This statistic counts a circumstance where the sunrpc layer | ||
98 | heuristically avoids overloading the CPU scheduler with too many | ||
99 | runnable nfsd threads. The ideal rate of change for this counter | ||
100 | is zero. Significant non-zero values indicate that the workload | ||
101 | is CPU limited. Usually this is associated with heavy CPU usage | ||
102 | on all the CPUs in the nfsd thread pool. | ||
103 | |||
104 | If a sustained large overloads-avoided rate is detected on a pool, | ||
105 | the top(1) utility should be used to check for the following | ||
106 | pattern of CPU usage on all the CPUs associated with the given | ||
107 | nfsd thread pool. | ||
108 | |||
109 | - %us ~= 0 (as you're *NOT* running applications on your NFS server) | ||
110 | |||
111 | - %wa ~= 0 | ||
112 | |||
113 | - %id ~= 0 | ||
114 | |||
115 | - %sy + %hi + %si ~= 100 | ||
116 | |||
117 | If this pattern is seen, configuring more nfsd threads will *not* | ||
118 | improve the performance of the workload. If this patten is not | ||
119 | seen, then something more subtle is wrong. | ||
120 | |||
121 | threads-timedout | ||
122 | Counts how many times an nfsd thread triggered an idle timeout, | ||
123 | i.e. was not woken to handle any incoming network packets for | ||
124 | some time. | ||
125 | |||
126 | This statistic counts a circumstance where there are more nfsd | ||
127 | threads configured than can be used by the NFS workload. This is | ||
128 | a clue that the number of nfsd threads can be reduced without | ||
129 | affecting performance. Unfortunately, it's only a clue and not | ||
130 | a strong indication, for a couple of reasons: | ||
131 | |||
132 | - Currently the rate at which the counter is incremented is quite | ||
133 | slow; the idle timeout is 60 minutes. Unless the NFS workload | ||
134 | remains constant for hours at a time, this counter is unlikely | ||
135 | to be providing information that is still useful. | ||
136 | |||
137 | - It is usually a wise policy to provide some slack, | ||
138 | i.e. configure a few more nfsds than are currently needed, | ||
139 | to allow for future spikes in load. | ||
140 | |||
141 | |||
142 | Note that incoming packets on NFS transports will be dealt with in | ||
143 | one of three ways. An nfsd thread can be woken (threads-woken counts | ||
144 | this case), or the transport can be enqueued for later attention | ||
145 | (sockets-enqueued counts this case), or the packet can be temporarily | ||
146 | deferred because the transport is currently being used by an nfsd | ||
147 | thread. This last case is not very interesting and is not explicitly | ||
148 | counted, but can be inferred from the other counters thus: | ||
149 | |||
150 | packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken ) | ||
151 | |||
152 | |||
153 | More | ||
154 | ---- | ||
155 | Descriptions of the other statistics file should go here. | ||
156 | |||
157 | |||
158 | Greg Banks <gnb@sgi.com> | ||
159 | 26 Mar 2009 | ||
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt index 85eaeaddd27c..e386f7e4bcee 100644 --- a/Documentation/filesystems/nfs-rdma.txt +++ b/Documentation/filesystems/nfs-rdma.txt | |||
@@ -100,7 +100,7 @@ Installation | |||
100 | $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs | 100 | $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs |
101 | 101 | ||
102 | In this location, mount.nfs will be invoked automatically for NFS mounts | 102 | In this location, mount.nfs will be invoked automatically for NFS mounts |
103 | by the system mount commmand. | 103 | by the system mount command. |
104 | 104 | ||
105 | NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed | 105 | NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed |
106 | on the NFS client machine. You do not need this specific version of | 106 | on the NFS client machine. You do not need this specific version of |
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt new file mode 100644 index 000000000000..05d81cbcb2e1 --- /dev/null +++ b/Documentation/filesystems/nfs41-server.txt | |||
@@ -0,0 +1,161 @@ | |||
1 | NFSv4.1 Server Implementation | ||
2 | |||
3 | Server support for minorversion 1 can be controlled using the | ||
4 | /proc/fs/nfsd/versions control file. The string output returned | ||
5 | by reading this file will contain either "+4.1" or "-4.1" | ||
6 | correspondingly. | ||
7 | |||
8 | Currently, server support for minorversion 1 is disabled by default. | ||
9 | It can be enabled at run time by writing the string "+4.1" to | ||
10 | the /proc/fs/nfsd/versions control file. Note that to write this | ||
11 | control file, the nfsd service must be taken down. Use your user-mode | ||
12 | nfs-utils to set this up; see rpc.nfsd(8) | ||
13 | |||
14 | The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based | ||
15 | on the latest NFSv4.1 Internet Draft: | ||
16 | http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 | ||
17 | |||
18 | From the many new features in NFSv4.1 the current implementation | ||
19 | focuses on the mandatory-to-implement NFSv4.1 Sessions, providing | ||
20 | "exactly once" semantics and better control and throttling of the | ||
21 | resources allocated for each client. | ||
22 | |||
23 | Other NFSv4.1 features, Parallel NFS operations in particular, | ||
24 | are still under development out of tree. | ||
25 | See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design | ||
26 | for more information. | ||
27 | |||
28 | The table below, taken from the NFSv4.1 document, lists | ||
29 | the operations that are mandatory to implement (REQ), optional | ||
30 | (OPT), and NFSv4.0 operations that are required not to implement (MNI) | ||
31 | in minor version 1. The first column indicates the operations that | ||
32 | are not supported yet by the linux server implementation. | ||
33 | |||
34 | The OPTIONAL features identified and their abbreviations are as follows: | ||
35 | pNFS Parallel NFS | ||
36 | FDELG File Delegations | ||
37 | DDELG Directory Delegations | ||
38 | |||
39 | The following abbreviations indicate the linux server implementation status. | ||
40 | I Implemented NFSv4.1 operations. | ||
41 | NS Not Supported. | ||
42 | NS* unimplemented optional feature. | ||
43 | P pNFS features implemented out of tree. | ||
44 | PNS pNFS features that are not supported yet (out of tree). | ||
45 | |||
46 | Operations | ||
47 | |||
48 | +----------------------+------------+--------------+----------------+ | ||
49 | | Operation | REQ, REC, | Feature | Definition | | ||
50 | | | OPT, or | (REQ, REC, | | | ||
51 | | | MNI | or OPT) | | | ||
52 | +----------------------+------------+--------------+----------------+ | ||
53 | | ACCESS | REQ | | Section 18.1 | | ||
54 | NS | BACKCHANNEL_CTL | REQ | | Section 18.33 | | ||
55 | NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | | ||
56 | | CLOSE | REQ | | Section 18.2 | | ||
57 | | COMMIT | REQ | | Section 18.3 | | ||
58 | | CREATE | REQ | | Section 18.4 | | ||
59 | I | CREATE_SESSION | REQ | | Section 18.36 | | ||
60 | NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | | ||
61 | | DELEGRETURN | OPT | FDELG, | Section 18.6 | | ||
62 | | | | DDELG, pNFS | | | ||
63 | | | | (REQ) | | | ||
64 | NS | DESTROY_CLIENTID | REQ | | Section 18.50 | | ||
65 | I | DESTROY_SESSION | REQ | | Section 18.37 | | ||
66 | I | EXCHANGE_ID | REQ | | Section 18.35 | | ||
67 | NS | FREE_STATEID | REQ | | Section 18.38 | | ||
68 | | GETATTR | REQ | | Section 18.7 | | ||
69 | P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | | ||
70 | P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | | ||
71 | | GETFH | REQ | | Section 18.8 | | ||
72 | NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | | ||
73 | P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | | ||
74 | P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | | ||
75 | P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | | ||
76 | | LINK | OPT | | Section 18.9 | | ||
77 | | LOCK | REQ | | Section 18.10 | | ||
78 | | LOCKT | REQ | | Section 18.11 | | ||
79 | | LOCKU | REQ | | Section 18.12 | | ||
80 | | LOOKUP | REQ | | Section 18.13 | | ||
81 | | LOOKUPP | REQ | | Section 18.14 | | ||
82 | | NVERIFY | REQ | | Section 18.15 | | ||
83 | | OPEN | REQ | | Section 18.16 | | ||
84 | NS*| OPENATTR | OPT | | Section 18.17 | | ||
85 | | OPEN_CONFIRM | MNI | | N/A | | ||
86 | | OPEN_DOWNGRADE | REQ | | Section 18.18 | | ||
87 | | PUTFH | REQ | | Section 18.19 | | ||
88 | | PUTPUBFH | REQ | | Section 18.20 | | ||
89 | | PUTROOTFH | REQ | | Section 18.21 | | ||
90 | | READ | REQ | | Section 18.22 | | ||
91 | | READDIR | REQ | | Section 18.23 | | ||
92 | | READLINK | OPT | | Section 18.24 | | ||
93 | NS | RECLAIM_COMPLETE | REQ | | Section 18.51 | | ||
94 | | RELEASE_LOCKOWNER | MNI | | N/A | | ||
95 | | REMOVE | REQ | | Section 18.25 | | ||
96 | | RENAME | REQ | | Section 18.26 | | ||
97 | | RENEW | MNI | | N/A | | ||
98 | | RESTOREFH | REQ | | Section 18.27 | | ||
99 | | SAVEFH | REQ | | Section 18.28 | | ||
100 | | SECINFO | REQ | | Section 18.29 | | ||
101 | NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | | ||
102 | | | | layout (REQ) | Section 13.12 | | ||
103 | I | SEQUENCE | REQ | | Section 18.46 | | ||
104 | | SETATTR | REQ | | Section 18.30 | | ||
105 | | SETCLIENTID | MNI | | N/A | | ||
106 | | SETCLIENTID_CONFIRM | MNI | | N/A | | ||
107 | NS | SET_SSV | REQ | | Section 18.47 | | ||
108 | NS | TEST_STATEID | REQ | | Section 18.48 | | ||
109 | | VERIFY | REQ | | Section 18.31 | | ||
110 | NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 | | ||
111 | | WRITE | REQ | | Section 18.32 | | ||
112 | |||
113 | Callback Operations | ||
114 | |||
115 | +-------------------------+-----------+-------------+---------------+ | ||
116 | | Operation | REQ, REC, | Feature | Definition | | ||
117 | | | OPT, or | (REQ, REC, | | | ||
118 | | | MNI | or OPT) | | | ||
119 | +-------------------------+-----------+-------------+---------------+ | ||
120 | | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | | ||
121 | P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | | ||
122 | NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | | ||
123 | P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | | ||
124 | NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 | | ||
125 | NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | | ||
126 | | CB_RECALL | OPT | FDELG, | Section 20.2 | | ||
127 | | | | DDELG, pNFS | | | ||
128 | | | | (REQ) | | | ||
129 | NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 | | ||
130 | | | | DDELG, pNFS | | | ||
131 | | | | (REQ) | | | ||
132 | NS | CB_RECALL_SLOT | REQ | | Section 20.8 | | ||
133 | NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 | | ||
134 | | | | (REQ) | | | ||
135 | I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 | | ||
136 | | | | DDELG, pNFS | | | ||
137 | | | | (REQ) | | | ||
138 | NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 | | ||
139 | | | | DDELG, pNFS | | | ||
140 | | | | (REQ) | | | ||
141 | +-------------------------+-----------+-------------+---------------+ | ||
142 | |||
143 | Implementation notes: | ||
144 | |||
145 | EXCHANGE_ID: | ||
146 | * only SP4_NONE state protection supported | ||
147 | * implementation ids are ignored | ||
148 | |||
149 | CREATE_SESSION: | ||
150 | * backchannel attributes are ignored | ||
151 | * backchannel security parameters are ignored | ||
152 | |||
153 | SEQUENCE: | ||
154 | * no support for dynamic slot table renegotiation (optional) | ||
155 | |||
156 | nfsv4.1 COMPOUND rules: | ||
157 | The following cases aren't supported yet: | ||
158 | * Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION, | ||
159 | DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. | ||
160 | * DESTROY_SESSION MUST be the final operation in the COMPOUND request. | ||
161 | |||
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt new file mode 100644 index 000000000000..01539f410676 --- /dev/null +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -0,0 +1,199 @@ | |||
1 | NILFS2 | ||
2 | ------ | ||
3 | |||
4 | NILFS2 is a log-structured file system (LFS) supporting continuous | ||
5 | snapshotting. In addition to versioning capability of the entire file | ||
6 | system, users can even restore files mistakenly overwritten or | ||
7 | destroyed just a few seconds ago. Since NILFS2 can keep consistency | ||
8 | like conventional LFS, it achieves quick recovery after system | ||
9 | crashes. | ||
10 | |||
11 | NILFS2 creates a number of checkpoints every few seconds or per | ||
12 | synchronous write basis (unless there is no change). Users can select | ||
13 | significant versions among continuously created checkpoints, and can | ||
14 | change them into snapshots which will be preserved until they are | ||
15 | changed back to checkpoints. | ||
16 | |||
17 | There is no limit on the number of snapshots until the volume gets | ||
18 | full. Each snapshot is mountable as a read-only file system | ||
19 | concurrently with its writable mount, and this feature is convenient | ||
20 | for online backup. | ||
21 | |||
22 | The userland tools are included in nilfs-utils package, which is | ||
23 | available from the following download page. At least "mkfs.nilfs2", | ||
24 | "mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called | ||
25 | cleaner or garbage collector) are required. Details on the tools are | ||
26 | described in the man pages included in the package. | ||
27 | |||
28 | Project web page: http://www.nilfs.org/en/ | ||
29 | Download page: http://www.nilfs.org/en/download.html | ||
30 | Git tree web page: http://www.nilfs.org/git/ | ||
31 | NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users | ||
32 | |||
33 | Caveats | ||
34 | ======= | ||
35 | |||
36 | Features which NILFS2 does not support yet: | ||
37 | |||
38 | - atime | ||
39 | - extended attributes | ||
40 | - POSIX ACLs | ||
41 | - quotas | ||
42 | - fsck | ||
43 | - resize | ||
44 | - defragmentation | ||
45 | |||
46 | Mount options | ||
47 | ============= | ||
48 | |||
49 | NILFS2 supports the following mount options: | ||
50 | (*) == default | ||
51 | |||
52 | barrier=on(*) This enables/disables barriers. barrier=off disables | ||
53 | it, barrier=on enables it. | ||
54 | errors=continue(*) Keep going on a filesystem error. | ||
55 | errors=remount-ro Remount the filesystem read-only on an error. | ||
56 | errors=panic Panic and halt the machine if an error occurs. | ||
57 | cp=n Specify the checkpoint-number of the snapshot to be | ||
58 | mounted. Checkpoints and snapshots are listed by lscp | ||
59 | user command. Only the checkpoints marked as snapshot | ||
60 | are mountable with this option. Snapshot is read-only, | ||
61 | so a read-only mount option must be specified together. | ||
62 | order=relaxed(*) Apply relaxed order semantics that allows modified data | ||
63 | blocks to be written to disk without making a | ||
64 | checkpoint if no metadata update is going. This mode | ||
65 | is equivalent to the ordered data mode of the ext3 | ||
66 | filesystem except for the updates on data blocks still | ||
67 | conserve atomicity. This will improve synchronous | ||
68 | write performance for overwriting. | ||
69 | order=strict Apply strict in-order semantics that preserves sequence | ||
70 | of all file operations including overwriting of data | ||
71 | blocks. That means, it is guaranteed that no | ||
72 | overtaking of events occurs in the recovered file | ||
73 | system after a crash. | ||
74 | |||
75 | NILFS2 usage | ||
76 | ============ | ||
77 | |||
78 | To use nilfs2 as a local file system, simply: | ||
79 | |||
80 | # mkfs -t nilfs2 /dev/block_device | ||
81 | # mount -t nilfs2 /dev/block_device /dir | ||
82 | |||
83 | This will also invoke the cleaner through the mount helper program | ||
84 | (mount.nilfs2). | ||
85 | |||
86 | Checkpoints and snapshots are managed by the following commands. | ||
87 | Their manpages are included in the nilfs-utils package above. | ||
88 | |||
89 | lscp list checkpoints or snapshots. | ||
90 | mkcp make a checkpoint or a snapshot. | ||
91 | chcp change an existing checkpoint to a snapshot or vice versa. | ||
92 | rmcp invalidate specified checkpoint(s). | ||
93 | |||
94 | To mount a snapshot, | ||
95 | |||
96 | # mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir | ||
97 | |||
98 | where <cno> is the checkpoint number of the snapshot. | ||
99 | |||
100 | To unmount the NILFS2 mount point or snapshot, simply: | ||
101 | |||
102 | # umount /dir | ||
103 | |||
104 | Then, the cleaner daemon is automatically shut down by the umount | ||
105 | helper program (umount.nilfs2). | ||
106 | |||
107 | Disk format | ||
108 | =========== | ||
109 | |||
110 | A nilfs2 volume is equally divided into a number of segments except | ||
111 | for the super block (SB) and segment #0. A segment is the container | ||
112 | of logs. Each log is composed of summary information blocks, payload | ||
113 | blocks, and an optional super root block (SR): | ||
114 | |||
115 | ______________________________________________________ | ||
116 | | |SB| | Segment | Segment | Segment | ... | Segment | | | ||
117 | |_|__|_|____0____|____1____|____2____|_____|____N____|_| | ||
118 | 0 +1K +4K +8M +16M +24M +(8MB x N) | ||
119 | . . (Typical offsets for 4KB-block) | ||
120 | . . | ||
121 | .______________________. | ||
122 | | log | log |... | log | | ||
123 | |__1__|__2__|____|__m__| | ||
124 | . . | ||
125 | . . | ||
126 | . . | ||
127 | .______________________________. | ||
128 | | Summary | Payload blocks |SR| | ||
129 | |_blocks__|_________________|__| | ||
130 | |||
131 | The payload blocks are organized per file, and each file consists of | ||
132 | data blocks and B-tree node blocks: | ||
133 | |||
134 | |<--- File-A --->|<--- File-B --->| | ||
135 | _______________________________________________________________ | ||
136 | | Data blocks | B-tree blocks | Data blocks | B-tree blocks | ... | ||
137 | _|_____________|_______________|_____________|_______________|_ | ||
138 | |||
139 | |||
140 | Since only the modified blocks are written in the log, it may have | ||
141 | files without data blocks or B-tree node blocks. | ||
142 | |||
143 | The organization of the blocks is recorded in the summary information | ||
144 | blocks, which contains a header structure (nilfs_segment_summary), per | ||
145 | file structures (nilfs_finfo), and per block structures (nilfs_binfo): | ||
146 | |||
147 | _________________________________________________________________________ | ||
148 | | Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |... | ||
149 | |_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___ | ||
150 | |||
151 | |||
152 | The logs include regular files, directory files, symbolic link files | ||
153 | and several meta data files. The mata data files are the files used | ||
154 | to maintain file system meta data. The current version of NILFS2 uses | ||
155 | the following meta data files: | ||
156 | |||
157 | 1) Inode file (ifile) -- Stores on-disk inodes | ||
158 | 2) Checkpoint file (cpfile) -- Stores checkpoints | ||
159 | 3) Segment usage file (sufile) -- Stores allocation state of segments | ||
160 | 4) Data address translation file -- Maps virtual block numbers to usual | ||
161 | (DAT) block numbers. This file serves to | ||
162 | make on-disk blocks relocatable. | ||
163 | |||
164 | The following figure shows a typical organization of the logs: | ||
165 | |||
166 | _________________________________________________________________________ | ||
167 | | Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR| | ||
168 | |_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__| | ||
169 | |||
170 | |||
171 | To stride over segment boundaries, this sequence of files may be split | ||
172 | into multiple logs. The sequence of logs that should be treated as | ||
173 | logically one log, is delimited with flags marked in the segment | ||
174 | summary. The recovery code of nilfs2 looks this boundary information | ||
175 | to ensure atomicity of updates. | ||
176 | |||
177 | The super root block is inserted for every checkpoints. It includes | ||
178 | three special inodes, inodes for the DAT, cpfile, and sufile. Inodes | ||
179 | of regular files, directories, symlinks and other special files, are | ||
180 | included in the ifile. The inode of ifile itself is included in the | ||
181 | corresponding checkpoint entry in the cpfile. Thus, the hierarchy | ||
182 | among NILFS2 files can be depicted as follows: | ||
183 | |||
184 | Super block (SB) | ||
185 | | | ||
186 | v | ||
187 | Super root block (the latest cno=xx) | ||
188 | |-- DAT | ||
189 | |-- sufile | ||
190 | `-- cpfile | ||
191 | |-- ifile (cno=c1) | ||
192 | |-- ifile (cno=c2) ---- file (ino=i1) | ||
193 | : : |-- file (ino=i2) | ||
194 | `-- ifile (cno=xx) |-- file (ino=i3) | ||
195 | : : | ||
196 | `-- file (ino=yy) | ||
197 | ( regular file, directory, or symlink ) | ||
198 | |||
199 | For detail on the format of each file, please see include/linux/nilfs2_fs.h. | ||
diff --git a/Documentation/filesystems/pohmelfs/design_notes.txt b/Documentation/filesystems/pohmelfs/design_notes.txt new file mode 100644 index 000000000000..dcf833587162 --- /dev/null +++ b/Documentation/filesystems/pohmelfs/design_notes.txt | |||
@@ -0,0 +1,71 @@ | |||
1 | POHMELFS: Parallel Optimized Host Message Exchange Layered File System. | ||
2 | |||
3 | Evgeniy Polyakov <zbr@ioremap.net> | ||
4 | |||
5 | Homepage: http://www.ioremap.net/projects/pohmelfs | ||
6 | |||
7 | POHMELFS first began as a network filesystem with coherent local data and | ||
8 | metadata caches but is now evolving into a parallel distributed filesystem. | ||
9 | |||
10 | Main features of this FS include: | ||
11 | * Locally coherent cache for data and metadata with (potentially) byte-range locks. | ||
12 | Since all Linux filesystems lock the whole inode during writing, algorithm | ||
13 | is very simple and does not use byte-ranges, although they are sent in | ||
14 | locking messages. | ||
15 | * Completely async processing of all events except creation of hard and symbolic | ||
16 | links, and rename events. | ||
17 | Object creation and data reading and writing are processed asynchronously. | ||
18 | * Flexible object architecture optimized for network processing. | ||
19 | Ability to create long paths to objects and remove arbitrarily huge | ||
20 | directories with a single network command. | ||
21 | (like removing the whole kernel tree via a single network command). | ||
22 | * Very high performance. | ||
23 | * Fast and scalable multithreaded userspace server. Being in userspace it works | ||
24 | with any underlying filesystem and still is much faster than async in-kernel NFS one. | ||
25 | * Client is able to switch between different servers (if one goes down, client | ||
26 | automatically reconnects to second and so on). | ||
27 | * Transactions support. Full failover for all operations. | ||
28 | Resending transactions to different servers on timeout or error. | ||
29 | * Read request (data read, directory listing, lookup requests) balancing between multiple servers. | ||
30 | * Write requests are replicated to multiple servers and completed only when all of them are acked. | ||
31 | * Ability to add and/or remove servers from the working set at run-time. | ||
32 | * Strong authentification and possible data encryption in network channel. | ||
33 | * Extended attributes support. | ||
34 | |||
35 | POHMELFS is based on transactions, which are potentially long-standing objects that live | ||
36 | in the client's memory. Each transaction contains all the information needed to process a given | ||
37 | command (or set of commands, which is frequently used during data writing: single transactions | ||
38 | can contain creation and data writing commands). Transactions are committed by all the servers | ||
39 | to which they are sent and, in case of failures, are eventually resent or dropped with an error. | ||
40 | For example, reading will return an error if no servers are available. | ||
41 | |||
42 | POHMELFS uses a asynchronous approach to data processing. Courtesy of transactions, it is | ||
43 | possible to detach replies from requests and, if the command requires data to be received, the | ||
44 | caller sleeps waiting for it. Thus, it is possible to issue multiple read commands to different | ||
45 | servers and async threads will pick up replies in parallel, find appropriate transactions in the | ||
46 | system and put the data where it belongs (like the page or inode cache). | ||
47 | |||
48 | The main feature of POHMELFS is writeback data and the metadata cache. | ||
49 | Only a few non-performance critical operations use the write-through cache and | ||
50 | are synchronous: hard and symbolic link creation, and object rename. Creation, | ||
51 | removal of objects and data writing are asynchronous and are sent to | ||
52 | the server during system writeback. Only one writer at a time is allowed for any | ||
53 | given inode, which is guarded by an appropriate locking protocol. | ||
54 | Because of this feature, POHMELFS is extremely fast at metadata intensive | ||
55 | workloads and can fully utilize the bandwidth to the servers when doing bulk | ||
56 | data transfers. | ||
57 | |||
58 | POHMELFS clients operate with a working set of servers and are capable of balancing read-only | ||
59 | operations (like lookups or directory listings) between them according to IO priorities. | ||
60 | Administrators can add or remove servers from the set at run-time via special commands (described | ||
61 | in Documentation/pohmelfs/info.txt file). Writes are replicated to all servers, which are connected | ||
62 | with write permission turned on. IO priority and permissions can be changed in run-time. | ||
63 | |||
64 | POHMELFS is capable of full data channel encryption and/or strong crypto hashing. | ||
65 | One can select any kernel supported cipher, encryption mode, hash type and operation mode | ||
66 | (hmac or digest). It is also possible to use both or neither (default). Crypto configuration | ||
67 | is checked during mount time and, if the server does not support it, appropriate capabilities | ||
68 | will be disabled or mount will fail (if 'crypto_fail_unsupported' mount option is specified). | ||
69 | Crypto performance heavily depends on the number of crypto threads, which asynchronously perform | ||
70 | crypto operations and send the resulting data to server or submit it up the stack. This number | ||
71 | can be controlled via a mount option. | ||
diff --git a/Documentation/filesystems/pohmelfs/info.txt b/Documentation/filesystems/pohmelfs/info.txt new file mode 100644 index 000000000000..db2e41393626 --- /dev/null +++ b/Documentation/filesystems/pohmelfs/info.txt | |||
@@ -0,0 +1,99 @@ | |||
1 | POHMELFS usage information. | ||
2 | |||
3 | Mount options. | ||
4 | All but index, number of crypto threads and maximum IO size can changed via remount. | ||
5 | |||
6 | idx=%u | ||
7 | Each mountpoint is associated with a special index via this option. | ||
8 | Administrator can add or remove servers from the given index, so all mounts, | ||
9 | which were attached to it, are updated. | ||
10 | Default it is 0. | ||
11 | |||
12 | trans_scan_timeout=%u | ||
13 | This timeout, expressed in milliseconds, specifies time to scan transaction | ||
14 | trees looking for stale requests, which have to be resent, or if number of | ||
15 | retries exceed specified limit, dropped with error. | ||
16 | Default is 5 seconds. | ||
17 | |||
18 | drop_scan_timeout=%u | ||
19 | Internal timeout, expressed in milliseconds, which specifies how frequently | ||
20 | inodes marked to be dropped are freed. It also specifies how frequently | ||
21 | the system checks that servers have to be added or removed from current working set. | ||
22 | Default is 1 second. | ||
23 | |||
24 | wait_on_page_timeout=%u | ||
25 | Number of milliseconds to wait for reply from remote server for data reading command. | ||
26 | If this timeout is exceeded, reading returns an error. | ||
27 | Default is 5 seconds. | ||
28 | |||
29 | trans_retries=%u | ||
30 | This is the number of times that a transaction will be resent to a server that did | ||
31 | not answer for the last @trans_scan_timeout milliseconds. | ||
32 | When the number of resends exceeds this limit, the transaction is completed with error. | ||
33 | Default is 5 resends. | ||
34 | |||
35 | crypto_thread_num=%u | ||
36 | Number of crypto processing threads. Threads are used both for RX and TX traffic. | ||
37 | Default is 2, or no threads if crypto operations are not supported. | ||
38 | |||
39 | trans_max_pages=%u | ||
40 | Maximum number of pages in a single transaction. This parameter also controls | ||
41 | the number of pages, allocated for crypto processing (each crypto thread has | ||
42 | pool of pages, the number of which is equal to 'trans_max_pages'. | ||
43 | Default is 100 pages. | ||
44 | |||
45 | crypto_fail_unsupported | ||
46 | If specified, mount will fail if the server does not support requested crypto operations. | ||
47 | By default mount will disable non-matching crypto operations. | ||
48 | |||
49 | mcache_timeout=%u | ||
50 | Maximum number of milliseconds to wait for the mcache objects to be processed. | ||
51 | Mcache includes locks (given lock should be granted by server), attributes (they should be | ||
52 | fully received in the given timeframe). | ||
53 | Default is 5 seconds. | ||
54 | |||
55 | Usage examples. | ||
56 | |||
57 | Add server server1.net:1025 into the working set with index $idx | ||
58 | with appropriate hash algorithm and key file and cipher algorithm, mode and key file: | ||
59 | $cfg A add -a server1.net -p 1025 -i $idx -K $hash_key -k $cipher_key | ||
60 | |||
61 | Mount filesystem with given index $idx to /mnt mountpoint. | ||
62 | Client will connect to all servers specified in the working set via previous command: | ||
63 | mount -t pohmel -o idx=$idx q /mnt | ||
64 | |||
65 | Change permissions to read-only (-I 1 option, '-I 2' - write-only, 3 - rw): | ||
66 | $cfg A modify -a server1.net -p 1025 -i $idx -I 1 | ||
67 | |||
68 | Change IO priority to 123 (node with the highest priority gets read requests). | ||
69 | $cfg A modify -a server1.net -p 1025 -i $idx -P 123 | ||
70 | |||
71 | One can check currect status of all connections in the mountstats file: | ||
72 | # cat /proc/$PID/mountstats | ||
73 | ... | ||
74 | device none mounted on /mnt with fstype pohmel | ||
75 | idx addr(:port) socket_type protocol active priority permissions | ||
76 | 0 server1.net:1026 1 6 1 250 1 | ||
77 | 0 server2.net:1025 1 6 1 123 3 | ||
78 | |||
79 | Server installation. | ||
80 | |||
81 | Creating a server, which listens at port 1025 and 0.0.0.0 address. | ||
82 | Working root directory (note, that server chroots there, so you have to have appropriate permissions) | ||
83 | is set to /mnt, server will negotiate hash/cipher with client, in case client requested it, there | ||
84 | are appropriate key files. | ||
85 | Number of working threads is set to 10. | ||
86 | |||
87 | # ./fserver -a 0.0.0.0 -p 1025 -r /mnt -w 10 -K hash_key -k cipher_key | ||
88 | |||
89 | -A 6 - listen on ipv6 address. Default: Disabled. | ||
90 | -r root - path to root directory. Default: /tmp. | ||
91 | -a addr - listen address. Default: 0.0.0.0. | ||
92 | -p port - listen port. Default: 1025. | ||
93 | -w workers - number of workers per connected client. Default: 1. | ||
94 | -K file - hash key size. Default: none. | ||
95 | -k file - cipher key size. Default: none. | ||
96 | -h - this help. | ||
97 | |||
98 | Number of worker threads specifies how many workers will be created for each client. | ||
99 | Bulk single-client transafers usually are better handled with smaller number (like 1-3). | ||
diff --git a/Documentation/filesystems/pohmelfs/network_protocol.txt b/Documentation/filesystems/pohmelfs/network_protocol.txt new file mode 100644 index 000000000000..40ea6c295afb --- /dev/null +++ b/Documentation/filesystems/pohmelfs/network_protocol.txt | |||
@@ -0,0 +1,227 @@ | |||
1 | POHMELFS network protocol. | ||
2 | |||
3 | Basic structure used in network communication is following command: | ||
4 | |||
5 | struct netfs_cmd | ||
6 | { | ||
7 | __u16 cmd; /* Command number */ | ||
8 | __u16 csize; /* Attached crypto information size */ | ||
9 | __u16 cpad; /* Attached padding size */ | ||
10 | __u16 ext; /* External flags */ | ||
11 | __u32 size; /* Size of the attached data */ | ||
12 | __u32 trans; /* Transaction id */ | ||
13 | __u64 id; /* Object ID to operate on. Used for feedback.*/ | ||
14 | __u64 start; /* Start of the object. */ | ||
15 | __u64 iv; /* IV sequence */ | ||
16 | __u8 data[0]; | ||
17 | }; | ||
18 | |||
19 | Commands can be embedded into transaction command (which in turn has own command), | ||
20 | so one can extend protocol as needed without breaking backward compatibility as long | ||
21 | as old commands are supported. All string lengths include tail 0 byte. | ||
22 | |||
23 | All commans are transfered over the network in big-endian. CPU endianess is used at the end peers. | ||
24 | |||
25 | @cmd - command number, which specifies command to be processed. Following | ||
26 | commands are used currently: | ||
27 | |||
28 | NETFS_READDIR = 1, /* Read directory for given inode number */ | ||
29 | NETFS_READ_PAGE, /* Read data page from the server */ | ||
30 | NETFS_WRITE_PAGE, /* Write data page to the server */ | ||
31 | NETFS_CREATE, /* Create directory entry */ | ||
32 | NETFS_REMOVE, /* Remove directory entry */ | ||
33 | NETFS_LOOKUP, /* Lookup single object */ | ||
34 | NETFS_LINK, /* Create a link */ | ||
35 | NETFS_TRANS, /* Transaction */ | ||
36 | NETFS_OPEN, /* Open intent */ | ||
37 | NETFS_INODE_INFO, /* Metadata cache coherency synchronization message */ | ||
38 | NETFS_PAGE_CACHE, /* Page cache invalidation message */ | ||
39 | NETFS_READ_PAGES, /* Read multiple contiguous pages in one go */ | ||
40 | NETFS_RENAME, /* Rename object */ | ||
41 | NETFS_CAPABILITIES, /* Capabilities of the client, for example supported crypto */ | ||
42 | NETFS_LOCK, /* Distributed lock message */ | ||
43 | NETFS_XATTR_SET, /* Set extended attribute */ | ||
44 | NETFS_XATTR_GET, /* Get extended attribute */ | ||
45 | |||
46 | @ext - external flags. Used by different commands to specify some extra arguments | ||
47 | like partial size of the embedded objects or creation flags. | ||
48 | |||
49 | @size - size of the attached data. For NETFS_READ_PAGE and NETFS_READ_PAGES no data is attached, | ||
50 | but size of the requested data is incorporated here. It does not include size of the command | ||
51 | header (struct netfs_cmd) itself. | ||
52 | |||
53 | @id - id of the object this command operates on. Each command can use it for own purpose. | ||
54 | |||
55 | @start - start of the object this command operates on. Each command can use it for own purpose. | ||
56 | |||
57 | @csize, @cpad - size and padding size of the (attached if needed) crypto information. | ||
58 | |||
59 | Command specifications. | ||
60 | |||
61 | @NETFS_READDIR | ||
62 | This command is used to sync content of the remote dir to the client. | ||
63 | |||
64 | @ext - length of the path to object. | ||
65 | @size - the same. | ||
66 | @id - local inode number of the directory to read. | ||
67 | @start - zero. | ||
68 | |||
69 | |||
70 | @NETFS_READ_PAGE | ||
71 | This command is used to read data from remote server. | ||
72 | Data size does not exceed local page cache size. | ||
73 | |||
74 | @id - inode number. | ||
75 | @start - first byte offset. | ||
76 | @size - number of bytes to read plus length of the path to object. | ||
77 | @ext - object path length. | ||
78 | |||
79 | |||
80 | @NETFS_CREATE | ||
81 | Used to create object. | ||
82 | It does not require that all directories on top of the object were | ||
83 | already created, it will create them automatically. Each object has | ||
84 | associated @netfs_path_entry data structure, which contains creation | ||
85 | mode (permissions and type) and length of the name as long as name itself. | ||
86 | |||
87 | @start - 0 | ||
88 | @size - size of the all data structures needed to create a path | ||
89 | @id - local inode number | ||
90 | @ext - 0 | ||
91 | |||
92 | |||
93 | @NETFS_REMOVE | ||
94 | Used to remove object. | ||
95 | |||
96 | @ext - length of the path to object. | ||
97 | @size - the same. | ||
98 | @id - local inode number. | ||
99 | @start - zero. | ||
100 | |||
101 | |||
102 | @NETFS_LOOKUP | ||
103 | Lookup information about object on server. | ||
104 | |||
105 | @ext - length of the path to object. | ||
106 | @size - the same. | ||
107 | @id - local inode number of the directory to look object in. | ||
108 | @start - local inode number of the object to look at. | ||
109 | |||
110 | |||
111 | @NETFS_LINK | ||
112 | Create hard of symlink. | ||
113 | Command is sent as "object_path|target_path". | ||
114 | |||
115 | @size - size of the above string. | ||
116 | @id - parent local inode number. | ||
117 | @start - 1 for symlink, 0 for hardlink. | ||
118 | @ext - size of the "object_path" above. | ||
119 | |||
120 | |||
121 | @NETFS_TRANS | ||
122 | Transaction header. | ||
123 | |||
124 | @size - incorporates all embedded command sizes including theirs header sizes. | ||
125 | @start - transaction generation number - unique id used to find transaction. | ||
126 | @ext - transaction flags. Unused at the moment. | ||
127 | @id - 0. | ||
128 | |||
129 | |||
130 | @NETFS_OPEN | ||
131 | Open intent for given transaction. | ||
132 | |||
133 | @id - local inode number. | ||
134 | @start - 0. | ||
135 | @size - path length to the object. | ||
136 | @ext - open flags (O_RDWR and so on). | ||
137 | |||
138 | |||
139 | @NETFS_INODE_INFO | ||
140 | Metadata update command. | ||
141 | It is sent to servers when attributes of the object are changed and received | ||
142 | when data or metadata were updated. It operates with the following structure: | ||
143 | |||
144 | struct netfs_inode_info | ||
145 | { | ||
146 | unsigned int mode; | ||
147 | unsigned int nlink; | ||
148 | unsigned int uid; | ||
149 | unsigned int gid; | ||
150 | unsigned int blocksize; | ||
151 | unsigned int padding; | ||
152 | __u64 ino; | ||
153 | __u64 blocks; | ||
154 | __u64 rdev; | ||
155 | __u64 size; | ||
156 | __u64 version; | ||
157 | }; | ||
158 | |||
159 | It effectively mirrors stat(2) returned data. | ||
160 | |||
161 | |||
162 | @ext - path length to the object. | ||
163 | @size - the same plus size of the netfs_inode_info structure. | ||
164 | @id - local inode number. | ||
165 | @start - 0. | ||
166 | |||
167 | |||
168 | @NETFS_PAGE_CACHE | ||
169 | Command is only received by clients. It contains information about | ||
170 | page to be marked as not up-to-date. | ||
171 | |||
172 | @id - client's inode number. | ||
173 | @start - last byte of the page to be invalidated. If it is not equal to | ||
174 | current inode size, it will be vmtruncated(). | ||
175 | @size - 0 | ||
176 | @ext - 0 | ||
177 | |||
178 | |||
179 | @NETFS_READ_PAGES | ||
180 | Used to read multiple contiguous pages in one go. | ||
181 | |||
182 | @start - first byte of the contiguous region to read. | ||
183 | @size - contains of two fields: lower 8 bits are used to represent page cache shift | ||
184 | used by client, another 3 bytes are used to get number of pages. | ||
185 | @id - local inode number. | ||
186 | @ext - path length to the object. | ||
187 | |||
188 | |||
189 | @NETFS_RENAME | ||
190 | Used to rename object. | ||
191 | Attached data is formed into following string: "old_path|new_path". | ||
192 | |||
193 | @id - local inode number. | ||
194 | @start - parent inode number. | ||
195 | @size - length of the above string. | ||
196 | @ext - length of the old path part. | ||
197 | |||
198 | |||
199 | @NETFS_CAPABILITIES | ||
200 | Used to exchange crypto capabilities with server. | ||
201 | If crypto capabilities are not supported by server, then client will disable it | ||
202 | or fail (if 'crypto_fail_unsupported' mount options was specified). | ||
203 | |||
204 | @id - superblock index. Used to specify crypto information for group of servers. | ||
205 | @size - size of the attached capabilities structure. | ||
206 | @start - 0. | ||
207 | @size - 0. | ||
208 | @scsize - 0. | ||
209 | |||
210 | @NETFS_LOCK | ||
211 | Used to send lock request/release messages. Although it sends byte range request | ||
212 | and is capable of flushing pages based on that, it is not used, since all Linux | ||
213 | filesystems lock the whole inode. | ||
214 | |||
215 | @id - lock generation number. | ||
216 | @start - start of the locked range. | ||
217 | @size - size of the locked range. | ||
218 | @ext - lock type: read/write. Not used actually. 15'th bit is used to determine, | ||
219 | if it is lock request (1) or release (0). | ||
220 | |||
221 | @NETFS_XATTR_SET | ||
222 | @NETFS_XATTR_GET | ||
223 | Used to set/get extended attributes for given inode. | ||
224 | @id - attribute generation number or xattr setting type | ||
225 | @start - size of the attribute (request or attached) | ||
226 | @size - name length, path len and data size for given attribute | ||
227 | @ext - path length for given object | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 830bad7cce0f..fad18f9456e4 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -5,10 +5,12 @@ | |||
5 | Bodo Bauer <bb@ricochet.net> | 5 | Bodo Bauer <bb@ricochet.net> |
6 | 6 | ||
7 | 2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000 | 7 | 2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000 |
8 | move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009 | ||
8 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
9 | Version 1.3 Kernel version 2.2.12 | 10 | Version 1.3 Kernel version 2.2.12 |
10 | Kernel version 2.4.0-test11-pre4 | 11 | Kernel version 2.4.0-test11-pre4 |
11 | ------------------------------------------------------------------------------ | 12 | ------------------------------------------------------------------------------ |
13 | fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009 | ||
12 | 14 | ||
13 | Table of Contents | 15 | Table of Contents |
14 | ----------------- | 16 | ----------------- |
@@ -26,25 +28,17 @@ Table of Contents | |||
26 | 1.6 Parallel port info in /proc/parport | 28 | 1.6 Parallel port info in /proc/parport |
27 | 1.7 TTY info in /proc/tty | 29 | 1.7 TTY info in /proc/tty |
28 | 1.8 Miscellaneous kernel statistics in /proc/stat | 30 | 1.8 Miscellaneous kernel statistics in /proc/stat |
31 | 1.9 Ext4 file system parameters | ||
29 | 32 | ||
30 | 2 Modifying System Parameters | 33 | 2 Modifying System Parameters |
31 | 2.1 /proc/sys/fs - File system data | 34 | |
32 | 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats | 35 | 3 Per-Process Parameters |
33 | 2.3 /proc/sys/kernel - general kernel parameters | 36 | 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score |
34 | 2.4 /proc/sys/vm - The virtual memory subsystem | 37 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score |
35 | 2.5 /proc/sys/dev - Device specific parameters | 38 | 3.3 /proc/<pid>/io - Display the IO accounting fields |
36 | 2.6 /proc/sys/sunrpc - Remote procedure calls | 39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings |
37 | 2.7 /proc/sys/net - Networking stuff | 40 | 3.5 /proc/<pid>/mountinfo - Information about mounts |
38 | 2.8 /proc/sys/net/ipv4 - IPV4 settings | 41 | |
39 | 2.9 Appletalk | ||
40 | 2.10 IPX | ||
41 | 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem | ||
42 | 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score | ||
43 | 2.13 /proc/<pid>/oom_score - Display current oom-killer score | ||
44 | 2.14 /proc/<pid>/io - Display the IO accounting fields | ||
45 | 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings | ||
46 | 2.16 /proc/<pid>/mountinfo - Information about mounts | ||
47 | 2.17 /proc/sys/fs/epoll - Configuration options for the epoll interface | ||
48 | 42 | ||
49 | ------------------------------------------------------------------------------ | 43 | ------------------------------------------------------------------------------ |
50 | Preface | 44 | Preface |
@@ -123,7 +117,7 @@ The link self points to the process reading the file system. Each process | |||
123 | subdirectory has the entries listed in Table 1-1. | 117 | subdirectory has the entries listed in Table 1-1. |
124 | 118 | ||
125 | 119 | ||
126 | Table 1-1: Process specific entries in /proc | 120 | Table 1-1: Process specific entries in /proc |
127 | .............................................................................. | 121 | .............................................................................. |
128 | File Content | 122 | File Content |
129 | clear_refs Clears page referenced bits shown in smaps output | 123 | clear_refs Clears page referenced bits shown in smaps output |
@@ -141,46 +135,103 @@ Table 1-1: Process specific entries in /proc | |||
141 | status Process status in human readable form | 135 | status Process status in human readable form |
142 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan | 136 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan |
143 | stack Report full stack trace, enable via CONFIG_STACKTRACE | 137 | stack Report full stack trace, enable via CONFIG_STACKTRACE |
144 | smaps Extension based on maps, the rss size for each mapped file | 138 | smaps a extension based on maps, showing the memory consumption of |
139 | each mapping | ||
145 | .............................................................................. | 140 | .............................................................................. |
146 | 141 | ||
147 | For example, to get the status information of a process, all you have to do is | 142 | For example, to get the status information of a process, all you have to do is |
148 | read the file /proc/PID/status: | 143 | read the file /proc/PID/status: |
149 | 144 | ||
150 | >cat /proc/self/status | 145 | >cat /proc/self/status |
151 | Name: cat | 146 | Name: cat |
152 | State: R (running) | 147 | State: R (running) |
153 | Pid: 5452 | 148 | Tgid: 5452 |
154 | PPid: 743 | 149 | Pid: 5452 |
150 | PPid: 743 | ||
155 | TracerPid: 0 (2.4) | 151 | TracerPid: 0 (2.4) |
156 | Uid: 501 501 501 501 | 152 | Uid: 501 501 501 501 |
157 | Gid: 100 100 100 100 | 153 | Gid: 100 100 100 100 |
158 | Groups: 100 14 16 | 154 | FDSize: 256 |
159 | VmSize: 1112 kB | 155 | Groups: 100 14 16 |
160 | VmLck: 0 kB | 156 | VmPeak: 5004 kB |
161 | VmRSS: 348 kB | 157 | VmSize: 5004 kB |
162 | VmData: 24 kB | 158 | VmLck: 0 kB |
163 | VmStk: 12 kB | 159 | VmHWM: 476 kB |
164 | VmExe: 8 kB | 160 | VmRSS: 476 kB |
165 | VmLib: 1044 kB | 161 | VmData: 156 kB |
166 | SigPnd: 0000000000000000 | 162 | VmStk: 88 kB |
167 | SigBlk: 0000000000000000 | 163 | VmExe: 68 kB |
168 | SigIgn: 0000000000000000 | 164 | VmLib: 1412 kB |
169 | SigCgt: 0000000000000000 | 165 | VmPTE: 20 kb |
170 | CapInh: 00000000fffffeff | 166 | Threads: 1 |
171 | CapPrm: 0000000000000000 | 167 | SigQ: 0/28578 |
172 | CapEff: 0000000000000000 | 168 | SigPnd: 0000000000000000 |
173 | 169 | ShdPnd: 0000000000000000 | |
170 | SigBlk: 0000000000000000 | ||
171 | SigIgn: 0000000000000000 | ||
172 | SigCgt: 0000000000000000 | ||
173 | CapInh: 00000000fffffeff | ||
174 | CapPrm: 0000000000000000 | ||
175 | CapEff: 0000000000000000 | ||
176 | CapBnd: ffffffffffffffff | ||
177 | voluntary_ctxt_switches: 0 | ||
178 | nonvoluntary_ctxt_switches: 1 | ||
174 | 179 | ||
175 | This shows you nearly the same information you would get if you viewed it with | 180 | This shows you nearly the same information you would get if you viewed it with |
176 | the ps command. In fact, ps uses the proc file system to obtain its | 181 | the ps command. In fact, ps uses the proc file system to obtain its |
177 | information. The statm file contains more detailed information about the | 182 | information. But you get a more detailed view of the process by reading the |
178 | process memory usage. Its seven fields are explained in Table 1-2. The stat | 183 | file /proc/PID/status. It fields are described in table 1-2. |
179 | file contains details information about the process itself. Its fields are | ||
180 | explained in Table 1-3. | ||
181 | 184 | ||
185 | The statm file contains more detailed information about the process | ||
186 | memory usage. Its seven fields are explained in Table 1-3. The stat file | ||
187 | contains details information about the process itself. Its fields are | ||
188 | explained in Table 1-4. | ||
182 | 189 | ||
183 | Table 1-2: Contents of the statm files (as of 2.6.8-rc3) | 190 | Table 1-2: Contents of the statm files (as of 2.6.30-rc7) |
191 | .............................................................................. | ||
192 | Field Content | ||
193 | Name filename of the executable | ||
194 | State state (R is running, S is sleeping, D is sleeping | ||
195 | in an uninterruptible wait, Z is zombie, | ||
196 | T is traced or stopped) | ||
197 | Tgid thread group ID | ||
198 | Pid process id | ||
199 | PPid process id of the parent process | ||
200 | TracerPid PID of process tracing this process (0 if not) | ||
201 | Uid Real, effective, saved set, and file system UIDs | ||
202 | Gid Real, effective, saved set, and file system GIDs | ||
203 | FDSize number of file descriptor slots currently allocated | ||
204 | Groups supplementary group list | ||
205 | VmPeak peak virtual memory size | ||
206 | VmSize total program size | ||
207 | VmLck locked memory size | ||
208 | VmHWM peak resident set size ("high water mark") | ||
209 | VmRSS size of memory portions | ||
210 | VmData size of data, stack, and text segments | ||
211 | VmStk size of data, stack, and text segments | ||
212 | VmExe size of text segment | ||
213 | VmLib size of shared library code | ||
214 | VmPTE size of page table entries | ||
215 | Threads number of threads | ||
216 | SigQ number of signals queued/max. number for queue | ||
217 | SigPnd bitmap of pending signals for the thread | ||
218 | ShdPnd bitmap of shared pending signals for the process | ||
219 | SigBlk bitmap of blocked signals | ||
220 | SigIgn bitmap of ignored signals | ||
221 | SigCgt bitmap of catched signals | ||
222 | CapInh bitmap of inheritable capabilities | ||
223 | CapPrm bitmap of permitted capabilities | ||
224 | CapEff bitmap of effective capabilities | ||
225 | CapBnd bitmap of capabilities bounding set | ||
226 | Cpus_allowed mask of CPUs on which this process may run | ||
227 | Cpus_allowed_list Same as previous, but in "list format" | ||
228 | Mems_allowed mask of memory nodes allowed to this process | ||
229 | Mems_allowed_list Same as previous, but in "list format" | ||
230 | voluntary_ctxt_switches number of voluntary context switches | ||
231 | nonvoluntary_ctxt_switches number of non voluntary context switches | ||
232 | .............................................................................. | ||
233 | |||
234 | Table 1-3: Contents of the statm files (as of 2.6.8-rc3) | ||
184 | .............................................................................. | 235 | .............................................................................. |
185 | Field Content | 236 | Field Content |
186 | size total program size (pages) (same as VmSize in status) | 237 | size total program size (pages) (same as VmSize in status) |
@@ -195,7 +246,7 @@ Table 1-2: Contents of the statm files (as of 2.6.8-rc3) | |||
195 | .............................................................................. | 246 | .............................................................................. |
196 | 247 | ||
197 | 248 | ||
198 | Table 1-3: Contents of the stat files (as of 2.6.22-rc3) | 249 | Table 1-4: Contents of the stat files (as of 2.6.30-rc7) |
199 | .............................................................................. | 250 | .............................................................................. |
200 | Field Content | 251 | Field Content |
201 | pid process id | 252 | pid process id |
@@ -229,10 +280,10 @@ Table 1-3: Contents of the stat files (as of 2.6.22-rc3) | |||
229 | start_stack address of the start of the stack | 280 | start_stack address of the start of the stack |
230 | esp current value of ESP | 281 | esp current value of ESP |
231 | eip current value of EIP | 282 | eip current value of EIP |
232 | pending bitmap of pending signals (obsolete) | 283 | pending bitmap of pending signals |
233 | blocked bitmap of blocked signals (obsolete) | 284 | blocked bitmap of blocked signals |
234 | sigign bitmap of ignored signals (obsolete) | 285 | sigign bitmap of ignored signals |
235 | sigcatch bitmap of catched signals (obsolete) | 286 | sigcatch bitmap of catched signals |
236 | wchan address where process went to sleep | 287 | wchan address where process went to sleep |
237 | 0 (place holder) | 288 | 0 (place holder) |
238 | 0 (place holder) | 289 | 0 (place holder) |
@@ -241,19 +292,99 @@ Table 1-3: Contents of the stat files (as of 2.6.22-rc3) | |||
241 | rt_priority realtime priority | 292 | rt_priority realtime priority |
242 | policy scheduling policy (man sched_setscheduler) | 293 | policy scheduling policy (man sched_setscheduler) |
243 | blkio_ticks time spent waiting for block IO | 294 | blkio_ticks time spent waiting for block IO |
295 | gtime guest time of the task in jiffies | ||
296 | cgtime guest time of the task children in jiffies | ||
244 | .............................................................................. | 297 | .............................................................................. |
245 | 298 | ||
299 | The /proc/PID/map file containing the currently mapped memory regions and | ||
300 | their access permissions. | ||
301 | |||
302 | The format is: | ||
303 | |||
304 | address perms offset dev inode pathname | ||
305 | |||
306 | 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test | ||
307 | 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test | ||
308 | 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] | ||
309 | a7cb1000-a7cb2000 ---p 00000000 00:00 0 | ||
310 | a7cb2000-a7eb2000 rw-p 00000000 00:00 0 | ||
311 | a7eb2000-a7eb3000 ---p 00000000 00:00 0 | ||
312 | a7eb3000-a7ed5000 rw-p 00000000 00:00 0 | ||
313 | a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 | ||
314 | a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 | ||
315 | a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 | ||
316 | a800b000-a800e000 rw-p 00000000 00:00 0 | ||
317 | a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 | ||
318 | a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 | ||
319 | a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 | ||
320 | a8024000-a8027000 rw-p 00000000 00:00 0 | ||
321 | a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 | ||
322 | a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 | ||
323 | a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 | ||
324 | aff35000-aff4a000 rw-p 00000000 00:00 0 [stack] | ||
325 | ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] | ||
326 | |||
327 | where "address" is the address space in the process that it occupies, "perms" | ||
328 | is a set of permissions: | ||
329 | |||
330 | r = read | ||
331 | w = write | ||
332 | x = execute | ||
333 | s = shared | ||
334 | p = private (copy on write) | ||
335 | |||
336 | "offset" is the offset into the mapping, "dev" is the device (major:minor), and | ||
337 | "inode" is the inode on that device. 0 indicates that no inode is associated | ||
338 | with the memory region, as the case would be with BSS (uninitialized data). | ||
339 | The "pathname" shows the name associated file for this mapping. If the mapping | ||
340 | is not associated with a file: | ||
341 | |||
342 | [heap] = the heap of the program | ||
343 | [stack] = the stack of the main process | ||
344 | [vdso] = the "virtual dynamic shared object", | ||
345 | the kernel system call handler | ||
346 | |||
347 | or if empty, the mapping is anonymous. | ||
348 | |||
349 | |||
350 | The /proc/PID/smaps is an extension based on maps, showing the memory | ||
351 | consumption for each of the process's mappings. For each of mappings there | ||
352 | is a series of lines such as the following: | ||
353 | |||
354 | 08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash | ||
355 | Size: 1084 kB | ||
356 | Rss: 892 kB | ||
357 | Pss: 374 kB | ||
358 | Shared_Clean: 892 kB | ||
359 | Shared_Dirty: 0 kB | ||
360 | Private_Clean: 0 kB | ||
361 | Private_Dirty: 0 kB | ||
362 | Referenced: 892 kB | ||
363 | Swap: 0 kB | ||
364 | KernelPageSize: 4 kB | ||
365 | MMUPageSize: 4 kB | ||
366 | |||
367 | The first of these lines shows the same information as is displayed for the | ||
368 | mapping in /proc/PID/maps. The remaining lines show the size of the mapping, | ||
369 | the amount of the mapping that is currently resident in RAM, the "proportional | ||
370 | set size” (divide each shared page by the number of processes sharing it), the | ||
371 | number of clean and dirty shared pages in the mapping, and the number of clean | ||
372 | and dirty private pages in the mapping. The "Referenced" indicates the amount | ||
373 | of memory currently marked as referenced or accessed. | ||
374 | |||
375 | This file is only present if the CONFIG_MMU kernel configuration option is | ||
376 | enabled. | ||
246 | 377 | ||
247 | 1.2 Kernel data | 378 | 1.2 Kernel data |
248 | --------------- | 379 | --------------- |
249 | 380 | ||
250 | Similar to the process entries, the kernel data files give information about | 381 | Similar to the process entries, the kernel data files give information about |
251 | the running kernel. The files used to obtain this information are contained in | 382 | the running kernel. The files used to obtain this information are contained in |
252 | /proc and are listed in Table 1-4. Not all of these will be present in your | 383 | /proc and are listed in Table 1-5. Not all of these will be present in your |
253 | system. It depends on the kernel configuration and the loaded modules, which | 384 | system. It depends on the kernel configuration and the loaded modules, which |
254 | files are there, and which are missing. | 385 | files are there, and which are missing. |
255 | 386 | ||
256 | Table 1-4: Kernel info in /proc | 387 | Table 1-5: Kernel info in /proc |
257 | .............................................................................. | 388 | .............................................................................. |
258 | File Content | 389 | File Content |
259 | apm Advanced power management info | 390 | apm Advanced power management info |
@@ -290,6 +421,7 @@ Table 1-4: Kernel info in /proc | |||
290 | rtc Real time clock | 421 | rtc Real time clock |
291 | scsi SCSI info (see text) | 422 | scsi SCSI info (see text) |
292 | slabinfo Slab pool info | 423 | slabinfo Slab pool info |
424 | softirqs softirq usage | ||
293 | stat Overall statistics | 425 | stat Overall statistics |
294 | swaps Swap space utilization | 426 | swaps Swap space utilization |
295 | sys See chapter 2 | 427 | sys See chapter 2 |
@@ -373,7 +505,7 @@ just those considered 'most important'. The new vectors are: | |||
373 | RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are | 505 | RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are |
374 | sent from one CPU to another per the needs of the OS. Typically, | 506 | sent from one CPU to another per the needs of the OS. Typically, |
375 | their statistics are used by kernel developers and interested users to | 507 | their statistics are used by kernel developers and interested users to |
376 | determine the occurance of interrupt of the given type. | 508 | determine the occurrence of interrupts of the given type. |
377 | 509 | ||
378 | The above IRQ vectors are displayed only when relevent. For example, | 510 | The above IRQ vectors are displayed only when relevent. For example, |
379 | the threshold vector does not exist on x86_64 platforms. Others are | 511 | the threshold vector does not exist on x86_64 platforms. Others are |
@@ -558,7 +690,7 @@ Committed_AS: The amount of memory presently allocated on the system. | |||
558 | memory once that memory has been successfully allocated. | 690 | memory once that memory has been successfully allocated. |
559 | VmallocTotal: total size of vmalloc memory area | 691 | VmallocTotal: total size of vmalloc memory area |
560 | VmallocUsed: amount of vmalloc area which is used | 692 | VmallocUsed: amount of vmalloc area which is used |
561 | VmallocChunk: largest contigious block of vmalloc area which is free | 693 | VmallocChunk: largest contiguous block of vmalloc area which is free |
562 | 694 | ||
563 | .............................................................................. | 695 | .............................................................................. |
564 | 696 | ||
@@ -604,6 +736,25 @@ on the kind of area : | |||
604 | 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... | 736 | 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... |
605 | pages=10 vmalloc N0=10 | 737 | pages=10 vmalloc N0=10 |
606 | 738 | ||
739 | .............................................................................. | ||
740 | |||
741 | softirqs: | ||
742 | |||
743 | Provides counts of softirq handlers serviced since boot time, for each cpu. | ||
744 | |||
745 | > cat /proc/softirqs | ||
746 | CPU0 CPU1 CPU2 CPU3 | ||
747 | HI: 0 0 0 0 | ||
748 | TIMER: 27166 27120 27097 27034 | ||
749 | NET_TX: 0 0 0 17 | ||
750 | NET_RX: 42 0 0 39 | ||
751 | BLOCK: 0 0 107 1121 | ||
752 | TASKLET: 0 0 0 290 | ||
753 | SCHED: 27035 26983 26971 26746 | ||
754 | HRTIMER: 0 0 0 0 | ||
755 | RCU: 1678 1769 2178 2250 | ||
756 | |||
757 | |||
607 | 1.3 IDE devices in /proc/ide | 758 | 1.3 IDE devices in /proc/ide |
608 | ---------------------------- | 759 | ---------------------------- |
609 | 760 | ||
@@ -621,10 +772,10 @@ IDE devices: | |||
621 | 772 | ||
622 | More detailed information can be found in the controller specific | 773 | More detailed information can be found in the controller specific |
623 | subdirectories. These are named ide0, ide1 and so on. Each of these | 774 | subdirectories. These are named ide0, ide1 and so on. Each of these |
624 | directories contains the files shown in table 1-5. | 775 | directories contains the files shown in table 1-6. |
625 | 776 | ||
626 | 777 | ||
627 | Table 1-5: IDE controller info in /proc/ide/ide? | 778 | Table 1-6: IDE controller info in /proc/ide/ide? |
628 | .............................................................................. | 779 | .............................................................................. |
629 | File Content | 780 | File Content |
630 | channel IDE channel (0 or 1) | 781 | channel IDE channel (0 or 1) |
@@ -634,11 +785,11 @@ Table 1-5: IDE controller info in /proc/ide/ide? | |||
634 | .............................................................................. | 785 | .............................................................................. |
635 | 786 | ||
636 | Each device connected to a controller has a separate subdirectory in the | 787 | Each device connected to a controller has a separate subdirectory in the |
637 | controllers directory. The files listed in table 1-6 are contained in these | 788 | controllers directory. The files listed in table 1-7 are contained in these |
638 | directories. | 789 | directories. |
639 | 790 | ||
640 | 791 | ||
641 | Table 1-6: IDE device information | 792 | Table 1-7: IDE device information |
642 | .............................................................................. | 793 | .............................................................................. |
643 | File Content | 794 | File Content |
644 | cache The cache | 795 | cache The cache |
@@ -680,12 +831,12 @@ the drive parameters: | |||
680 | 1.4 Networking info in /proc/net | 831 | 1.4 Networking info in /proc/net |
681 | -------------------------------- | 832 | -------------------------------- |
682 | 833 | ||
683 | The subdirectory /proc/net follows the usual pattern. Table 1-6 shows the | 834 | The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the |
684 | additional values you get for IP version 6 if you configure the kernel to | 835 | additional values you get for IP version 6 if you configure the kernel to |
685 | support this. Table 1-7 lists the files and their meaning. | 836 | support this. Table 1-9 lists the files and their meaning. |
686 | 837 | ||
687 | 838 | ||
688 | Table 1-6: IPv6 info in /proc/net | 839 | Table 1-8: IPv6 info in /proc/net |
689 | .............................................................................. | 840 | .............................................................................. |
690 | File Content | 841 | File Content |
691 | udp6 UDP sockets (IPv6) | 842 | udp6 UDP sockets (IPv6) |
@@ -700,7 +851,7 @@ Table 1-6: IPv6 info in /proc/net | |||
700 | .............................................................................. | 851 | .............................................................................. |
701 | 852 | ||
702 | 853 | ||
703 | Table 1-7: Network info in /proc/net | 854 | Table 1-9: Network info in /proc/net |
704 | .............................................................................. | 855 | .............................................................................. |
705 | File Content | 856 | File Content |
706 | arp Kernel ARP table | 857 | arp Kernel ARP table |
@@ -824,10 +975,10 @@ The directory /proc/parport contains information about the parallel ports of | |||
824 | your system. It has one subdirectory for each port, named after the port | 975 | your system. It has one subdirectory for each port, named after the port |
825 | number (0,1,2,...). | 976 | number (0,1,2,...). |
826 | 977 | ||
827 | These directories contain the four files shown in Table 1-8. | 978 | These directories contain the four files shown in Table 1-10. |
828 | 979 | ||
829 | 980 | ||
830 | Table 1-8: Files in /proc/parport | 981 | Table 1-10: Files in /proc/parport |
831 | .............................................................................. | 982 | .............................................................................. |
832 | File Content | 983 | File Content |
833 | autoprobe Any IEEE-1284 device ID information that has been acquired. | 984 | autoprobe Any IEEE-1284 device ID information that has been acquired. |
@@ -845,10 +996,10 @@ Table 1-8: Files in /proc/parport | |||
845 | 996 | ||
846 | Information about the available and actually used tty's can be found in the | 997 | Information about the available and actually used tty's can be found in the |
847 | directory /proc/tty.You'll find entries for drivers and line disciplines in | 998 | directory /proc/tty.You'll find entries for drivers and line disciplines in |
848 | this directory, as shown in Table 1-9. | 999 | this directory, as shown in Table 1-11. |
849 | 1000 | ||
850 | 1001 | ||
851 | Table 1-9: Files in /proc/tty | 1002 | Table 1-11: Files in /proc/tty |
852 | .............................................................................. | 1003 | .............................................................................. |
853 | File Content | 1004 | File Content |
854 | drivers list of drivers and their usage | 1005 | drivers list of drivers and their usage |
@@ -890,6 +1041,7 @@ since the system first booted. For a quick look, simply cat the file: | |||
890 | processes 2915 | 1041 | processes 2915 |
891 | procs_running 1 | 1042 | procs_running 1 |
892 | procs_blocked 0 | 1043 | procs_blocked 0 |
1044 | softirq 183433 0 21755 12 39 1137 231 21459 2263 | ||
893 | 1045 | ||
894 | The very first "cpu" line aggregates the numbers in all of the other "cpuN" | 1046 | The very first "cpu" line aggregates the numbers in all of the other "cpuN" |
895 | lines. These numbers identify the amount of time the CPU has spent performing | 1047 | lines. These numbers identify the amount of time the CPU has spent performing |
@@ -925,6 +1077,11 @@ CPUs. | |||
925 | The "procs_blocked" line gives the number of processes currently blocked, | 1077 | The "procs_blocked" line gives the number of processes currently blocked, |
926 | waiting for I/O to complete. | 1078 | waiting for I/O to complete. |
927 | 1079 | ||
1080 | The "softirq" line gives counts of softirqs serviced since boot time, for each | ||
1081 | of the possible system softirqs. The first column is the total of all | ||
1082 | softirqs serviced; each subsequent column is the total for that particular | ||
1083 | softirq. | ||
1084 | |||
928 | 1085 | ||
929 | 1.9 Ext4 file system parameters | 1086 | 1.9 Ext4 file system parameters |
930 | ------------------------------ | 1087 | ------------------------------ |
@@ -933,34 +1090,13 @@ Information about mounted ext4 file systems can be found in | |||
933 | /proc/fs/ext4. Each mounted filesystem will have a directory in | 1090 | /proc/fs/ext4. Each mounted filesystem will have a directory in |
934 | /proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or | 1091 | /proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or |
935 | /proc/fs/ext4/dm-0). The files in each per-device directory are shown | 1092 | /proc/fs/ext4/dm-0). The files in each per-device directory are shown |
936 | in Table 1-10, below. | 1093 | in Table 1-12, below. |
937 | 1094 | ||
938 | Table 1-10: Files in /proc/fs/ext4/<devname> | 1095 | Table 1-12: Files in /proc/fs/ext4/<devname> |
939 | .............................................................................. | 1096 | .............................................................................. |
940 | File Content | 1097 | File Content |
941 | mb_groups details of multiblock allocator buddy cache of free blocks | 1098 | mb_groups details of multiblock allocator buddy cache of free blocks |
942 | mb_history multiblock allocation history | 1099 | mb_history multiblock allocation history |
943 | stats controls whether the multiblock allocator should start | ||
944 | collecting statistics, which are shown during the unmount | ||
945 | group_prealloc the multiblock allocator will round up allocation | ||
946 | requests to a multiple of this tuning parameter if the | ||
947 | stripe size is not set in the ext4 superblock | ||
948 | max_to_scan The maximum number of extents the multiblock allocator | ||
949 | will search to find the best extent | ||
950 | min_to_scan The minimum number of extents the multiblock allocator | ||
951 | will search to find the best extent | ||
952 | order2_req Tuning parameter which controls the minimum size for | ||
953 | requests (as a power of 2) where the buddy cache is | ||
954 | used | ||
955 | stream_req Files which have fewer blocks than this tunable | ||
956 | parameter will have their blocks allocated out of a | ||
957 | block group specific preallocation pool, so that small | ||
958 | files are packed closely together. Each large file | ||
959 | will have its blocks allocated out of its own unique | ||
960 | preallocation pool. | ||
961 | inode_readahead Tuning parameter which controls the maximum number of | ||
962 | inode table blocks that ext4's inode table readahead | ||
963 | algorithm will pre-read into the buffer cache | ||
964 | .............................................................................. | 1100 | .............................................................................. |
965 | 1101 | ||
966 | 1102 | ||
@@ -1011,1028 +1147,33 @@ review the kernel documentation in the directory /usr/src/linux/Documentation. | |||
1011 | This chapter is heavily based on the documentation included in the pre 2.2 | 1147 | This chapter is heavily based on the documentation included in the pre 2.2 |
1012 | kernels, and became part of it in version 2.2.1 of the Linux kernel. | 1148 | kernels, and became part of it in version 2.2.1 of the Linux kernel. |
1013 | 1149 | ||
1014 | 2.1 /proc/sys/fs - File system data | 1150 | Please see: Documentation/sysctls/ directory for descriptions of these |
1015 | ----------------------------------- | ||
1016 | |||
1017 | This subdirectory contains specific file system, file handle, inode, dentry | ||
1018 | and quota information. | ||
1019 | |||
1020 | Currently, these files are in /proc/sys/fs: | ||
1021 | |||
1022 | dentry-state | ||
1023 | ------------ | ||
1024 | |||
1025 | Status of the directory cache. Since directory entries are dynamically | ||
1026 | allocated and deallocated, this file indicates the current status. It holds | ||
1027 | six values, in which the last two are not used and are always zero. The others | ||
1028 | are listed in table 2-1. | ||
1029 | |||
1030 | |||
1031 | Table 2-1: Status files of the directory cache | ||
1032 | .............................................................................. | ||
1033 | File Content | ||
1034 | nr_dentry Almost always zero | ||
1035 | nr_unused Number of unused cache entries | ||
1036 | age_limit | ||
1037 | in seconds after the entry may be reclaimed, when memory is short | ||
1038 | want_pages internally | ||
1039 | .............................................................................. | ||
1040 | |||
1041 | dquot-nr and dquot-max | ||
1042 | ---------------------- | ||
1043 | |||
1044 | The file dquot-max shows the maximum number of cached disk quota entries. | ||
1045 | |||
1046 | The file dquot-nr shows the number of allocated disk quota entries and the | ||
1047 | number of free disk quota entries. | ||
1048 | |||
1049 | If the number of available cached disk quotas is very low and you have a large | ||
1050 | number of simultaneous system users, you might want to raise the limit. | ||
1051 | |||
1052 | file-nr and file-max | ||
1053 | -------------------- | ||
1054 | |||
1055 | The kernel allocates file handles dynamically, but doesn't free them again at | ||
1056 | this time. | ||
1057 | |||
1058 | The value in file-max denotes the maximum number of file handles that the | ||
1059 | Linux kernel will allocate. When you get a lot of error messages about running | ||
1060 | out of file handles, you might want to raise this limit. The default value is | ||
1061 | 10% of RAM in kilobytes. To change it, just write the new number into the | ||
1062 | file: | ||
1063 | |||
1064 | # cat /proc/sys/fs/file-max | ||
1065 | 4096 | ||
1066 | # echo 8192 > /proc/sys/fs/file-max | ||
1067 | # cat /proc/sys/fs/file-max | ||
1068 | 8192 | ||
1069 | |||
1070 | |||
1071 | This method of revision is useful for all customizable parameters of the | ||
1072 | kernel - simply echo the new value to the corresponding file. | ||
1073 | |||
1074 | Historically, the three values in file-nr denoted the number of allocated file | ||
1075 | handles, the number of allocated but unused file handles, and the maximum | ||
1076 | number of file handles. Linux 2.6 always reports 0 as the number of free file | ||
1077 | handles -- this is not an error, it just means that the number of allocated | ||
1078 | file handles exactly matches the number of used file handles. | ||
1079 | |||
1080 | Attempts to allocate more file descriptors than file-max are reported with | ||
1081 | printk, look for "VFS: file-max limit <number> reached". | ||
1082 | |||
1083 | inode-state and inode-nr | ||
1084 | ------------------------ | ||
1085 | |||
1086 | The file inode-nr contains the first two items from inode-state, so we'll skip | ||
1087 | to that file... | ||
1088 | |||
1089 | inode-state contains two actual numbers and five dummy values. The numbers | ||
1090 | are nr_inodes and nr_free_inodes (in order of appearance). | ||
1091 | |||
1092 | nr_inodes | ||
1093 | ~~~~~~~~~ | ||
1094 | |||
1095 | Denotes the number of inodes the system has allocated. This number will | ||
1096 | grow and shrink dynamically. | ||
1097 | |||
1098 | nr_open | ||
1099 | ------- | ||
1100 | |||
1101 | Denotes the maximum number of file-handles a process can | ||
1102 | allocate. Default value is 1024*1024 (1048576) which should be | ||
1103 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
1104 | resource limit. | ||
1105 | |||
1106 | nr_free_inodes | ||
1107 | -------------- | ||
1108 | |||
1109 | Represents the number of free inodes. Ie. The number of inuse inodes is | ||
1110 | (nr_inodes - nr_free_inodes). | ||
1111 | |||
1112 | aio-nr and aio-max-nr | ||
1113 | --------------------- | ||
1114 | |||
1115 | aio-nr is the running total of the number of events specified on the | ||
1116 | io_setup system call for all currently active aio contexts. If aio-nr | ||
1117 | reaches aio-max-nr then io_setup will fail with EAGAIN. Note that | ||
1118 | raising aio-max-nr does not result in the pre-allocation or re-sizing | ||
1119 | of any kernel data structures. | ||
1120 | |||
1121 | 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats | ||
1122 | ----------------------------------------------------------- | ||
1123 | |||
1124 | Besides these files, there is the subdirectory /proc/sys/fs/binfmt_misc. This | ||
1125 | handles the kernel support for miscellaneous binary formats. | ||
1126 | |||
1127 | Binfmt_misc provides the ability to register additional binary formats to the | ||
1128 | Kernel without compiling an additional module/kernel. Therefore, binfmt_misc | ||
1129 | needs to know magic numbers at the beginning or the filename extension of the | ||
1130 | binary. | ||
1131 | |||
1132 | It works by maintaining a linked list of structs that contain a description of | ||
1133 | a binary format, including a magic with size (or the filename extension), | ||
1134 | offset and mask, and the interpreter name. On request it invokes the given | ||
1135 | interpreter with the original program as argument, as binfmt_java and | ||
1136 | binfmt_em86 and binfmt_mz do. Since binfmt_misc does not define any default | ||
1137 | binary-formats, you have to register an additional binary-format. | ||
1138 | |||
1139 | There are two general files in binfmt_misc and one file per registered format. | ||
1140 | The two general files are register and status. | ||
1141 | |||
1142 | Registering a new binary format | ||
1143 | ------------------------------- | ||
1144 | |||
1145 | To register a new binary format you have to issue the command | ||
1146 | |||
1147 | echo :name:type:offset:magic:mask:interpreter: > /proc/sys/fs/binfmt_misc/register | ||
1148 | |||
1149 | |||
1150 | |||
1151 | with appropriate name (the name for the /proc-dir entry), offset (defaults to | ||
1152 | 0, if omitted), magic, mask (which can be omitted, defaults to all 0xff) and | ||
1153 | last but not least, the interpreter that is to be invoked (for example and | ||
1154 | testing /bin/echo). Type can be M for usual magic matching or E for filename | ||
1155 | extension matching (give extension in place of magic). | ||
1156 | |||
1157 | Check or reset the status of the binary format handler | ||
1158 | ------------------------------------------------------ | ||
1159 | |||
1160 | If you do a cat on the file /proc/sys/fs/binfmt_misc/status, you will get the | ||
1161 | current status (enabled/disabled) of binfmt_misc. Change the status by echoing | ||
1162 | 0 (disables) or 1 (enables) or -1 (caution: this clears all previously | ||
1163 | registered binary formats) to status. For example echo 0 > status to disable | ||
1164 | binfmt_misc (temporarily). | ||
1165 | |||
1166 | Status of a single handler | ||
1167 | -------------------------- | ||
1168 | |||
1169 | Each registered handler has an entry in /proc/sys/fs/binfmt_misc. These files | ||
1170 | perform the same function as status, but their scope is limited to the actual | ||
1171 | binary format. By cating this file, you also receive all related information | ||
1172 | about the interpreter/magic of the binfmt. | ||
1173 | |||
1174 | Example usage of binfmt_misc (emulate binfmt_java) | ||
1175 | -------------------------------------------------- | ||
1176 | |||
1177 | cd /proc/sys/fs/binfmt_misc | ||
1178 | echo ':Java:M::\xca\xfe\xba\xbe::/usr/local/java/bin/javawrapper:' > register | ||
1179 | echo ':HTML:E::html::/usr/local/java/bin/appletviewer:' > register | ||
1180 | echo ':Applet:M::<!--applet::/usr/local/java/bin/appletviewer:' > register | ||
1181 | echo ':DEXE:M::\x0eDEX::/usr/bin/dosexec:' > register | ||
1182 | |||
1183 | |||
1184 | These four lines add support for Java executables and Java applets (like | ||
1185 | binfmt_java, additionally recognizing the .html extension with no need to put | ||
1186 | <!--applet> to every applet file). You have to install the JDK and the | ||
1187 | shell-script /usr/local/java/bin/javawrapper too. It works around the | ||
1188 | brokenness of the Java filename handling. To add a Java binary, just create a | ||
1189 | link to the class-file somewhere in the path. | ||
1190 | |||
1191 | 2.3 /proc/sys/kernel - general kernel parameters | ||
1192 | ------------------------------------------------ | ||
1193 | |||
1194 | This directory reflects general kernel behaviors. As I've said before, the | ||
1195 | contents depend on your configuration. Here you'll find the most important | ||
1196 | files, along with descriptions of what they mean and how to use them. | ||
1197 | |||
1198 | acct | ||
1199 | ---- | ||
1200 | |||
1201 | The file contains three values; highwater, lowwater, and frequency. | ||
1202 | |||
1203 | It exists only when BSD-style process accounting is enabled. These values | ||
1204 | control its behavior. If the free space on the file system where the log lives | ||
1205 | goes below lowwater percentage, accounting suspends. If it goes above | ||
1206 | highwater percentage, accounting resumes. Frequency determines how often you | ||
1207 | check the amount of free space (value is in seconds). Default settings are: 4, | ||
1208 | 2, and 30. That is, suspend accounting if there is less than 2 percent free; | ||
1209 | resume it if we have a value of 3 or more percent; consider information about | ||
1210 | the amount of free space valid for 30 seconds | ||
1211 | |||
1212 | ctrl-alt-del | ||
1213 | ------------ | ||
1214 | |||
1215 | When the value in this file is 0, ctrl-alt-del is trapped and sent to the init | ||
1216 | program to handle a graceful restart. However, when the value is greater that | ||
1217 | zero, Linux's reaction to this key combination will be an immediate reboot, | ||
1218 | without syncing its dirty buffers. | ||
1219 | |||
1220 | [NOTE] | ||
1221 | When a program (like dosemu) has the keyboard in raw mode, the | ||
1222 | ctrl-alt-del is intercepted by the program before it ever reaches the | ||
1223 | kernel tty layer, and it is up to the program to decide what to do with | ||
1224 | it. | ||
1225 | |||
1226 | domainname and hostname | ||
1227 | ----------------------- | ||
1228 | |||
1229 | These files can be controlled to set the NIS domainname and hostname of your | ||
1230 | box. For the classic darkstar.frop.org a simple: | ||
1231 | |||
1232 | # echo "darkstar" > /proc/sys/kernel/hostname | ||
1233 | # echo "frop.org" > /proc/sys/kernel/domainname | ||
1234 | |||
1235 | |||
1236 | would suffice to set your hostname and NIS domainname. | ||
1237 | |||
1238 | osrelease, ostype and version | ||
1239 | ----------------------------- | ||
1240 | |||
1241 | The names make it pretty obvious what these fields contain: | ||
1242 | |||
1243 | > cat /proc/sys/kernel/osrelease | ||
1244 | 2.2.12 | ||
1245 | |||
1246 | > cat /proc/sys/kernel/ostype | ||
1247 | Linux | ||
1248 | |||
1249 | > cat /proc/sys/kernel/version | ||
1250 | #4 Fri Oct 1 12:41:14 PDT 1999 | ||
1251 | |||
1252 | |||
1253 | The files osrelease and ostype should be clear enough. Version needs a little | ||
1254 | more clarification. The #4 means that this is the 4th kernel built from this | ||
1255 | source base and the date after it indicates the time the kernel was built. The | ||
1256 | only way to tune these values is to rebuild the kernel. | ||
1257 | |||
1258 | panic | ||
1259 | ----- | ||
1260 | |||
1261 | The value in this file represents the number of seconds the kernel waits | ||
1262 | before rebooting on a panic. When you use the software watchdog, the | ||
1263 | recommended setting is 60. If set to 0, the auto reboot after a kernel panic | ||
1264 | is disabled, which is the default setting. | ||
1265 | |||
1266 | printk | ||
1267 | ------ | ||
1268 | |||
1269 | The four values in printk denote | ||
1270 | * console_loglevel, | ||
1271 | * default_message_loglevel, | ||
1272 | * minimum_console_loglevel and | ||
1273 | * default_console_loglevel | ||
1274 | respectively. | ||
1275 | |||
1276 | These values influence printk() behavior when printing or logging error | ||
1277 | messages, which come from inside the kernel. See syslog(2) for more | ||
1278 | information on the different log levels. | ||
1279 | |||
1280 | console_loglevel | ||
1281 | ---------------- | ||
1282 | |||
1283 | Messages with a higher priority than this will be printed to the console. | ||
1284 | |||
1285 | default_message_level | ||
1286 | --------------------- | ||
1287 | |||
1288 | Messages without an explicit priority will be printed with this priority. | ||
1289 | |||
1290 | minimum_console_loglevel | ||
1291 | ------------------------ | ||
1292 | |||
1293 | Minimum (highest) value to which the console_loglevel can be set. | ||
1294 | |||
1295 | default_console_loglevel | ||
1296 | ------------------------ | ||
1297 | |||
1298 | Default value for console_loglevel. | ||
1299 | |||
1300 | sg-big-buff | ||
1301 | ----------- | ||
1302 | |||
1303 | This file shows the size of the generic SCSI (sg) buffer. At this point, you | ||
1304 | can't tune it yet, but you can change it at compile time by editing | ||
1305 | include/scsi/sg.h and changing the value of SG_BIG_BUFF. | ||
1306 | |||
1307 | If you use a scanner with SANE (Scanner Access Now Easy) you might want to set | ||
1308 | this to a higher value. Refer to the SANE documentation on this issue. | ||
1309 | |||
1310 | modprobe | ||
1311 | -------- | ||
1312 | |||
1313 | The location where the modprobe binary is located. The kernel uses this | ||
1314 | program to load modules on demand. | ||
1315 | |||
1316 | unknown_nmi_panic | ||
1317 | ----------------- | ||
1318 | |||
1319 | The value in this file affects behavior of handling NMI. When the value is | ||
1320 | non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel | ||
1321 | debugging information is displayed on console. | ||
1322 | |||
1323 | NMI switch that most IA32 servers have fires unknown NMI up, for example. | ||
1324 | If a system hangs up, try pressing the NMI switch. | ||
1325 | |||
1326 | panic_on_unrecovered_nmi | ||
1327 | ------------------------ | ||
1328 | |||
1329 | The default Linux behaviour on an NMI of either memory or unknown is to continue | ||
1330 | operation. For many environments such as scientific computing it is preferable | ||
1331 | that the box is taken out and the error dealt with than an uncorrected | ||
1332 | parity/ECC error get propogated. | ||
1333 | |||
1334 | A small number of systems do generate NMI's for bizarre random reasons such as | ||
1335 | power management so the default is off. That sysctl works like the existing | ||
1336 | panic controls already in that directory. | ||
1337 | |||
1338 | nmi_watchdog | ||
1339 | ------------ | ||
1340 | |||
1341 | Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero | ||
1342 | the NMI watchdog is enabled and will continuously test all online cpus to | ||
1343 | determine whether or not they are still functioning properly. Currently, | ||
1344 | passing "nmi_watchdog=" parameter at boot time is required for this function | ||
1345 | to work. | ||
1346 | |||
1347 | If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the | ||
1348 | NMI watchdog shares registers with oprofile. By disabling the NMI watchdog, | ||
1349 | oprofile may have more registers to utilize. | ||
1350 | |||
1351 | msgmni | ||
1352 | ------ | ||
1353 | |||
1354 | Maximum number of message queue ids on the system. | ||
1355 | This value scales to the amount of lowmem. It is automatically recomputed | ||
1356 | upon memory add/remove or ipc namespace creation/removal. | ||
1357 | When a value is written into this file, msgmni's value becomes fixed, i.e. it | ||
1358 | is not recomputed anymore when one of the above events occurs. | ||
1359 | Use auto_msgmni to change this behavior. | ||
1360 | |||
1361 | auto_msgmni | ||
1362 | ----------- | ||
1363 | |||
1364 | Enables/Disables automatic recomputing of msgmni upon memory add/remove or | ||
1365 | upon ipc namespace creation/removal (see the msgmni description above). | ||
1366 | Echoing "1" into this file enables msgmni automatic recomputing. | ||
1367 | Echoing "0" turns it off. | ||
1368 | auto_msgmni default value is 1. | ||
1369 | |||
1370 | |||
1371 | 2.4 /proc/sys/vm - The virtual memory subsystem | ||
1372 | ----------------------------------------------- | ||
1373 | |||
1374 | Please see: Documentation/sysctls/vm.txt for a description of these | ||
1375 | entries. | 1151 | entries. |
1376 | 1152 | ||
1153 | ------------------------------------------------------------------------------ | ||
1154 | Summary | ||
1155 | ------------------------------------------------------------------------------ | ||
1156 | Certain aspects of kernel behavior can be modified at runtime, without the | ||
1157 | need to recompile the kernel, or even to reboot the system. The files in the | ||
1158 | /proc/sys tree can not only be read, but also modified. You can use the echo | ||
1159 | command to write value into these files, thereby changing the default settings | ||
1160 | of the kernel. | ||
1161 | ------------------------------------------------------------------------------ | ||
1377 | 1162 | ||
1378 | 2.5 /proc/sys/dev - Device specific parameters | 1163 | ------------------------------------------------------------------------------ |
1379 | ---------------------------------------------- | 1164 | CHAPTER 3: PER-PROCESS PARAMETERS |
1380 | 1165 | ------------------------------------------------------------------------------ | |
1381 | Currently there is only support for CDROM drives, and for those, there is only | ||
1382 | one read-only file containing information about the CD-ROM drives attached to | ||
1383 | the system: | ||
1384 | |||
1385 | >cat /proc/sys/dev/cdrom/info | ||
1386 | CD-ROM information, Id: cdrom.c 2.55 1999/04/25 | ||
1387 | |||
1388 | drive name: sr0 hdb | ||
1389 | drive speed: 32 40 | ||
1390 | drive # of slots: 1 0 | ||
1391 | Can close tray: 1 1 | ||
1392 | Can open tray: 1 1 | ||
1393 | Can lock tray: 1 1 | ||
1394 | Can change speed: 1 1 | ||
1395 | Can select disk: 0 1 | ||
1396 | Can read multisession: 1 1 | ||
1397 | Can read MCN: 1 1 | ||
1398 | Reports media changed: 1 1 | ||
1399 | Can play audio: 1 1 | ||
1400 | |||
1401 | |||
1402 | You see two drives, sr0 and hdb, along with a list of their features. | ||
1403 | |||
1404 | 2.6 /proc/sys/sunrpc - Remote procedure calls | ||
1405 | --------------------------------------------- | ||
1406 | |||
1407 | This directory contains four files, which enable or disable debugging for the | ||
1408 | RPC functions NFS, NFS-daemon, RPC and NLM. The default values are 0. They can | ||
1409 | be set to one to turn debugging on. (The default value is 0 for each) | ||
1410 | |||
1411 | 2.7 /proc/sys/net - Networking stuff | ||
1412 | ------------------------------------ | ||
1413 | |||
1414 | The interface to the networking parts of the kernel is located in | ||
1415 | /proc/sys/net. Table 2-3 shows all possible subdirectories. You may see only | ||
1416 | some of them, depending on your kernel's configuration. | ||
1417 | |||
1418 | |||
1419 | Table 2-3: Subdirectories in /proc/sys/net | ||
1420 | .............................................................................. | ||
1421 | Directory Content Directory Content | ||
1422 | core General parameter appletalk Appletalk protocol | ||
1423 | unix Unix domain sockets netrom NET/ROM | ||
1424 | 802 E802 protocol ax25 AX25 | ||
1425 | ethernet Ethernet protocol rose X.25 PLP layer | ||
1426 | ipv4 IP version 4 x25 X.25 protocol | ||
1427 | ipx IPX token-ring IBM token ring | ||
1428 | bridge Bridging decnet DEC net | ||
1429 | ipv6 IP version 6 | ||
1430 | .............................................................................. | ||
1431 | |||
1432 | We will concentrate on IP networking here. Since AX15, X.25, and DEC Net are | ||
1433 | only minor players in the Linux world, we'll skip them in this chapter. You'll | ||
1434 | find some short info on Appletalk and IPX further on in this chapter. Review | ||
1435 | the online documentation and the kernel source to get a detailed view of the | ||
1436 | parameters for those protocols. In this section we'll discuss the | ||
1437 | subdirectories printed in bold letters in the table above. As default values | ||
1438 | are suitable for most needs, there is no need to change these values. | ||
1439 | |||
1440 | /proc/sys/net/core - Network core options | ||
1441 | ----------------------------------------- | ||
1442 | |||
1443 | rmem_default | ||
1444 | ------------ | ||
1445 | |||
1446 | The default setting of the socket receive buffer in bytes. | ||
1447 | |||
1448 | rmem_max | ||
1449 | -------- | ||
1450 | |||
1451 | The maximum receive socket buffer size in bytes. | ||
1452 | |||
1453 | wmem_default | ||
1454 | ------------ | ||
1455 | |||
1456 | The default setting (in bytes) of the socket send buffer. | ||
1457 | |||
1458 | wmem_max | ||
1459 | -------- | ||
1460 | |||
1461 | The maximum send socket buffer size in bytes. | ||
1462 | |||
1463 | message_burst and message_cost | ||
1464 | ------------------------------ | ||
1465 | |||
1466 | These parameters are used to limit the warning messages written to the kernel | ||
1467 | log from the networking code. They enforce a rate limit to make a | ||
1468 | denial-of-service attack impossible. A higher message_cost factor, results in | ||
1469 | fewer messages that will be written. Message_burst controls when messages will | ||
1470 | be dropped. The default settings limit warning messages to one every five | ||
1471 | seconds. | ||
1472 | |||
1473 | warnings | ||
1474 | -------- | ||
1475 | |||
1476 | This controls console messages from the networking stack that can occur because | ||
1477 | of problems on the network like duplicate address or bad checksums. Normally, | ||
1478 | this should be enabled, but if the problem persists the messages can be | ||
1479 | disabled. | ||
1480 | |||
1481 | netdev_budget | ||
1482 | ------------- | ||
1483 | |||
1484 | Maximum number of packets taken from all interfaces in one polling cycle (NAPI | ||
1485 | poll). In one polling cycle interfaces which are registered to polling are | ||
1486 | probed in a round-robin manner. The limit of packets in one such probe can be | ||
1487 | set per-device via sysfs class/net/<device>/weight . | ||
1488 | |||
1489 | netdev_max_backlog | ||
1490 | ------------------ | ||
1491 | |||
1492 | Maximum number of packets, queued on the INPUT side, when the interface | ||
1493 | receives packets faster than kernel can process them. | ||
1494 | |||
1495 | optmem_max | ||
1496 | ---------- | ||
1497 | |||
1498 | Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence | ||
1499 | of struct cmsghdr structures with appended data. | ||
1500 | |||
1501 | /proc/sys/net/unix - Parameters for Unix domain sockets | ||
1502 | ------------------------------------------------------- | ||
1503 | |||
1504 | There are only two files in this subdirectory. They control the delays for | ||
1505 | deleting and destroying socket descriptors. | ||
1506 | |||
1507 | 2.8 /proc/sys/net/ipv4 - IPV4 settings | ||
1508 | -------------------------------------- | ||
1509 | |||
1510 | IP version 4 is still the most used protocol in Unix networking. It will be | ||
1511 | replaced by IP version 6 in the next couple of years, but for the moment it's | ||
1512 | the de facto standard for the internet and is used in most networking | ||
1513 | environments around the world. Because of the importance of this protocol, | ||
1514 | we'll have a deeper look into the subtree controlling the behavior of the IPv4 | ||
1515 | subsystem of the Linux kernel. | ||
1516 | |||
1517 | Let's start with the entries in /proc/sys/net/ipv4. | ||
1518 | |||
1519 | ICMP settings | ||
1520 | ------------- | ||
1521 | |||
1522 | icmp_echo_ignore_all and icmp_echo_ignore_broadcasts | ||
1523 | ---------------------------------------------------- | ||
1524 | |||
1525 | Turn on (1) or off (0), if the kernel should ignore all ICMP ECHO requests, or | ||
1526 | just those to broadcast and multicast addresses. | ||
1527 | |||
1528 | Please note that if you accept ICMP echo requests with a broadcast/multi\-cast | ||
1529 | destination address your network may be used as an exploder for denial of | ||
1530 | service packet flooding attacks to other hosts. | ||
1531 | |||
1532 | icmp_destunreach_rate, icmp_echoreply_rate, icmp_paramprob_rate and icmp_timeexeed_rate | ||
1533 | --------------------------------------------------------------------------------------- | ||
1534 | |||
1535 | Sets limits for sending ICMP packets to specific targets. A value of zero | ||
1536 | disables all limiting. Any positive value sets the maximum package rate in | ||
1537 | hundredth of a second (on Intel systems). | ||
1538 | |||
1539 | IP settings | ||
1540 | ----------- | ||
1541 | |||
1542 | ip_autoconfig | ||
1543 | ------------- | ||
1544 | |||
1545 | This file contains the number one if the host received its IP configuration by | ||
1546 | RARP, BOOTP, DHCP or a similar mechanism. Otherwise it is zero. | ||
1547 | |||
1548 | ip_default_ttl | ||
1549 | -------------- | ||
1550 | |||
1551 | TTL (Time To Live) for IPv4 interfaces. This is simply the maximum number of | ||
1552 | hops a packet may travel. | ||
1553 | |||
1554 | ip_dynaddr | ||
1555 | ---------- | ||
1556 | |||
1557 | Enable dynamic socket address rewriting on interface address change. This is | ||
1558 | useful for dialup interface with changing IP addresses. | ||
1559 | |||
1560 | ip_forward | ||
1561 | ---------- | ||
1562 | |||
1563 | Enable or disable forwarding of IP packages between interfaces. Changing this | ||
1564 | value resets all other parameters to their default values. They differ if the | ||
1565 | kernel is configured as host or router. | ||
1566 | |||
1567 | ip_local_port_range | ||
1568 | ------------------- | ||
1569 | |||
1570 | Range of ports used by TCP and UDP to choose the local port. Contains two | ||
1571 | numbers, the first number is the lowest port, the second number the highest | ||
1572 | local port. Default is 1024-4999. Should be changed to 32768-61000 for | ||
1573 | high-usage systems. | ||
1574 | |||
1575 | ip_no_pmtu_disc | ||
1576 | --------------- | ||
1577 | |||
1578 | Global switch to turn path MTU discovery off. It can also be set on a per | ||
1579 | socket basis by the applications or on a per route basis. | ||
1580 | |||
1581 | ip_masq_debug | ||
1582 | ------------- | ||
1583 | |||
1584 | Enable/disable debugging of IP masquerading. | ||
1585 | |||
1586 | IP fragmentation settings | ||
1587 | ------------------------- | ||
1588 | |||
1589 | ipfrag_high_trash and ipfrag_low_trash | ||
1590 | -------------------------------------- | ||
1591 | |||
1592 | Maximum memory used to reassemble IP fragments. When ipfrag_high_thresh bytes | ||
1593 | of memory is allocated for this purpose, the fragment handler will toss | ||
1594 | packets until ipfrag_low_thresh is reached. | ||
1595 | |||
1596 | ipfrag_time | ||
1597 | ----------- | ||
1598 | |||
1599 | Time in seconds to keep an IP fragment in memory. | ||
1600 | |||
1601 | TCP settings | ||
1602 | ------------ | ||
1603 | |||
1604 | tcp_ecn | ||
1605 | ------- | ||
1606 | |||
1607 | This file controls the use of the ECN bit in the IPv4 headers. This is a new | ||
1608 | feature about Explicit Congestion Notification, but some routers and firewalls | ||
1609 | block traffic that has this bit set, so it could be necessary to echo 0 to | ||
1610 | /proc/sys/net/ipv4/tcp_ecn if you want to talk to these sites. For more info | ||
1611 | you could read RFC2481. | ||
1612 | |||
1613 | tcp_retrans_collapse | ||
1614 | -------------------- | ||
1615 | |||
1616 | Bug-to-bug compatibility with some broken printers. On retransmit, try to send | ||
1617 | larger packets to work around bugs in certain TCP stacks. Can be turned off by | ||
1618 | setting it to zero. | ||
1619 | |||
1620 | tcp_keepalive_probes | ||
1621 | -------------------- | ||
1622 | |||
1623 | Number of keep alive probes TCP sends out, until it decides that the | ||
1624 | connection is broken. | ||
1625 | |||
1626 | tcp_keepalive_time | ||
1627 | ------------------ | ||
1628 | |||
1629 | How often TCP sends out keep alive messages, when keep alive is enabled. The | ||
1630 | default is 2 hours. | ||
1631 | |||
1632 | tcp_syn_retries | ||
1633 | --------------- | ||
1634 | |||
1635 | Number of times initial SYNs for a TCP connection attempt will be | ||
1636 | retransmitted. Should not be higher than 255. This is only the timeout for | ||
1637 | outgoing connections, for incoming connections the number of retransmits is | ||
1638 | defined by tcp_retries1. | ||
1639 | |||
1640 | tcp_sack | ||
1641 | -------- | ||
1642 | |||
1643 | Enable select acknowledgments after RFC2018. | ||
1644 | |||
1645 | tcp_timestamps | ||
1646 | -------------- | ||
1647 | |||
1648 | Enable timestamps as defined in RFC1323. | ||
1649 | |||
1650 | tcp_stdurg | ||
1651 | ---------- | ||
1652 | |||
1653 | Enable the strict RFC793 interpretation of the TCP urgent pointer field. The | ||
1654 | default is to use the BSD compatible interpretation of the urgent pointer | ||
1655 | pointing to the first byte after the urgent data. The RFC793 interpretation is | ||
1656 | to have it point to the last byte of urgent data. Enabling this option may | ||
1657 | lead to interoperability problems. Disabled by default. | ||
1658 | |||
1659 | tcp_syncookies | ||
1660 | -------------- | ||
1661 | |||
1662 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES. Send out | ||
1663 | syncookies when the syn backlog queue of a socket overflows. This is to ward | ||
1664 | off the common 'syn flood attack'. Disabled by default. | ||
1665 | |||
1666 | Note that the concept of a socket backlog is abandoned. This means the peer | ||
1667 | may not receive reliable error messages from an over loaded server with | ||
1668 | syncookies enabled. | ||
1669 | |||
1670 | tcp_window_scaling | ||
1671 | ------------------ | ||
1672 | |||
1673 | Enable window scaling as defined in RFC1323. | ||
1674 | |||
1675 | tcp_fin_timeout | ||
1676 | --------------- | ||
1677 | |||
1678 | The length of time in seconds it takes to receive a final FIN before the | ||
1679 | socket is always closed. This is strictly a violation of the TCP | ||
1680 | specification, but required to prevent denial-of-service attacks. | ||
1681 | |||
1682 | tcp_max_ka_probes | ||
1683 | ----------------- | ||
1684 | |||
1685 | Indicates how many keep alive probes are sent per slow timer run. Should not | ||
1686 | be set too high to prevent bursts. | ||
1687 | |||
1688 | tcp_max_syn_backlog | ||
1689 | ------------------- | ||
1690 | |||
1691 | Length of the per socket backlog queue. Since Linux 2.2 the backlog specified | ||
1692 | in listen(2) only specifies the length of the backlog queue of already | ||
1693 | established sockets. When more connection requests arrive Linux starts to drop | ||
1694 | packets. When syncookies are enabled the packets are still answered and the | ||
1695 | maximum queue is effectively ignored. | ||
1696 | |||
1697 | tcp_retries1 | ||
1698 | ------------ | ||
1699 | |||
1700 | Defines how often an answer to a TCP connection request is retransmitted | ||
1701 | before giving up. | ||
1702 | |||
1703 | tcp_retries2 | ||
1704 | ------------ | ||
1705 | |||
1706 | Defines how often a TCP packet is retransmitted before giving up. | ||
1707 | |||
1708 | Interface specific settings | ||
1709 | --------------------------- | ||
1710 | |||
1711 | In the directory /proc/sys/net/ipv4/conf you'll find one subdirectory for each | ||
1712 | interface the system knows about and one directory calls all. Changes in the | ||
1713 | all subdirectory affect all interfaces, whereas changes in the other | ||
1714 | subdirectories affect only one interface. All directories have the same | ||
1715 | entries: | ||
1716 | |||
1717 | accept_redirects | ||
1718 | ---------------- | ||
1719 | |||
1720 | This switch decides if the kernel accepts ICMP redirect messages or not. The | ||
1721 | default is 'yes' if the kernel is configured for a regular host and 'no' for a | ||
1722 | router configuration. | ||
1723 | |||
1724 | accept_source_route | ||
1725 | ------------------- | ||
1726 | |||
1727 | Should source routed packages be accepted or declined. The default is | ||
1728 | dependent on the kernel configuration. It's 'yes' for routers and 'no' for | ||
1729 | hosts. | ||
1730 | |||
1731 | bootp_relay | ||
1732 | ~~~~~~~~~~~ | ||
1733 | |||
1734 | Accept packets with source address 0.b.c.d with destinations not to this host | ||
1735 | as local ones. It is supposed that a BOOTP relay daemon will catch and forward | ||
1736 | such packets. | ||
1737 | |||
1738 | The default is 0, since this feature is not implemented yet (kernel version | ||
1739 | 2.2.12). | ||
1740 | |||
1741 | forwarding | ||
1742 | ---------- | ||
1743 | |||
1744 | Enable or disable IP forwarding on this interface. | ||
1745 | |||
1746 | log_martians | ||
1747 | ------------ | ||
1748 | |||
1749 | Log packets with source addresses with no known route to kernel log. | ||
1750 | |||
1751 | mc_forwarding | ||
1752 | ------------- | ||
1753 | |||
1754 | Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE and a | ||
1755 | multicast routing daemon is required. | ||
1756 | |||
1757 | proxy_arp | ||
1758 | --------- | ||
1759 | |||
1760 | Does (1) or does not (0) perform proxy ARP. | ||
1761 | |||
1762 | rp_filter | ||
1763 | --------- | ||
1764 | |||
1765 | Integer value determines if a source validation should be made. 1 means yes, 0 | ||
1766 | means no. Disabled by default, but local/broadcast address spoofing is always | ||
1767 | on. | ||
1768 | |||
1769 | If you set this to 1 on a router that is the only connection for a network to | ||
1770 | the net, it will prevent spoofing attacks against your internal networks | ||
1771 | (external addresses can still be spoofed), without the need for additional | ||
1772 | firewall rules. | ||
1773 | |||
1774 | secure_redirects | ||
1775 | ---------------- | ||
1776 | |||
1777 | Accept ICMP redirect messages only for gateways, listed in default gateway | ||
1778 | list. Enabled by default. | ||
1779 | |||
1780 | shared_media | ||
1781 | ------------ | ||
1782 | |||
1783 | If it is not set the kernel does not assume that different subnets on this | ||
1784 | device can communicate directly. Default setting is 'yes'. | ||
1785 | |||
1786 | send_redirects | ||
1787 | -------------- | ||
1788 | |||
1789 | Determines whether to send ICMP redirects to other hosts. | ||
1790 | |||
1791 | Routing settings | ||
1792 | ---------------- | ||
1793 | |||
1794 | The directory /proc/sys/net/ipv4/route contains several file to control | ||
1795 | routing issues. | ||
1796 | |||
1797 | error_burst and error_cost | ||
1798 | -------------------------- | ||
1799 | |||
1800 | These parameters are used to limit how many ICMP destination unreachable to | ||
1801 | send from the host in question. ICMP destination unreachable messages are | ||
1802 | sent when we cannot reach the next hop while trying to transmit a packet. | ||
1803 | It will also print some error messages to kernel logs if someone is ignoring | ||
1804 | our ICMP redirects. The higher the error_cost factor is, the fewer | ||
1805 | destination unreachable and error messages will be let through. Error_burst | ||
1806 | controls when destination unreachable messages and error messages will be | ||
1807 | dropped. The default settings limit warning messages to five every second. | ||
1808 | |||
1809 | flush | ||
1810 | ----- | ||
1811 | |||
1812 | Writing to this file results in a flush of the routing cache. | ||
1813 | |||
1814 | gc_elasticity, gc_interval, gc_min_interval_ms, gc_timeout, gc_thresh | ||
1815 | --------------------------------------------------------------------- | ||
1816 | |||
1817 | Values to control the frequency and behavior of the garbage collection | ||
1818 | algorithm for the routing cache. gc_min_interval is deprecated and replaced | ||
1819 | by gc_min_interval_ms. | ||
1820 | |||
1821 | |||
1822 | max_size | ||
1823 | -------- | ||
1824 | |||
1825 | Maximum size of the routing cache. Old entries will be purged once the cache | ||
1826 | reached has this size. | ||
1827 | |||
1828 | redirect_load, redirect_number | ||
1829 | ------------------------------ | ||
1830 | |||
1831 | Factors which determine if more ICPM redirects should be sent to a specific | ||
1832 | host. No redirects will be sent once the load limit or the maximum number of | ||
1833 | redirects has been reached. | ||
1834 | |||
1835 | redirect_silence | ||
1836 | ---------------- | ||
1837 | |||
1838 | Timeout for redirects. After this period redirects will be sent again, even if | ||
1839 | this has been stopped, because the load or number limit has been reached. | ||
1840 | |||
1841 | Network Neighbor handling | ||
1842 | ------------------------- | ||
1843 | |||
1844 | Settings about how to handle connections with direct neighbors (nodes attached | ||
1845 | to the same link) can be found in the directory /proc/sys/net/ipv4/neigh. | ||
1846 | |||
1847 | As we saw it in the conf directory, there is a default subdirectory which | ||
1848 | holds the default values, and one directory for each interface. The contents | ||
1849 | of the directories are identical, with the single exception that the default | ||
1850 | settings contain additional options to set garbage collection parameters. | ||
1851 | |||
1852 | In the interface directories you'll find the following entries: | ||
1853 | |||
1854 | base_reachable_time, base_reachable_time_ms | ||
1855 | ------------------------------------------- | ||
1856 | |||
1857 | A base value used for computing the random reachable time value as specified | ||
1858 | in RFC2461. | ||
1859 | |||
1860 | Expression of base_reachable_time, which is deprecated, is in seconds. | ||
1861 | Expression of base_reachable_time_ms is in milliseconds. | ||
1862 | |||
1863 | retrans_time, retrans_time_ms | ||
1864 | ----------------------------- | ||
1865 | |||
1866 | The time between retransmitted Neighbor Solicitation messages. | ||
1867 | Used for address resolution and to determine if a neighbor is | ||
1868 | unreachable. | ||
1869 | |||
1870 | Expression of retrans_time, which is deprecated, is in 1/100 seconds (for | ||
1871 | IPv4) or in jiffies (for IPv6). | ||
1872 | Expression of retrans_time_ms is in milliseconds. | ||
1873 | |||
1874 | unres_qlen | ||
1875 | ---------- | ||
1876 | |||
1877 | Maximum queue length for a pending arp request - the number of packets which | ||
1878 | are accepted from other layers while the ARP address is still resolved. | ||
1879 | |||
1880 | anycast_delay | ||
1881 | ------------- | ||
1882 | |||
1883 | Maximum for random delay of answers to neighbor solicitation messages in | ||
1884 | jiffies (1/100 sec). Not yet implemented (Linux does not have anycast support | ||
1885 | yet). | ||
1886 | |||
1887 | ucast_solicit | ||
1888 | ------------- | ||
1889 | |||
1890 | Maximum number of retries for unicast solicitation. | ||
1891 | |||
1892 | mcast_solicit | ||
1893 | ------------- | ||
1894 | |||
1895 | Maximum number of retries for multicast solicitation. | ||
1896 | |||
1897 | delay_first_probe_time | ||
1898 | ---------------------- | ||
1899 | |||
1900 | Delay for the first time probe if the neighbor is reachable. (see | ||
1901 | gc_stale_time) | ||
1902 | |||
1903 | locktime | ||
1904 | -------- | ||
1905 | |||
1906 | An ARP/neighbor entry is only replaced with a new one if the old is at least | ||
1907 | locktime old. This prevents ARP cache thrashing. | ||
1908 | |||
1909 | proxy_delay | ||
1910 | ----------- | ||
1911 | |||
1912 | Maximum time (real time is random [0..proxytime]) before answering to an ARP | ||
1913 | request for which we have an proxy ARP entry. In some cases, this is used to | ||
1914 | prevent network flooding. | ||
1915 | |||
1916 | proxy_qlen | ||
1917 | ---------- | ||
1918 | |||
1919 | Maximum queue length of the delayed proxy arp timer. (see proxy_delay). | ||
1920 | |||
1921 | app_solicit | ||
1922 | ---------- | ||
1923 | |||
1924 | Determines the number of requests to send to the user level ARP daemon. Use 0 | ||
1925 | to turn off. | ||
1926 | |||
1927 | gc_stale_time | ||
1928 | ------------- | ||
1929 | |||
1930 | Determines how often to check for stale ARP entries. After an ARP entry is | ||
1931 | stale it will be resolved again (which is useful when an IP address migrates | ||
1932 | to another machine). When ucast_solicit is greater than 0 it first tries to | ||
1933 | send an ARP packet directly to the known host When that fails and | ||
1934 | mcast_solicit is greater than 0, an ARP request is broadcasted. | ||
1935 | |||
1936 | 2.9 Appletalk | ||
1937 | ------------- | ||
1938 | |||
1939 | The /proc/sys/net/appletalk directory holds the Appletalk configuration data | ||
1940 | when Appletalk is loaded. The configurable parameters are: | ||
1941 | |||
1942 | aarp-expiry-time | ||
1943 | ---------------- | ||
1944 | |||
1945 | The amount of time we keep an ARP entry before expiring it. Used to age out | ||
1946 | old hosts. | ||
1947 | |||
1948 | aarp-resolve-time | ||
1949 | ----------------- | ||
1950 | |||
1951 | The amount of time we will spend trying to resolve an Appletalk address. | ||
1952 | |||
1953 | aarp-retransmit-limit | ||
1954 | --------------------- | ||
1955 | |||
1956 | The number of times we will retransmit a query before giving up. | ||
1957 | |||
1958 | aarp-tick-time | ||
1959 | -------------- | ||
1960 | |||
1961 | Controls the rate at which expires are checked. | ||
1962 | |||
1963 | The directory /proc/net/appletalk holds the list of active Appletalk sockets | ||
1964 | on a machine. | ||
1965 | |||
1966 | The fields indicate the DDP type, the local address (in network:node format) | ||
1967 | the remote address, the size of the transmit pending queue, the size of the | ||
1968 | received queue (bytes waiting for applications to read) the state and the uid | ||
1969 | owning the socket. | ||
1970 | |||
1971 | /proc/net/atalk_iface lists all the interfaces configured for appletalk.It | ||
1972 | shows the name of the interface, its Appletalk address, the network range on | ||
1973 | that address (or network number for phase 1 networks), and the status of the | ||
1974 | interface. | ||
1975 | |||
1976 | /proc/net/atalk_route lists each known network route. It lists the target | ||
1977 | (network) that the route leads to, the router (may be directly connected), the | ||
1978 | route flags, and the device the route is using. | ||
1979 | |||
1980 | 2.10 IPX | ||
1981 | -------- | ||
1982 | |||
1983 | The IPX protocol has no tunable values in proc/sys/net. | ||
1984 | |||
1985 | The IPX protocol does, however, provide proc/net/ipx. This lists each IPX | ||
1986 | socket giving the local and remote addresses in Novell format (that is | ||
1987 | network:node:port). In accordance with the strange Novell tradition, | ||
1988 | everything but the port is in hex. Not_Connected is displayed for sockets that | ||
1989 | are not tied to a specific remote address. The Tx and Rx queue sizes indicate | ||
1990 | the number of bytes pending for transmission and reception. The state | ||
1991 | indicates the state the socket is in and the uid is the owning uid of the | ||
1992 | socket. | ||
1993 | |||
1994 | The /proc/net/ipx_interface file lists all IPX interfaces. For each interface | ||
1995 | it gives the network number, the node number, and indicates if the network is | ||
1996 | the primary network. It also indicates which device it is bound to (or | ||
1997 | Internal for internal networks) and the Frame Type if appropriate. Linux | ||
1998 | supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for | ||
1999 | IPX. | ||
2000 | |||
2001 | The /proc/net/ipx_route table holds a list of IPX routes. For each route it | ||
2002 | gives the destination network, the router node (or Directly) and the network | ||
2003 | address of the router (or Connected) for internal networks. | ||
2004 | |||
2005 | 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem | ||
2006 | ---------------------------------------------------------- | ||
2007 | |||
2008 | The "mqueue" filesystem provides the necessary kernel features to enable the | ||
2009 | creation of a user space library that implements the POSIX message queues | ||
2010 | API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System | ||
2011 | Interfaces specification.) | ||
2012 | |||
2013 | The "mqueue" filesystem contains values for determining/setting the amount of | ||
2014 | resources used by the file system. | ||
2015 | |||
2016 | /proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the | ||
2017 | maximum number of message queues allowed on the system. | ||
2018 | |||
2019 | /proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the | ||
2020 | maximum number of messages in a queue value. In fact it is the limiting value | ||
2021 | for another (user) limit which is set in mq_open invocation. This attribute of | ||
2022 | a queue must be less or equal then msg_max. | ||
2023 | |||
2024 | /proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the | ||
2025 | maximum message size value (it is every message queue's attribute set during | ||
2026 | its creation). | ||
2027 | 1166 | ||
2028 | 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score | 1167 | 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score |
2029 | ------------------------------------------------------ | 1168 | ------------------------------------------------------ |
2030 | 1169 | ||
2031 | This file can be used to adjust the score used to select which processes | 1170 | This file can be used to adjust the score used to select which processes should |
2032 | should be killed in an out-of-memory situation. Giving it a high score will | 1171 | be killed in an out-of-memory situation. The oom_adj value is a characteristic |
2033 | increase the likelihood of this process being killed by the oom-killer. Valid | 1172 | of the task's mm, so all threads that share an mm with pid will have the same |
2034 | values are in the range -16 to +15, plus the special value -17, which disables | 1173 | oom_adj value. A high value will increase the likelihood of this process being |
2035 | oom-killing altogether for this process. | 1174 | killed by the oom-killer. Valid values are in the range -16 to +15 as |
1175 | explained below and a special value of -17, which disables oom-killing | ||
1176 | altogether for threads sharing pid's mm. | ||
2036 | 1177 | ||
2037 | The process to be killed in an out-of-memory situation is selected among all others | 1178 | The process to be killed in an out-of-memory situation is selected among all others |
2038 | based on its badness score. This value equals the original memory size of the process | 1179 | based on its badness score. This value equals the original memory size of the process |
@@ -2046,6 +1187,9 @@ the parent's score if they do not share the same memory. Thus forking servers | |||
2046 | are the prime candidates to be killed. Having only one 'hungry' child will make | 1187 | are the prime candidates to be killed. Having only one 'hungry' child will make |
2047 | parent less preferable than the child. | 1188 | parent less preferable than the child. |
2048 | 1189 | ||
1190 | /proc/<pid>/oom_adj cannot be changed for kthreads since they are immune from | ||
1191 | oom-killing already. | ||
1192 | |||
2049 | /proc/<pid>/oom_score shows process' current badness score. | 1193 | /proc/<pid>/oom_score shows process' current badness score. |
2050 | 1194 | ||
2051 | The following heuristics are then applied: | 1195 | The following heuristics are then applied: |
@@ -2062,25 +1206,15 @@ The task with the highest badness score is then selected and its children | |||
2062 | are killed, process itself will be killed in an OOM situation when it does | 1206 | are killed, process itself will be killed in an OOM situation when it does |
2063 | not have children or some of them disabled oom like described above. | 1207 | not have children or some of them disabled oom like described above. |
2064 | 1208 | ||
2065 | 2.13 /proc/<pid>/oom_score - Display current oom-killer score | 1209 | 3.2 /proc/<pid>/oom_score - Display current oom-killer score |
2066 | ------------------------------------------------------------- | 1210 | ------------------------------------------------------------- |
2067 | 1211 | ||
2068 | ------------------------------------------------------------------------------ | ||
2069 | This file can be used to check the current score used by the oom-killer is for | 1212 | This file can be used to check the current score used by the oom-killer is for |
2070 | any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which | 1213 | any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which |
2071 | process should be killed in an out-of-memory situation. | 1214 | process should be killed in an out-of-memory situation. |
2072 | 1215 | ||
2073 | ------------------------------------------------------------------------------ | ||
2074 | Summary | ||
2075 | ------------------------------------------------------------------------------ | ||
2076 | Certain aspects of kernel behavior can be modified at runtime, without the | ||
2077 | need to recompile the kernel, or even to reboot the system. The files in the | ||
2078 | /proc/sys tree can not only be read, but also modified. You can use the echo | ||
2079 | command to write value into these files, thereby changing the default settings | ||
2080 | of the kernel. | ||
2081 | ------------------------------------------------------------------------------ | ||
2082 | 1216 | ||
2083 | 2.14 /proc/<pid>/io - Display the IO accounting fields | 1217 | 3.3 /proc/<pid>/io - Display the IO accounting fields |
2084 | ------------------------------------------------------- | 1218 | ------------------------------------------------------- |
2085 | 1219 | ||
2086 | This file contains IO statistics for each running process | 1220 | This file contains IO statistics for each running process |
@@ -2182,7 +1316,7 @@ those 64-bit counters, process A could see an intermediate result. | |||
2182 | More information about this can be found within the taskstats documentation in | 1316 | More information about this can be found within the taskstats documentation in |
2183 | Documentation/accounting. | 1317 | Documentation/accounting. |
2184 | 1318 | ||
2185 | 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings | 1319 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings |
2186 | --------------------------------------------------------------- | 1320 | --------------------------------------------------------------- |
2187 | When a process is dumped, all anonymous memory is written to a core file as | 1321 | When a process is dumped, all anonymous memory is written to a core file as |
2188 | long as the size of the core file isn't limited. But sometimes we don't want | 1322 | long as the size of the core file isn't limited. But sometimes we don't want |
@@ -2226,7 +1360,7 @@ For example: | |||
2226 | $ echo 0x7 > /proc/self/coredump_filter | 1360 | $ echo 0x7 > /proc/self/coredump_filter |
2227 | $ ./some_program | 1361 | $ ./some_program |
2228 | 1362 | ||
2229 | 2.16 /proc/<pid>/mountinfo - Information about mounts | 1363 | 3.5 /proc/<pid>/mountinfo - Information about mounts |
2230 | -------------------------------------------------------- | 1364 | -------------------------------------------------------- |
2231 | 1365 | ||
2232 | This file contains lines of the form: | 1366 | This file contains lines of the form: |
@@ -2263,30 +1397,3 @@ For more information on mount propagation see: | |||
2263 | 1397 | ||
2264 | Documentation/filesystems/sharedsubtree.txt | 1398 | Documentation/filesystems/sharedsubtree.txt |
2265 | 1399 | ||
2266 | 2.17 /proc/sys/fs/epoll - Configuration options for the epoll interface | ||
2267 | -------------------------------------------------------- | ||
2268 | |||
2269 | This directory contains configuration options for the epoll(7) interface. | ||
2270 | |||
2271 | max_user_instances | ||
2272 | ------------------ | ||
2273 | |||
2274 | This is the maximum number of epoll file descriptors that a single user can | ||
2275 | have open at a given time. The default value is 128, and should be enough | ||
2276 | for normal users. | ||
2277 | |||
2278 | max_user_watches | ||
2279 | ---------------- | ||
2280 | |||
2281 | Every epoll file descriptor can store a number of files to be monitored | ||
2282 | for event readiness. Each one of these monitored files constitutes a "watch". | ||
2283 | This configuration option sets the maximum number of "watches" that are | ||
2284 | allowed for each user. | ||
2285 | Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes | ||
2286 | on a 64bit one. | ||
2287 | The current default value for max_user_watches is the 1/32 of the available | ||
2288 | low memory, divided for the "watch" cost in bytes. | ||
2289 | |||
2290 | |||
2291 | ------------------------------------------------------------------------------ | ||
2292 | |||
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 9f8740ca3f3b..85354b32d731 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt | |||
@@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this: | |||
12 | | |-- enable | 12 | | |-- enable |
13 | | |-- irq | 13 | | |-- irq |
14 | | |-- local_cpus | 14 | | |-- local_cpus |
15 | | |-- remove | ||
15 | | |-- resource | 16 | | |-- resource |
16 | | |-- resource0 | 17 | | |-- resource0 |
17 | | |-- resource1 | 18 | | |-- resource1 |
@@ -36,6 +37,7 @@ files, each with their own function. | |||
36 | enable Whether the device is enabled (ascii, rw) | 37 | enable Whether the device is enabled (ascii, rw) |
37 | irq IRQ number (ascii, ro) | 38 | irq IRQ number (ascii, ro) |
38 | local_cpus nearby CPU mask (cpumask, ro) | 39 | local_cpus nearby CPU mask (cpumask, ro) |
40 | remove remove device from kernel's list (ascii, wo) | ||
39 | resource PCI resource host addresses (ascii, ro) | 41 | resource PCI resource host addresses (ascii, ro) |
40 | resource0..N PCI resource N, if present (binary, mmap) | 42 | resource0..N PCI resource N, if present (binary, mmap) |
41 | resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) | 43 | resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) |
@@ -46,6 +48,7 @@ files, each with their own function. | |||
46 | 48 | ||
47 | ro - read only file | 49 | ro - read only file |
48 | rw - file is readable and writable | 50 | rw - file is readable and writable |
51 | wo - write only file | ||
49 | mmap - file is mmapable | 52 | mmap - file is mmapable |
50 | ascii - file contains ascii text | 53 | ascii - file contains ascii text |
51 | binary - file contains binary data | 54 | binary - file contains binary data |
@@ -69,10 +72,17 @@ The 'rom' file is special in that it provides read-only access to the device's | |||
69 | ROM file, if available. It's disabled by default, however, so applications | 72 | ROM file, if available. It's disabled by default, however, so applications |
70 | should write the string "1" to the file to enable it before attempting a read | 73 | should write the string "1" to the file to enable it before attempting a read |
71 | call, and disable it following the access by writing "0" to the file. Note | 74 | call, and disable it following the access by writing "0" to the file. Note |
72 | that the device must be enabled for a rom read to return data succesfully. | 75 | that the device must be enabled for a rom read to return data successfully. |
73 | In the event a driver is not bound to the device, it can be enabled using the | 76 | In the event a driver is not bound to the device, it can be enabled using the |
74 | 'enable' file, documented above. | 77 | 'enable' file, documented above. |
75 | 78 | ||
79 | The 'remove' file is used to remove the PCI device, by writing a non-zero | ||
80 | integer to the file. This does not involve any kind of hot-plug functionality, | ||
81 | e.g. powering off the device. The device is removed from the kernel's list of | ||
82 | PCI devices, the sysfs directory for it is removed, and the device will be | ||
83 | removed from any drivers attached to it. Removal of PCI root buses is | ||
84 | disallowed. | ||
85 | |||
76 | Accessing legacy resources through sysfs | 86 | Accessing legacy resources through sysfs |
77 | ---------------------------------------- | 87 | ---------------------------------------- |
78 | 88 | ||
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index 222437efd75a..3015da0c6b2a 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt | |||
@@ -133,4 +133,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root. | |||
133 | Author: | 133 | Author: |
134 | Christoph Rohland <cr@sap.com>, 1.12.01 | 134 | Christoph Rohland <cr@sap.com>, 1.12.01 |
135 | Updated: | 135 | Updated: |
136 | Hugh Dickins <hugh@veritas.com>, 4 June 2007 | 136 | Hugh Dickins, 4 June 2007 |
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt index fde829a756e6..902b95d0ee51 100644 --- a/Documentation/filesystems/udf.txt +++ b/Documentation/filesystems/udf.txt | |||
@@ -24,6 +24,8 @@ The following mount options are supported: | |||
24 | 24 | ||
25 | gid= Set the default group. | 25 | gid= Set the default group. |
26 | umask= Set the default umask. | 26 | umask= Set the default umask. |
27 | mode= Set the default file permissions. | ||
28 | dmode= Set the default directory permissions. | ||
27 | uid= Set the default user. | 29 | uid= Set the default user. |
28 | bs= Set the block size. | 30 | bs= Set the block size. |
29 | unhide Show otherwise hidden files. | 31 | unhide Show otherwise hidden files. |
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 3a5ddc96901a..b58b84b50fa2 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -124,14 +124,19 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as | |||
124 | flush -- If set, the filesystem will try to flush to disk more | 124 | flush -- If set, the filesystem will try to flush to disk more |
125 | early than normal. Not set by default. | 125 | early than normal. Not set by default. |
126 | 126 | ||
127 | rodir -- FAT has the ATTR_RO (read-only) attribute. But on Windows, | 127 | rodir -- FAT has the ATTR_RO (read-only) attribute. On Windows, |
128 | the ATTR_RO of the directory will be just ignored actually, | 128 | the ATTR_RO of the directory will just be ignored, |
129 | and is used by only applications as flag. E.g. it's setted | 129 | and is used only by applications as a flag (e.g. it's set |
130 | for the customized folder. | 130 | for the customized folder). |
131 | 131 | ||
132 | If you want to use ATTR_RO as read-only flag even for | 132 | If you want to use ATTR_RO as read-only flag even for |
133 | the directory, set this option. | 133 | the directory, set this option. |
134 | 134 | ||
135 | errors=panic|continue|remount-ro | ||
136 | -- specify FAT behavior on critical errors: panic, continue | ||
137 | without doing anything or remount the partition in | ||
138 | read-only mode (default behavior). | ||
139 | |||
135 | <bool>: 0,1,yes,no,true,false | 140 | <bool>: 0,1,yes,no,true,false |
136 | 141 | ||
137 | TODO | 142 | TODO |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index deeeed0faa8f..f49eecf2e573 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -277,8 +277,7 @@ or bottom half). | |||
277 | unfreeze_fs: called when VFS is unlocking a filesystem and making it writable | 277 | unfreeze_fs: called when VFS is unlocking a filesystem and making it writable |
278 | again. | 278 | again. |
279 | 279 | ||
280 | statfs: called when the VFS needs to get filesystem statistics. This | 280 | statfs: called when the VFS needs to get filesystem statistics. |
281 | is called with the kernel lock held | ||
282 | 281 | ||
283 | remount_fs: called when the filesystem is remounted. This is called | 282 | remount_fs: called when the filesystem is remounted. This is called |
284 | with the kernel lock held | 283 | with the kernel lock held |
diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README index c3480aa66ba8..7eceaff63f5f 100644 --- a/Documentation/firmware_class/README +++ b/Documentation/firmware_class/README | |||
@@ -77,7 +77,8 @@ | |||
77 | seconds for the whole load operation. | 77 | seconds for the whole load operation. |
78 | 78 | ||
79 | - request_firmware_nowait() is also provided for convenience in | 79 | - request_firmware_nowait() is also provided for convenience in |
80 | non-user contexts. | 80 | user contexts to request firmware asynchronously, but can't be called |
81 | in atomic contexts. | ||
81 | 82 | ||
82 | 83 | ||
83 | about in-kernel persistence: | 84 | about in-kernel persistence: |
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt deleted file mode 100644 index 803b1318b13d..000000000000 --- a/Documentation/ftrace.txt +++ /dev/null | |||
@@ -1,1424 +0,0 @@ | |||
1 | ftrace - Function Tracer | ||
2 | ======================== | ||
3 | |||
4 | Copyright 2008 Red Hat Inc. | ||
5 | Author: Steven Rostedt <srostedt@redhat.com> | ||
6 | License: The GNU Free Documentation License, Version 1.2 | ||
7 | (dual licensed under the GPL v2) | ||
8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | ||
9 | John Kacur, and David Teigland. | ||
10 | |||
11 | Written for: 2.6.28-rc2 | ||
12 | |||
13 | Introduction | ||
14 | ------------ | ||
15 | |||
16 | Ftrace is an internal tracer designed to help out developers and | ||
17 | designers of systems to find what is going on inside the kernel. | ||
18 | It can be used for debugging or analyzing latencies and performance | ||
19 | issues that take place outside of user-space. | ||
20 | |||
21 | Although ftrace is the function tracer, it also includes an | ||
22 | infrastructure that allows for other types of tracing. Some of the | ||
23 | tracers that are currently in ftrace include a tracer to trace | ||
24 | context switches, the time it takes for a high priority task to | ||
25 | run after it was woken up, the time interrupts are disabled, and | ||
26 | more (ftrace allows for tracer plugins, which means that the list of | ||
27 | tracers can always grow). | ||
28 | |||
29 | |||
30 | The File System | ||
31 | --------------- | ||
32 | |||
33 | Ftrace uses the debugfs file system to hold the control files as well | ||
34 | as the files to display output. | ||
35 | |||
36 | To mount the debugfs system: | ||
37 | |||
38 | # mkdir /debug | ||
39 | # mount -t debugfs nodev /debug | ||
40 | |||
41 | (Note: it is more common to mount at /sys/kernel/debug, but for simplicity | ||
42 | this document will use /debug) | ||
43 | |||
44 | That's it! (assuming that you have ftrace configured into your kernel) | ||
45 | |||
46 | After mounting the debugfs, you can see a directory called | ||
47 | "tracing". This directory contains the control and output files | ||
48 | of ftrace. Here is a list of some of the key files: | ||
49 | |||
50 | |||
51 | Note: all time values are in microseconds. | ||
52 | |||
53 | current_tracer: This is used to set or display the current tracer | ||
54 | that is configured. | ||
55 | |||
56 | available_tracers: This holds the different types of tracers that | ||
57 | have been compiled into the kernel. The tracers | ||
58 | listed here can be configured by echoing their name | ||
59 | into current_tracer. | ||
60 | |||
61 | tracing_enabled: This sets or displays whether the current_tracer | ||
62 | is activated and tracing or not. Echo 0 into this | ||
63 | file to disable the tracer or 1 to enable it. | ||
64 | |||
65 | trace: This file holds the output of the trace in a human readable | ||
66 | format (described below). | ||
67 | |||
68 | latency_trace: This file shows the same trace but the information | ||
69 | is organized more to display possible latencies | ||
70 | in the system (described below). | ||
71 | |||
72 | trace_pipe: The output is the same as the "trace" file but this | ||
73 | file is meant to be streamed with live tracing. | ||
74 | Reads from this file will block until new data | ||
75 | is retrieved. Unlike the "trace" and "latency_trace" | ||
76 | files, this file is a consumer. This means reading | ||
77 | from this file causes sequential reads to display | ||
78 | more current data. Once data is read from this | ||
79 | file, it is consumed, and will not be read | ||
80 | again with a sequential read. The "trace" and | ||
81 | "latency_trace" files are static, and if the | ||
82 | tracer is not adding more data, they will display | ||
83 | the same information every time they are read. | ||
84 | |||
85 | trace_options: This file lets the user control the amount of data | ||
86 | that is displayed in one of the above output | ||
87 | files. | ||
88 | |||
89 | trace_max_latency: Some of the tracers record the max latency. | ||
90 | For example, the time interrupts are disabled. | ||
91 | This time is saved in this file. The max trace | ||
92 | will also be stored, and displayed by either | ||
93 | "trace" or "latency_trace". A new max trace will | ||
94 | only be recorded if the latency is greater than | ||
95 | the value in this file. (in microseconds) | ||
96 | |||
97 | buffer_size_kb: This sets or displays the number of kilobytes each CPU | ||
98 | buffer can hold. The tracer buffers are the same size | ||
99 | for each CPU. The displayed number is the size of the | ||
100 | CPU buffer and not total size of all buffers. The | ||
101 | trace buffers are allocated in pages (blocks of memory | ||
102 | that the kernel uses for allocation, usually 4 KB in size). | ||
103 | If the last page allocated has room for more bytes | ||
104 | than requested, the rest of the page will be used, | ||
105 | making the actual allocation bigger than requested. | ||
106 | (Note, the size may not be a multiple of the page size due | ||
107 | to buffer managment overhead.) | ||
108 | |||
109 | This can only be updated when the current_tracer | ||
110 | is set to "nop". | ||
111 | |||
112 | tracing_cpumask: This is a mask that lets the user only trace | ||
113 | on specified CPUS. The format is a hex string | ||
114 | representing the CPUS. | ||
115 | |||
116 | set_ftrace_filter: When dynamic ftrace is configured in (see the | ||
117 | section below "dynamic ftrace"), the code is dynamically | ||
118 | modified (code text rewrite) to disable calling of the | ||
119 | function profiler (mcount). This lets tracing be configured | ||
120 | in with practically no overhead in performance. This also | ||
121 | has a side effect of enabling or disabling specific functions | ||
122 | to be traced. Echoing names of functions into this file | ||
123 | will limit the trace to only those functions. | ||
124 | |||
125 | set_ftrace_notrace: This has an effect opposite to that of | ||
126 | set_ftrace_filter. Any function that is added here will not | ||
127 | be traced. If a function exists in both set_ftrace_filter | ||
128 | and set_ftrace_notrace, the function will _not_ be traced. | ||
129 | |||
130 | set_ftrace_pid: Have the function tracer only trace a single thread. | ||
131 | |||
132 | available_filter_functions: This lists the functions that ftrace | ||
133 | has processed and can trace. These are the function | ||
134 | names that you can pass to "set_ftrace_filter" or | ||
135 | "set_ftrace_notrace". (See the section "dynamic ftrace" | ||
136 | below for more details.) | ||
137 | |||
138 | |||
139 | The Tracers | ||
140 | ----------- | ||
141 | |||
142 | Here is the list of current tracers that may be configured. | ||
143 | |||
144 | function - function tracer that uses mcount to trace all functions. | ||
145 | |||
146 | sched_switch - traces the context switches between tasks. | ||
147 | |||
148 | irqsoff - traces the areas that disable interrupts and saves | ||
149 | the trace with the longest max latency. | ||
150 | See tracing_max_latency. When a new max is recorded, | ||
151 | it replaces the old trace. It is best to view this | ||
152 | trace via the latency_trace file. | ||
153 | |||
154 | preemptoff - Similar to irqsoff but traces and records the amount of | ||
155 | time for which preemption is disabled. | ||
156 | |||
157 | preemptirqsoff - Similar to irqsoff and preemptoff, but traces and | ||
158 | records the largest time for which irqs and/or preemption | ||
159 | is disabled. | ||
160 | |||
161 | wakeup - Traces and records the max latency that it takes for | ||
162 | the highest priority task to get scheduled after | ||
163 | it has been woken up. | ||
164 | |||
165 | nop - This is not a tracer. To remove all tracers from tracing | ||
166 | simply echo "nop" into current_tracer. | ||
167 | |||
168 | |||
169 | Examples of using the tracer | ||
170 | ---------------------------- | ||
171 | |||
172 | Here are typical examples of using the tracers when controlling them only | ||
173 | with the debugfs interface (without using any user-land utilities). | ||
174 | |||
175 | Output format: | ||
176 | -------------- | ||
177 | |||
178 | Here is an example of the output format of the file "trace" | ||
179 | |||
180 | -------- | ||
181 | # tracer: function | ||
182 | # | ||
183 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
184 | # | | | | | | ||
185 | bash-4251 [01] 10152.583854: path_put <-path_walk | ||
186 | bash-4251 [01] 10152.583855: dput <-path_put | ||
187 | bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | ||
188 | -------- | ||
189 | |||
190 | A header is printed with the tracer name that is represented by the trace. | ||
191 | In this case the tracer is "function". Then a header showing the format. Task | ||
192 | name "bash", the task PID "4251", the CPU that it was running on | ||
193 | "01", the timestamp in <secs>.<usecs> format, the function name that was | ||
194 | traced "path_put" and the parent function that called this function | ||
195 | "path_walk". The timestamp is the time at which the function was | ||
196 | entered. | ||
197 | |||
198 | The sched_switch tracer also includes tracing of task wakeups and | ||
199 | context switches. | ||
200 | |||
201 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S | ||
202 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S | ||
203 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R | ||
204 | events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R | ||
205 | kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R | ||
206 | ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R | ||
207 | |||
208 | Wake ups are represented by a "+" and the context switches are shown as | ||
209 | "==>". The format is: | ||
210 | |||
211 | Context switches: | ||
212 | |||
213 | Previous task Next Task | ||
214 | |||
215 | <pid>:<prio>:<state> ==> <pid>:<prio>:<state> | ||
216 | |||
217 | Wake ups: | ||
218 | |||
219 | Current task Task waking up | ||
220 | |||
221 | <pid>:<prio>:<state> + <pid>:<prio>:<state> | ||
222 | |||
223 | The prio is the internal kernel priority, which is the inverse of the | ||
224 | priority that is usually displayed by user-space tools. Zero represents | ||
225 | the highest priority (99). Prio 100 starts the "nice" priorities with | ||
226 | 100 being equal to nice -20 and 139 being nice 19. The prio "140" is | ||
227 | reserved for the idle task which is the lowest priority thread (pid 0). | ||
228 | |||
229 | |||
230 | Latency trace format | ||
231 | -------------------- | ||
232 | |||
233 | For traces that display latency times, the latency_trace file gives | ||
234 | somewhat more information to see why a latency happened. Here is a typical | ||
235 | trace. | ||
236 | |||
237 | # tracer: irqsoff | ||
238 | # | ||
239 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
240 | -------------------------------------------------------------------- | ||
241 | latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
242 | ----------------- | ||
243 | | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | ||
244 | ----------------- | ||
245 | => started at: apic_timer_interrupt | ||
246 | => ended at: do_softirq | ||
247 | |||
248 | # _------=> CPU# | ||
249 | # / _-----=> irqs-off | ||
250 | # | / _----=> need-resched | ||
251 | # || / _---=> hardirq/softirq | ||
252 | # ||| / _--=> preempt-depth | ||
253 | # |||| / | ||
254 | # ||||| delay | ||
255 | # cmd pid ||||| time | caller | ||
256 | # \ / ||||| \ | / | ||
257 | <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
258 | <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | ||
259 | <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | ||
260 | |||
261 | |||
262 | |||
263 | This shows that the current tracer is "irqsoff" tracing the time for which | ||
264 | interrupts were disabled. It gives the trace version and the version | ||
265 | of the kernel upon which this was executed on (2.6.26-rc8). Then it displays | ||
266 | the max latency in microsecs (97 us). The number of trace entries displayed | ||
267 | and the total number recorded (both are three: #3/3). The type of | ||
268 | preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero | ||
269 | and are reserved for later use. #P is the number of online CPUS (#P:2). | ||
270 | |||
271 | The task is the process that was running when the latency occurred. | ||
272 | (swapper pid: 0). | ||
273 | |||
274 | The start and stop (the functions in which the interrupts were disabled and | ||
275 | enabled respectively) that caused the latencies: | ||
276 | |||
277 | apic_timer_interrupt is where the interrupts were disabled. | ||
278 | do_softirq is where they were enabled again. | ||
279 | |||
280 | The next lines after the header are the trace itself. The header | ||
281 | explains which is which. | ||
282 | |||
283 | cmd: The name of the process in the trace. | ||
284 | |||
285 | pid: The PID of that process. | ||
286 | |||
287 | CPU#: The CPU which the process was running on. | ||
288 | |||
289 | irqs-off: 'd' interrupts are disabled. '.' otherwise. | ||
290 | Note: If the architecture does not support a way to | ||
291 | read the irq flags variable, an 'X' will always | ||
292 | be printed here. | ||
293 | |||
294 | need-resched: 'N' task need_resched is set, '.' otherwise. | ||
295 | |||
296 | hardirq/softirq: | ||
297 | 'H' - hard irq occurred inside a softirq. | ||
298 | 'h' - hard irq is running | ||
299 | 's' - soft irq is running | ||
300 | '.' - normal context. | ||
301 | |||
302 | preempt-depth: The level of preempt_disabled | ||
303 | |||
304 | The above is mostly meaningful for kernel developers. | ||
305 | |||
306 | time: This differs from the trace file output. The trace file output | ||
307 | includes an absolute timestamp. The timestamp used by the | ||
308 | latency_trace file is relative to the start of the trace. | ||
309 | |||
310 | delay: This is just to help catch your eye a bit better. And | ||
311 | needs to be fixed to be only relative to the same CPU. | ||
312 | The marks are determined by the difference between this | ||
313 | current trace and the next trace. | ||
314 | '!' - greater than preempt_mark_thresh (default 100) | ||
315 | '+' - greater than 1 microsecond | ||
316 | ' ' - less than or equal to 1 microsecond. | ||
317 | |||
318 | The rest is the same as the 'trace' file. | ||
319 | |||
320 | |||
321 | trace_options | ||
322 | ------------- | ||
323 | |||
324 | The trace_options file is used to control what gets printed in the trace | ||
325 | output. To see what is available, simply cat the file: | ||
326 | |||
327 | cat /debug/tracing/trace_options | ||
328 | print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | ||
329 | noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj | ||
330 | |||
331 | To disable one of the options, echo in the option prepended with "no". | ||
332 | |||
333 | echo noprint-parent > /debug/tracing/trace_options | ||
334 | |||
335 | To enable an option, leave off the "no". | ||
336 | |||
337 | echo sym-offset > /debug/tracing/trace_options | ||
338 | |||
339 | Here are the available options: | ||
340 | |||
341 | print-parent - On function traces, display the calling function | ||
342 | as well as the function being traced. | ||
343 | |||
344 | print-parent: | ||
345 | bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul | ||
346 | |||
347 | noprint-parent: | ||
348 | bash-4000 [01] 1477.606694: simple_strtoul | ||
349 | |||
350 | |||
351 | sym-offset - Display not only the function name, but also the offset | ||
352 | in the function. For example, instead of seeing just | ||
353 | "ktime_get", you will see "ktime_get+0xb/0x20". | ||
354 | |||
355 | sym-offset: | ||
356 | bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 | ||
357 | |||
358 | sym-addr - this will also display the function address as well as | ||
359 | the function name. | ||
360 | |||
361 | sym-addr: | ||
362 | bash-4000 [01] 1477.606694: simple_strtoul <c0339346> | ||
363 | |||
364 | verbose - This deals with the latency_trace file. | ||
365 | |||
366 | bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ | ||
367 | (+0.000ms): simple_strtoul (strict_strtoul) | ||
368 | |||
369 | raw - This will display raw numbers. This option is best for use with | ||
370 | user applications that can translate the raw numbers better than | ||
371 | having it done in the kernel. | ||
372 | |||
373 | hex - Similar to raw, but the numbers will be in a hexadecimal format. | ||
374 | |||
375 | bin - This will print out the formats in raw binary. | ||
376 | |||
377 | block - TBD (needs update) | ||
378 | |||
379 | stacktrace - This is one of the options that changes the trace itself. | ||
380 | When a trace is recorded, so is the stack of functions. | ||
381 | This allows for back traces of trace sites. | ||
382 | |||
383 | userstacktrace - This option changes the trace. | ||
384 | It records a stacktrace of the current userspace thread. | ||
385 | |||
386 | sym-userobj - when user stacktrace are enabled, look up which object the | ||
387 | address belongs to, and print a relative address | ||
388 | This is especially useful when ASLR is on, otherwise you don't | ||
389 | get a chance to resolve the address to object/file/line after the app is no | ||
390 | longer running | ||
391 | |||
392 | The lookup is performed when you read trace,trace_pipe,latency_trace. Example: | ||
393 | |||
394 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 | ||
395 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | ||
396 | |||
397 | sched-tree - TBD (any users??) | ||
398 | |||
399 | |||
400 | sched_switch | ||
401 | ------------ | ||
402 | |||
403 | This tracer simply records schedule switches. Here is an example | ||
404 | of how to use it. | ||
405 | |||
406 | # echo sched_switch > /debug/tracing/current_tracer | ||
407 | # echo 1 > /debug/tracing/tracing_enabled | ||
408 | # sleep 1 | ||
409 | # echo 0 > /debug/tracing/tracing_enabled | ||
410 | # cat /debug/tracing/trace | ||
411 | |||
412 | # tracer: sched_switch | ||
413 | # | ||
414 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
415 | # | | | | | | ||
416 | bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R | ||
417 | bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R | ||
418 | sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R | ||
419 | bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S | ||
420 | bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R | ||
421 | sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R | ||
422 | bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D | ||
423 | bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R | ||
424 | <idle>-0 [00] 240.132589: 0:140:R + 4:115:S | ||
425 | <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R | ||
426 | ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R | ||
427 | <idle>-0 [00] 240.132598: 0:140:R + 4:115:S | ||
428 | <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R | ||
429 | ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R | ||
430 | sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R | ||
431 | [...] | ||
432 | |||
433 | |||
434 | As we have discussed previously about this format, the header shows | ||
435 | the name of the trace and points to the options. The "FUNCTION" | ||
436 | is a misnomer since here it represents the wake ups and context | ||
437 | switches. | ||
438 | |||
439 | The sched_switch file only lists the wake ups (represented with '+') | ||
440 | and context switches ('==>') with the previous task or current task | ||
441 | first followed by the next task or task waking up. The format for both | ||
442 | of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO | ||
443 | is the inverse of the actual priority with zero (0) being the highest | ||
444 | priority and the nice values starting at 100 (nice -20). Below is | ||
445 | a quick chart to map the kernel priority to user land priorities. | ||
446 | |||
447 | Kernel priority: 0 to 99 ==> user RT priority 99 to 0 | ||
448 | Kernel priority: 100 to 139 ==> user nice -20 to 19 | ||
449 | Kernel priority: 140 ==> idle task priority | ||
450 | |||
451 | The task states are: | ||
452 | |||
453 | R - running : wants to run, may not actually be running | ||
454 | S - sleep : process is waiting to be woken up (handles signals) | ||
455 | D - disk sleep (uninterruptible sleep) : process must be woken up | ||
456 | (ignores signals) | ||
457 | T - stopped : process suspended | ||
458 | t - traced : process is being traced (with something like gdb) | ||
459 | Z - zombie : process waiting to be cleaned up | ||
460 | X - unknown | ||
461 | |||
462 | |||
463 | ftrace_enabled | ||
464 | -------------- | ||
465 | |||
466 | The following tracers (listed below) give different output depending | ||
467 | on whether or not the sysctl ftrace_enabled is set. To set ftrace_enabled, | ||
468 | one can either use the sysctl function or set it via the proc | ||
469 | file system interface. | ||
470 | |||
471 | sysctl kernel.ftrace_enabled=1 | ||
472 | |||
473 | or | ||
474 | |||
475 | echo 1 > /proc/sys/kernel/ftrace_enabled | ||
476 | |||
477 | To disable ftrace_enabled simply replace the '1' with '0' in | ||
478 | the above commands. | ||
479 | |||
480 | When ftrace_enabled is set the tracers will also record the functions | ||
481 | that are within the trace. The descriptions of the tracers | ||
482 | will also show an example with ftrace enabled. | ||
483 | |||
484 | |||
485 | irqsoff | ||
486 | ------- | ||
487 | |||
488 | When interrupts are disabled, the CPU can not react to any other | ||
489 | external event (besides NMIs and SMIs). This prevents the timer | ||
490 | interrupt from triggering or the mouse interrupt from letting the | ||
491 | kernel know of a new mouse event. The result is a latency with the | ||
492 | reaction time. | ||
493 | |||
494 | The irqsoff tracer tracks the time for which interrupts are disabled. | ||
495 | When a new maximum latency is hit, the tracer saves the trace leading up | ||
496 | to that latency point so that every time a new maximum is reached, the old | ||
497 | saved trace is discarded and the new trace is saved. | ||
498 | |||
499 | To reset the maximum, echo 0 into tracing_max_latency. Here is an | ||
500 | example: | ||
501 | |||
502 | # echo irqsoff > /debug/tracing/current_tracer | ||
503 | # echo 0 > /debug/tracing/tracing_max_latency | ||
504 | # echo 1 > /debug/tracing/tracing_enabled | ||
505 | # ls -ltr | ||
506 | [...] | ||
507 | # echo 0 > /debug/tracing/tracing_enabled | ||
508 | # cat /debug/tracing/latency_trace | ||
509 | # tracer: irqsoff | ||
510 | # | ||
511 | irqsoff latency trace v1.1.5 on 2.6.26 | ||
512 | -------------------------------------------------------------------- | ||
513 | latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
514 | ----------------- | ||
515 | | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) | ||
516 | ----------------- | ||
517 | => started at: sys_setpgid | ||
518 | => ended at: sys_setpgid | ||
519 | |||
520 | # _------=> CPU# | ||
521 | # / _-----=> irqs-off | ||
522 | # | / _----=> need-resched | ||
523 | # || / _---=> hardirq/softirq | ||
524 | # ||| / _--=> preempt-depth | ||
525 | # |||| / | ||
526 | # ||||| delay | ||
527 | # cmd pid ||||| time | caller | ||
528 | # \ / ||||| \ | / | ||
529 | bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) | ||
530 | bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) | ||
531 | bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) | ||
532 | |||
533 | |||
534 | Here we see that that we had a latency of 12 microsecs (which is | ||
535 | very good). The _write_lock_irq in sys_setpgid disabled interrupts. | ||
536 | The difference between the 12 and the displayed timestamp 14us occurred | ||
537 | because the clock was incremented between the time of recording the max | ||
538 | latency and the time of recording the function that had that latency. | ||
539 | |||
540 | Note the above example had ftrace_enabled not set. If we set the | ||
541 | ftrace_enabled, we get a much larger output: | ||
542 | |||
543 | # tracer: irqsoff | ||
544 | # | ||
545 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
546 | -------------------------------------------------------------------- | ||
547 | latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
548 | ----------------- | ||
549 | | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | ||
550 | ----------------- | ||
551 | => started at: __alloc_pages_internal | ||
552 | => ended at: __alloc_pages_internal | ||
553 | |||
554 | # _------=> CPU# | ||
555 | # / _-----=> irqs-off | ||
556 | # | / _----=> need-resched | ||
557 | # || / _---=> hardirq/softirq | ||
558 | # ||| / _--=> preempt-depth | ||
559 | # |||| / | ||
560 | # ||||| delay | ||
561 | # cmd pid ||||| time | caller | ||
562 | # \ / ||||| \ | / | ||
563 | ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | ||
564 | ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | ||
565 | ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | ||
566 | ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | ||
567 | ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | ||
568 | ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | ||
569 | ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | ||
570 | ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | ||
571 | ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | ||
572 | ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
573 | ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
574 | ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
575 | [...] | ||
576 | ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | ||
577 | ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | ||
578 | ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | ||
579 | ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | ||
580 | ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | ||
581 | ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | ||
582 | ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | ||
583 | ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | ||
584 | ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | ||
585 | |||
586 | |||
587 | |||
588 | Here we traced a 50 microsecond latency. But we also see all the | ||
589 | functions that were called during that time. Note that by enabling | ||
590 | function tracing, we incur an added overhead. This overhead may | ||
591 | extend the latency times. But nevertheless, this trace has provided | ||
592 | some very helpful debugging information. | ||
593 | |||
594 | |||
595 | preemptoff | ||
596 | ---------- | ||
597 | |||
598 | When preemption is disabled, we may be able to receive interrupts but | ||
599 | the task cannot be preempted and a higher priority task must wait | ||
600 | for preemption to be enabled again before it can preempt a lower | ||
601 | priority task. | ||
602 | |||
603 | The preemptoff tracer traces the places that disable preemption. | ||
604 | Like the irqsoff tracer, it records the maximum latency for which preemption | ||
605 | was disabled. The control of preemptoff tracer is much like the irqsoff | ||
606 | tracer. | ||
607 | |||
608 | # echo preemptoff > /debug/tracing/current_tracer | ||
609 | # echo 0 > /debug/tracing/tracing_max_latency | ||
610 | # echo 1 > /debug/tracing/tracing_enabled | ||
611 | # ls -ltr | ||
612 | [...] | ||
613 | # echo 0 > /debug/tracing/tracing_enabled | ||
614 | # cat /debug/tracing/latency_trace | ||
615 | # tracer: preemptoff | ||
616 | # | ||
617 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
618 | -------------------------------------------------------------------- | ||
619 | latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
620 | ----------------- | ||
621 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
622 | ----------------- | ||
623 | => started at: do_IRQ | ||
624 | => ended at: __do_softirq | ||
625 | |||
626 | # _------=> CPU# | ||
627 | # / _-----=> irqs-off | ||
628 | # | / _----=> need-resched | ||
629 | # || / _---=> hardirq/softirq | ||
630 | # ||| / _--=> preempt-depth | ||
631 | # |||| / | ||
632 | # ||||| delay | ||
633 | # cmd pid ||||| time | caller | ||
634 | # \ / ||||| \ | / | ||
635 | sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | ||
636 | sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | ||
637 | sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | ||
638 | |||
639 | |||
640 | This has some more changes. Preemption was disabled when an interrupt | ||
641 | came in (notice the 'h'), and was enabled while doing a softirq. | ||
642 | (notice the 's'). But we also see that interrupts have been disabled | ||
643 | when entering the preempt off section and leaving it (the 'd'). | ||
644 | We do not know if interrupts were enabled in the mean time. | ||
645 | |||
646 | # tracer: preemptoff | ||
647 | # | ||
648 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
649 | -------------------------------------------------------------------- | ||
650 | latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
651 | ----------------- | ||
652 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
653 | ----------------- | ||
654 | => started at: remove_wait_queue | ||
655 | => ended at: __do_softirq | ||
656 | |||
657 | # _------=> CPU# | ||
658 | # / _-----=> irqs-off | ||
659 | # | / _----=> need-resched | ||
660 | # || / _---=> hardirq/softirq | ||
661 | # ||| / _--=> preempt-depth | ||
662 | # |||| / | ||
663 | # ||||| delay | ||
664 | # cmd pid ||||| time | caller | ||
665 | # \ / ||||| \ | / | ||
666 | sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | ||
667 | sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | ||
668 | sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | ||
669 | sshd-4261 0d..1 2us : irq_enter (do_IRQ) | ||
670 | sshd-4261 0d..1 2us : idle_cpu (irq_enter) | ||
671 | sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
672 | sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
673 | sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
674 | [...] | ||
675 | sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | ||
676 | sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | ||
677 | sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | ||
678 | sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | ||
679 | sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | ||
680 | sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | ||
681 | sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | ||
682 | sshd-4261 0d..2 15us : do_softirq (irq_exit) | ||
683 | sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
684 | sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
685 | sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
686 | sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
687 | sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
688 | sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
689 | [...] | ||
690 | sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | ||
691 | sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | ||
692 | sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | ||
693 | sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | ||
694 | sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | ||
695 | sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | ||
696 | sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | ||
697 | sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | ||
698 | [...] | ||
699 | sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | ||
700 | sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | ||
701 | |||
702 | |||
703 | The above is an example of the preemptoff trace with ftrace_enabled | ||
704 | set. Here we see that interrupts were disabled the entire time. | ||
705 | The irq_enter code lets us know that we entered an interrupt 'h'. | ||
706 | Before that, the functions being traced still show that it is not | ||
707 | in an interrupt, but we can see from the functions themselves that | ||
708 | this is not the case. | ||
709 | |||
710 | Notice that __do_softirq when called does not have a preempt_count. | ||
711 | It may seem that we missed a preempt enabling. What really happened | ||
712 | is that the preempt count is held on the thread's stack and we | ||
713 | switched to the softirq stack (4K stacks in effect). The code | ||
714 | does not copy the preempt count, but because interrupts are disabled, | ||
715 | we do not need to worry about it. Having a tracer like this is good | ||
716 | for letting people know what really happens inside the kernel. | ||
717 | |||
718 | |||
719 | preemptirqsoff | ||
720 | -------------- | ||
721 | |||
722 | Knowing the locations that have interrupts disabled or preemption | ||
723 | disabled for the longest times is helpful. But sometimes we would | ||
724 | like to know when either preemption and/or interrupts are disabled. | ||
725 | |||
726 | Consider the following code: | ||
727 | |||
728 | local_irq_disable(); | ||
729 | call_function_with_irqs_off(); | ||
730 | preempt_disable(); | ||
731 | call_function_with_irqs_and_preemption_off(); | ||
732 | local_irq_enable(); | ||
733 | call_function_with_preemption_off(); | ||
734 | preempt_enable(); | ||
735 | |||
736 | The irqsoff tracer will record the total length of | ||
737 | call_function_with_irqs_off() and | ||
738 | call_function_with_irqs_and_preemption_off(). | ||
739 | |||
740 | The preemptoff tracer will record the total length of | ||
741 | call_function_with_irqs_and_preemption_off() and | ||
742 | call_function_with_preemption_off(). | ||
743 | |||
744 | But neither will trace the time that interrupts and/or preemption | ||
745 | is disabled. This total time is the time that we can not schedule. | ||
746 | To record this time, use the preemptirqsoff tracer. | ||
747 | |||
748 | Again, using this trace is much like the irqsoff and preemptoff tracers. | ||
749 | |||
750 | # echo preemptirqsoff > /debug/tracing/current_tracer | ||
751 | # echo 0 > /debug/tracing/tracing_max_latency | ||
752 | # echo 1 > /debug/tracing/tracing_enabled | ||
753 | # ls -ltr | ||
754 | [...] | ||
755 | # echo 0 > /debug/tracing/tracing_enabled | ||
756 | # cat /debug/tracing/latency_trace | ||
757 | # tracer: preemptirqsoff | ||
758 | # | ||
759 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
760 | -------------------------------------------------------------------- | ||
761 | latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
762 | ----------------- | ||
763 | | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | ||
764 | ----------------- | ||
765 | => started at: apic_timer_interrupt | ||
766 | => ended at: __do_softirq | ||
767 | |||
768 | # _------=> CPU# | ||
769 | # / _-----=> irqs-off | ||
770 | # | / _----=> need-resched | ||
771 | # || / _---=> hardirq/softirq | ||
772 | # ||| / _--=> preempt-depth | ||
773 | # |||| / | ||
774 | # ||||| delay | ||
775 | # cmd pid ||||| time | caller | ||
776 | # \ / ||||| \ | / | ||
777 | ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
778 | ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | ||
779 | ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | ||
780 | |||
781 | |||
782 | |||
783 | The trace_hardirqs_off_thunk is called from assembly on x86 when | ||
784 | interrupts are disabled in the assembly code. Without the function | ||
785 | tracing, we do not know if interrupts were enabled within the preemption | ||
786 | points. We do see that it started with preemption enabled. | ||
787 | |||
788 | Here is a trace with ftrace_enabled set: | ||
789 | |||
790 | |||
791 | # tracer: preemptirqsoff | ||
792 | # | ||
793 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
794 | -------------------------------------------------------------------- | ||
795 | latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
796 | ----------------- | ||
797 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
798 | ----------------- | ||
799 | => started at: write_chan | ||
800 | => ended at: __do_softirq | ||
801 | |||
802 | # _------=> CPU# | ||
803 | # / _-----=> irqs-off | ||
804 | # | / _----=> need-resched | ||
805 | # || / _---=> hardirq/softirq | ||
806 | # ||| / _--=> preempt-depth | ||
807 | # |||| / | ||
808 | # ||||| delay | ||
809 | # cmd pid ||||| time | caller | ||
810 | # \ / ||||| \ | / | ||
811 | ls-4473 0.N.. 0us : preempt_schedule (write_chan) | ||
812 | ls-4473 0dN.1 1us : _spin_lock (schedule) | ||
813 | ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | ||
814 | ls-4473 0d..2 2us : put_prev_task_fair (schedule) | ||
815 | [...] | ||
816 | ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | ||
817 | ls-4473 0d..2 13us : __switch_to (schedule) | ||
818 | sshd-4261 0d..2 14us : finish_task_switch (schedule) | ||
819 | sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | ||
820 | sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | ||
821 | sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | ||
822 | sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | ||
823 | sshd-4261 0d..2 17us : irq_enter (do_IRQ) | ||
824 | sshd-4261 0d..2 17us : idle_cpu (irq_enter) | ||
825 | sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | ||
826 | sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | ||
827 | sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | ||
828 | sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | ||
829 | sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | ||
830 | sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | ||
831 | sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | ||
832 | [...] | ||
833 | sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | ||
834 | sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | ||
835 | sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | ||
836 | sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | ||
837 | sshd-4261 0d..3 30us : do_softirq (irq_exit) | ||
838 | sshd-4261 0d... 30us : __do_softirq (do_softirq) | ||
839 | sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | ||
840 | sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | ||
841 | sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | ||
842 | [...] | ||
843 | sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | ||
844 | sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | ||
845 | sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | ||
846 | sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | ||
847 | sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | ||
848 | sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | ||
849 | sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | ||
850 | sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | ||
851 | sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | ||
852 | [...] | ||
853 | sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | ||
854 | sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | ||
855 | sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | ||
856 | sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | ||
857 | sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | ||
858 | sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | ||
859 | sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
860 | sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
861 | sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
862 | sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
863 | sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
864 | [...] | ||
865 | sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | ||
866 | sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | ||
867 | sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | ||
868 | sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | ||
869 | sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | ||
870 | sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | ||
871 | sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | ||
872 | |||
873 | |||
874 | This is a very interesting trace. It started with the preemption of | ||
875 | the ls task. We see that the task had the "need_resched" bit set | ||
876 | via the 'N' in the trace. Interrupts were disabled before the spin_lock | ||
877 | at the beginning of the trace. We see that a schedule took place to run | ||
878 | sshd. When the interrupts were enabled, we took an interrupt. | ||
879 | On return from the interrupt handler, the softirq ran. We took another | ||
880 | interrupt while running the softirq as we see from the capital 'H'. | ||
881 | |||
882 | |||
883 | wakeup | ||
884 | ------ | ||
885 | |||
886 | In a Real-Time environment it is very important to know the wakeup | ||
887 | time it takes for the highest priority task that is woken up to the | ||
888 | time that it executes. This is also known as "schedule latency". | ||
889 | I stress the point that this is about RT tasks. It is also important | ||
890 | to know the scheduling latency of non-RT tasks, but the average | ||
891 | schedule latency is better for non-RT tasks. Tools like | ||
892 | LatencyTop are more appropriate for such measurements. | ||
893 | |||
894 | Real-Time environments are interested in the worst case latency. | ||
895 | That is the longest latency it takes for something to happen, and | ||
896 | not the average. We can have a very fast scheduler that may only | ||
897 | have a large latency once in a while, but that would not work well | ||
898 | with Real-Time tasks. The wakeup tracer was designed to record | ||
899 | the worst case wakeups of RT tasks. Non-RT tasks are not recorded | ||
900 | because the tracer only records one worst case and tracing non-RT | ||
901 | tasks that are unpredictable will overwrite the worst case latency | ||
902 | of RT tasks. | ||
903 | |||
904 | Since this tracer only deals with RT tasks, we will run this slightly | ||
905 | differently than we did with the previous tracers. Instead of performing | ||
906 | an 'ls', we will run 'sleep 1' under 'chrt' which changes the | ||
907 | priority of the task. | ||
908 | |||
909 | # echo wakeup > /debug/tracing/current_tracer | ||
910 | # echo 0 > /debug/tracing/tracing_max_latency | ||
911 | # echo 1 > /debug/tracing/tracing_enabled | ||
912 | # chrt -f 5 sleep 1 | ||
913 | # echo 0 > /debug/tracing/tracing_enabled | ||
914 | # cat /debug/tracing/latency_trace | ||
915 | # tracer: wakeup | ||
916 | # | ||
917 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
918 | -------------------------------------------------------------------- | ||
919 | latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
920 | ----------------- | ||
921 | | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | ||
922 | ----------------- | ||
923 | |||
924 | # _------=> CPU# | ||
925 | # / _-----=> irqs-off | ||
926 | # | / _----=> need-resched | ||
927 | # || / _---=> hardirq/softirq | ||
928 | # ||| / _--=> preempt-depth | ||
929 | # |||| / | ||
930 | # ||||| delay | ||
931 | # cmd pid ||||| time | caller | ||
932 | # \ / ||||| \ | / | ||
933 | <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | ||
934 | <idle>-0 1d..4 4us : schedule (cpu_idle) | ||
935 | |||
936 | |||
937 | |||
938 | Running this on an idle system, we see that it only took 4 microseconds | ||
939 | to perform the task switch. Note, since the trace marker in the | ||
940 | schedule is before the actual "switch", we stop the tracing when | ||
941 | the recorded task is about to schedule in. This may change if | ||
942 | we add a new marker at the end of the scheduler. | ||
943 | |||
944 | Notice that the recorded task is 'sleep' with the PID of 4901 and it | ||
945 | has an rt_prio of 5. This priority is user-space priority and not | ||
946 | the internal kernel priority. The policy is 1 for SCHED_FIFO and 2 | ||
947 | for SCHED_RR. | ||
948 | |||
949 | Doing the same with chrt -r 5 and ftrace_enabled set. | ||
950 | |||
951 | # tracer: wakeup | ||
952 | # | ||
953 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
954 | -------------------------------------------------------------------- | ||
955 | latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
956 | ----------------- | ||
957 | | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | ||
958 | ----------------- | ||
959 | |||
960 | # _------=> CPU# | ||
961 | # / _-----=> irqs-off | ||
962 | # | / _----=> need-resched | ||
963 | # || / _---=> hardirq/softirq | ||
964 | # ||| / _--=> preempt-depth | ||
965 | # |||| / | ||
966 | # ||||| delay | ||
967 | # cmd pid ||||| time | caller | ||
968 | # \ / ||||| \ | / | ||
969 | ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | ||
970 | ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | ||
971 | ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | ||
972 | ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | ||
973 | ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | ||
974 | ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | ||
975 | ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | ||
976 | ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | ||
977 | [...] | ||
978 | ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | ||
979 | ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | ||
980 | ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | ||
981 | ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | ||
982 | [...] | ||
983 | ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | ||
984 | ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | ||
985 | ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | ||
986 | ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | ||
987 | ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | ||
988 | ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | ||
989 | ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | ||
990 | ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | ||
991 | ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | ||
992 | ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | ||
993 | ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | ||
994 | ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | ||
995 | ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | ||
996 | ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | ||
997 | [...] | ||
998 | ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | ||
999 | ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | ||
1000 | ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | ||
1001 | ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | ||
1002 | ksoftirq-7 1d..4 50us : schedule (__cond_resched) | ||
1003 | |||
1004 | The interrupt went off while running ksoftirqd. This task runs at | ||
1005 | SCHED_OTHER. Why did not we see the 'N' set early? This may be | ||
1006 | a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K stacks | ||
1007 | configured, the interrupt and softirq run with their own stack. | ||
1008 | Some information is held on the top of the task's stack (need_resched | ||
1009 | and preempt_count are both stored there). The setting of the NEED_RESCHED | ||
1010 | bit is done directly to the task's stack, but the reading of the | ||
1011 | NEED_RESCHED is done by looking at the current stack, which in this case | ||
1012 | is the stack for the hard interrupt. This hides the fact that NEED_RESCHED | ||
1013 | has been set. We do not see the 'N' until we switch back to the task's | ||
1014 | assigned stack. | ||
1015 | |||
1016 | function | ||
1017 | -------- | ||
1018 | |||
1019 | This tracer is the function tracer. Enabling the function tracer | ||
1020 | can be done from the debug file system. Make sure the ftrace_enabled is | ||
1021 | set; otherwise this tracer is a nop. | ||
1022 | |||
1023 | # sysctl kernel.ftrace_enabled=1 | ||
1024 | # echo function > /debug/tracing/current_tracer | ||
1025 | # echo 1 > /debug/tracing/tracing_enabled | ||
1026 | # usleep 1 | ||
1027 | # echo 0 > /debug/tracing/tracing_enabled | ||
1028 | # cat /debug/tracing/trace | ||
1029 | # tracer: function | ||
1030 | # | ||
1031 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1032 | # | | | | | | ||
1033 | bash-4003 [00] 123.638713: finish_task_switch <-schedule | ||
1034 | bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | ||
1035 | bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | ||
1036 | bash-4003 [00] 123.638715: hrtick_set <-schedule | ||
1037 | bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | ||
1038 | bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | ||
1039 | bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | ||
1040 | bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | ||
1041 | bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | ||
1042 | bash-4003 [00] 123.638718: sub_preempt_count <-schedule | ||
1043 | bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | ||
1044 | bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | ||
1045 | bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | ||
1046 | bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | ||
1047 | bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | ||
1048 | [...] | ||
1049 | |||
1050 | |||
1051 | Note: function tracer uses ring buffers to store the above entries. | ||
1052 | The newest data may overwrite the oldest data. Sometimes using echo to | ||
1053 | stop the trace is not sufficient because the tracing could have overwritten | ||
1054 | the data that you wanted to record. For this reason, it is sometimes better to | ||
1055 | disable tracing directly from a program. This allows you to stop the | ||
1056 | tracing at the point that you hit the part that you are interested in. | ||
1057 | To disable the tracing directly from a C program, something like following | ||
1058 | code snippet can be used: | ||
1059 | |||
1060 | int trace_fd; | ||
1061 | [...] | ||
1062 | int main(int argc, char *argv[]) { | ||
1063 | [...] | ||
1064 | trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY); | ||
1065 | [...] | ||
1066 | if (condition_hit()) { | ||
1067 | write(trace_fd, "0", 1); | ||
1068 | } | ||
1069 | [...] | ||
1070 | } | ||
1071 | |||
1072 | Note: Here we hard coded the path name. The debugfs mount is not | ||
1073 | guaranteed to be at /debug (and is more commonly at /sys/kernel/debug). | ||
1074 | For simple one time traces, the above is sufficent. For anything else, | ||
1075 | a search through /proc/mounts may be needed to find where the debugfs | ||
1076 | file-system is mounted. | ||
1077 | |||
1078 | |||
1079 | Single thread tracing | ||
1080 | --------------------- | ||
1081 | |||
1082 | By writing into /debug/tracing/set_ftrace_pid you can trace a | ||
1083 | single thread. For example: | ||
1084 | |||
1085 | # cat /debug/tracing/set_ftrace_pid | ||
1086 | no pid | ||
1087 | # echo 3111 > /debug/tracing/set_ftrace_pid | ||
1088 | # cat /debug/tracing/set_ftrace_pid | ||
1089 | 3111 | ||
1090 | # echo function > /debug/tracing/current_tracer | ||
1091 | # cat /debug/tracing/trace | head | ||
1092 | # tracer: function | ||
1093 | # | ||
1094 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1095 | # | | | | | | ||
1096 | yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return | ||
1097 | yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range | ||
1098 | yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1099 | yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel | ||
1100 | yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll | ||
1101 | yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll | ||
1102 | # echo -1 > /debug/tracing/set_ftrace_pid | ||
1103 | # cat /debug/tracing/trace |head | ||
1104 | # tracer: function | ||
1105 | # | ||
1106 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1107 | # | | | | | | ||
1108 | ##### CPU 3 buffer started #### | ||
1109 | yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait | ||
1110 | yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry | ||
1111 | yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry | ||
1112 | yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit | ||
1113 | yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit | ||
1114 | |||
1115 | If you want to trace a function when executing, you could use | ||
1116 | something like this simple program: | ||
1117 | |||
1118 | #include <stdio.h> | ||
1119 | #include <stdlib.h> | ||
1120 | #include <sys/types.h> | ||
1121 | #include <sys/stat.h> | ||
1122 | #include <fcntl.h> | ||
1123 | #include <unistd.h> | ||
1124 | |||
1125 | int main (int argc, char **argv) | ||
1126 | { | ||
1127 | if (argc < 1) | ||
1128 | exit(-1); | ||
1129 | |||
1130 | if (fork() > 0) { | ||
1131 | int fd, ffd; | ||
1132 | char line[64]; | ||
1133 | int s; | ||
1134 | |||
1135 | ffd = open("/debug/tracing/current_tracer", O_WRONLY); | ||
1136 | if (ffd < 0) | ||
1137 | exit(-1); | ||
1138 | write(ffd, "nop", 3); | ||
1139 | |||
1140 | fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY); | ||
1141 | s = sprintf(line, "%d\n", getpid()); | ||
1142 | write(fd, line, s); | ||
1143 | |||
1144 | write(ffd, "function", 8); | ||
1145 | |||
1146 | close(fd); | ||
1147 | close(ffd); | ||
1148 | |||
1149 | execvp(argv[1], argv+1); | ||
1150 | } | ||
1151 | |||
1152 | return 0; | ||
1153 | } | ||
1154 | |||
1155 | dynamic ftrace | ||
1156 | -------------- | ||
1157 | |||
1158 | If CONFIG_DYNAMIC_FTRACE is set, the system will run with | ||
1159 | virtually no overhead when function tracing is disabled. The way | ||
1160 | this works is the mcount function call (placed at the start of | ||
1161 | every kernel function, produced by the -pg switch in gcc), starts | ||
1162 | of pointing to a simple return. (Enabling FTRACE will include the | ||
1163 | -pg switch in the compiling of the kernel.) | ||
1164 | |||
1165 | At compile time every C file object is run through the | ||
1166 | recordmcount.pl script (located in the scripts directory). This | ||
1167 | script will process the C object using objdump to find all the | ||
1168 | locations in the .text section that call mcount. (Note, only | ||
1169 | the .text section is processed, since processing other sections | ||
1170 | like .init.text may cause races due to those sections being freed). | ||
1171 | |||
1172 | A new section called "__mcount_loc" is created that holds references | ||
1173 | to all the mcount call sites in the .text section. This section is | ||
1174 | compiled back into the original object. The final linker will add | ||
1175 | all these references into a single table. | ||
1176 | |||
1177 | On boot up, before SMP is initialized, the dynamic ftrace code | ||
1178 | scans this table and updates all the locations into nops. It also | ||
1179 | records the locations, which are added to the available_filter_functions | ||
1180 | list. Modules are processed as they are loaded and before they are | ||
1181 | executed. When a module is unloaded, it also removes its functions from | ||
1182 | the ftrace function list. This is automatic in the module unload | ||
1183 | code, and the module author does not need to worry about it. | ||
1184 | |||
1185 | When tracing is enabled, kstop_machine is called to prevent races | ||
1186 | with the CPUS executing code being modified (which can cause the | ||
1187 | CPU to do undesireable things), and the nops are patched back | ||
1188 | to calls. But this time, they do not call mcount (which is just | ||
1189 | a function stub). They now call into the ftrace infrastructure. | ||
1190 | |||
1191 | One special side-effect to the recording of the functions being | ||
1192 | traced is that we can now selectively choose which functions we | ||
1193 | wish to trace and which ones we want the mcount calls to remain as | ||
1194 | nops. | ||
1195 | |||
1196 | Two files are used, one for enabling and one for disabling the tracing | ||
1197 | of specified functions. They are: | ||
1198 | |||
1199 | set_ftrace_filter | ||
1200 | |||
1201 | and | ||
1202 | |||
1203 | set_ftrace_notrace | ||
1204 | |||
1205 | A list of available functions that you can add to these files is listed | ||
1206 | in: | ||
1207 | |||
1208 | available_filter_functions | ||
1209 | |||
1210 | # cat /debug/tracing/available_filter_functions | ||
1211 | put_prev_task_idle | ||
1212 | kmem_cache_create | ||
1213 | pick_next_task_rt | ||
1214 | get_online_cpus | ||
1215 | pick_next_task_fair | ||
1216 | mutex_lock | ||
1217 | [...] | ||
1218 | |||
1219 | If I am only interested in sys_nanosleep and hrtimer_interrupt: | ||
1220 | |||
1221 | # echo sys_nanosleep hrtimer_interrupt \ | ||
1222 | > /debug/tracing/set_ftrace_filter | ||
1223 | # echo ftrace > /debug/tracing/current_tracer | ||
1224 | # echo 1 > /debug/tracing/tracing_enabled | ||
1225 | # usleep 1 | ||
1226 | # echo 0 > /debug/tracing/tracing_enabled | ||
1227 | # cat /debug/tracing/trace | ||
1228 | # tracer: ftrace | ||
1229 | # | ||
1230 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1231 | # | | | | | | ||
1232 | usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1233 | usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | ||
1234 | <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1235 | |||
1236 | To see which functions are being traced, you can cat the file: | ||
1237 | |||
1238 | # cat /debug/tracing/set_ftrace_filter | ||
1239 | hrtimer_interrupt | ||
1240 | sys_nanosleep | ||
1241 | |||
1242 | |||
1243 | Perhaps this is not enough. The filters also allow simple wild cards. | ||
1244 | Only the following are currently available | ||
1245 | |||
1246 | <match>* - will match functions that begin with <match> | ||
1247 | *<match> - will match functions that end with <match> | ||
1248 | *<match>* - will match functions that have <match> in it | ||
1249 | |||
1250 | These are the only wild cards which are supported. | ||
1251 | |||
1252 | <match>*<match> will not work. | ||
1253 | |||
1254 | Note: It is better to use quotes to enclose the wild cards, otherwise | ||
1255 | the shell may expand the parameters into names of files in the local | ||
1256 | directory. | ||
1257 | |||
1258 | # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter | ||
1259 | |||
1260 | Produces: | ||
1261 | |||
1262 | # tracer: ftrace | ||
1263 | # | ||
1264 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1265 | # | | | | | | ||
1266 | bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | ||
1267 | bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | ||
1268 | bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | ||
1269 | bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1270 | <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1271 | <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1272 | <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1273 | <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1274 | <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1275 | |||
1276 | |||
1277 | Notice that we lost the sys_nanosleep. | ||
1278 | |||
1279 | # cat /debug/tracing/set_ftrace_filter | ||
1280 | hrtimer_run_queues | ||
1281 | hrtimer_run_pending | ||
1282 | hrtimer_init | ||
1283 | hrtimer_cancel | ||
1284 | hrtimer_try_to_cancel | ||
1285 | hrtimer_forward | ||
1286 | hrtimer_start | ||
1287 | hrtimer_reprogram | ||
1288 | hrtimer_force_reprogram | ||
1289 | hrtimer_get_next_event | ||
1290 | hrtimer_interrupt | ||
1291 | hrtimer_nanosleep | ||
1292 | hrtimer_wakeup | ||
1293 | hrtimer_get_remaining | ||
1294 | hrtimer_get_res | ||
1295 | hrtimer_init_sleeper | ||
1296 | |||
1297 | |||
1298 | This is because the '>' and '>>' act just like they do in bash. | ||
1299 | To rewrite the filters, use '>' | ||
1300 | To append to the filters, use '>>' | ||
1301 | |||
1302 | To clear out a filter so that all functions will be recorded again: | ||
1303 | |||
1304 | # echo > /debug/tracing/set_ftrace_filter | ||
1305 | # cat /debug/tracing/set_ftrace_filter | ||
1306 | # | ||
1307 | |||
1308 | Again, now we want to append. | ||
1309 | |||
1310 | # echo sys_nanosleep > /debug/tracing/set_ftrace_filter | ||
1311 | # cat /debug/tracing/set_ftrace_filter | ||
1312 | sys_nanosleep | ||
1313 | # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter | ||
1314 | # cat /debug/tracing/set_ftrace_filter | ||
1315 | hrtimer_run_queues | ||
1316 | hrtimer_run_pending | ||
1317 | hrtimer_init | ||
1318 | hrtimer_cancel | ||
1319 | hrtimer_try_to_cancel | ||
1320 | hrtimer_forward | ||
1321 | hrtimer_start | ||
1322 | hrtimer_reprogram | ||
1323 | hrtimer_force_reprogram | ||
1324 | hrtimer_get_next_event | ||
1325 | hrtimer_interrupt | ||
1326 | sys_nanosleep | ||
1327 | hrtimer_nanosleep | ||
1328 | hrtimer_wakeup | ||
1329 | hrtimer_get_remaining | ||
1330 | hrtimer_get_res | ||
1331 | hrtimer_init_sleeper | ||
1332 | |||
1333 | |||
1334 | The set_ftrace_notrace prevents those functions from being traced. | ||
1335 | |||
1336 | # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace | ||
1337 | |||
1338 | Produces: | ||
1339 | |||
1340 | # tracer: ftrace | ||
1341 | # | ||
1342 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1343 | # | | | | | | ||
1344 | bash-4043 [01] 115.281644: finish_task_switch <-schedule | ||
1345 | bash-4043 [01] 115.281645: hrtick_set <-schedule | ||
1346 | bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | ||
1347 | bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | ||
1348 | bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | ||
1349 | bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | ||
1350 | bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | ||
1351 | bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | ||
1352 | bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | ||
1353 | |||
1354 | We can see that there's no more lock or preempt tracing. | ||
1355 | |||
1356 | trace_pipe | ||
1357 | ---------- | ||
1358 | |||
1359 | The trace_pipe outputs the same content as the trace file, but the effect | ||
1360 | on the tracing is different. Every read from trace_pipe is consumed. | ||
1361 | This means that subsequent reads will be different. The trace | ||
1362 | is live. | ||
1363 | |||
1364 | # echo function > /debug/tracing/current_tracer | ||
1365 | # cat /debug/tracing/trace_pipe > /tmp/trace.out & | ||
1366 | [1] 4153 | ||
1367 | # echo 1 > /debug/tracing/tracing_enabled | ||
1368 | # usleep 1 | ||
1369 | # echo 0 > /debug/tracing/tracing_enabled | ||
1370 | # cat /debug/tracing/trace | ||
1371 | # tracer: function | ||
1372 | # | ||
1373 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1374 | # | | | | | | ||
1375 | |||
1376 | # | ||
1377 | # cat /tmp/trace.out | ||
1378 | bash-4043 [00] 41.267106: finish_task_switch <-schedule | ||
1379 | bash-4043 [00] 41.267106: hrtick_set <-schedule | ||
1380 | bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | ||
1381 | bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | ||
1382 | bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | ||
1383 | bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | ||
1384 | bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | ||
1385 | bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | ||
1386 | bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | ||
1387 | bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1388 | |||
1389 | |||
1390 | Note, reading the trace_pipe file will block until more input is added. | ||
1391 | By changing the tracer, trace_pipe will issue an EOF. We needed | ||
1392 | to set the function tracer _before_ we "cat" the trace_pipe file. | ||
1393 | |||
1394 | |||
1395 | trace entries | ||
1396 | ------------- | ||
1397 | |||
1398 | Having too much or not enough data can be troublesome in diagnosing | ||
1399 | an issue in the kernel. The file buffer_size_kb is used to modify | ||
1400 | the size of the internal trace buffers. The number listed | ||
1401 | is the number of entries that can be recorded per CPU. To know | ||
1402 | the full size, multiply the number of possible CPUS with the | ||
1403 | number of entries. | ||
1404 | |||
1405 | # cat /debug/tracing/buffer_size_kb | ||
1406 | 1408 (units kilobytes) | ||
1407 | |||
1408 | Note, to modify this, you must have tracing completely disabled. To do that, | ||
1409 | echo "nop" into the current_tracer. If the current_tracer is not set | ||
1410 | to "nop", an EINVAL error will be returned. | ||
1411 | |||
1412 | # echo nop > /debug/tracing/current_tracer | ||
1413 | # echo 10000 > /debug/tracing/buffer_size_kb | ||
1414 | # cat /debug/tracing/buffer_size_kb | ||
1415 | 10000 (units kilobytes) | ||
1416 | |||
1417 | The number of pages which will be allocated is limited to a percentage | ||
1418 | of available memory. Allocating too much will produce an error. | ||
1419 | |||
1420 | # echo 1000000000000 > /debug/tracing/buffer_size_kb | ||
1421 | -bash: echo: write error: Cannot allocate memory | ||
1422 | # cat /debug/tracing/buffer_size_kb | ||
1423 | 85 | ||
1424 | |||
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt new file mode 100644 index 000000000000..9dc1ff4fd536 --- /dev/null +++ b/Documentation/futex-requeue-pi.txt | |||
@@ -0,0 +1,131 @@ | |||
1 | Futex Requeue PI | ||
2 | ---------------- | ||
3 | |||
4 | Requeueing of tasks from a non-PI futex to a PI futex requires | ||
5 | special handling in order to ensure the underlying rt_mutex is never | ||
6 | left without an owner if it has waiters; doing so would break the PI | ||
7 | boosting logic [see rt-mutex-desgin.txt] For the purposes of | ||
8 | brevity, this action will be referred to as "requeue_pi" throughout | ||
9 | this document. Priority inheritance is abbreviated throughout as | ||
10 | "PI". | ||
11 | |||
12 | Motivation | ||
13 | ---------- | ||
14 | |||
15 | Without requeue_pi, the glibc implementation of | ||
16 | pthread_cond_broadcast() must resort to waking all the tasks waiting | ||
17 | on a pthread_condvar and letting them try to sort out which task | ||
18 | gets to run first in classic thundering-herd formation. An ideal | ||
19 | implementation would wake the highest-priority waiter, and leave the | ||
20 | rest to the natural wakeup inherent in unlocking the mutex | ||
21 | associated with the condvar. | ||
22 | |||
23 | Consider the simplified glibc calls: | ||
24 | |||
25 | /* caller must lock mutex */ | ||
26 | pthread_cond_wait(cond, mutex) | ||
27 | { | ||
28 | lock(cond->__data.__lock); | ||
29 | unlock(mutex); | ||
30 | do { | ||
31 | unlock(cond->__data.__lock); | ||
32 | futex_wait(cond->__data.__futex); | ||
33 | lock(cond->__data.__lock); | ||
34 | } while(...) | ||
35 | unlock(cond->__data.__lock); | ||
36 | lock(mutex); | ||
37 | } | ||
38 | |||
39 | pthread_cond_broadcast(cond) | ||
40 | { | ||
41 | lock(cond->__data.__lock); | ||
42 | unlock(cond->__data.__lock); | ||
43 | futex_requeue(cond->data.__futex, cond->mutex); | ||
44 | } | ||
45 | |||
46 | Once pthread_cond_broadcast() requeues the tasks, the cond->mutex | ||
47 | has waiters. Note that pthread_cond_wait() attempts to lock the | ||
48 | mutex only after it has returned to user space. This will leave the | ||
49 | underlying rt_mutex with waiters, and no owner, breaking the | ||
50 | previously mentioned PI-boosting algorithms. | ||
51 | |||
52 | In order to support PI-aware pthread_condvar's, the kernel needs to | ||
53 | be able to requeue tasks to PI futexes. This support implies that | ||
54 | upon a successful futex_wait system call, the caller would return to | ||
55 | user space already holding the PI futex. The glibc implementation | ||
56 | would be modified as follows: | ||
57 | |||
58 | |||
59 | /* caller must lock mutex */ | ||
60 | pthread_cond_wait_pi(cond, mutex) | ||
61 | { | ||
62 | lock(cond->__data.__lock); | ||
63 | unlock(mutex); | ||
64 | do { | ||
65 | unlock(cond->__data.__lock); | ||
66 | futex_wait_requeue_pi(cond->__data.__futex); | ||
67 | lock(cond->__data.__lock); | ||
68 | } while(...) | ||
69 | unlock(cond->__data.__lock); | ||
70 | /* the kernel acquired the the mutex for us */ | ||
71 | } | ||
72 | |||
73 | pthread_cond_broadcast_pi(cond) | ||
74 | { | ||
75 | lock(cond->__data.__lock); | ||
76 | unlock(cond->__data.__lock); | ||
77 | futex_requeue_pi(cond->data.__futex, cond->mutex); | ||
78 | } | ||
79 | |||
80 | The actual glibc implementation will likely test for PI and make the | ||
81 | necessary changes inside the existing calls rather than creating new | ||
82 | calls for the PI cases. Similar changes are needed for | ||
83 | pthread_cond_timedwait() and pthread_cond_signal(). | ||
84 | |||
85 | Implementation | ||
86 | -------------- | ||
87 | |||
88 | In order to ensure the rt_mutex has an owner if it has waiters, it | ||
89 | is necessary for both the requeue code, as well as the waiting code, | ||
90 | to be able to acquire the rt_mutex before returning to user space. | ||
91 | The requeue code cannot simply wake the waiter and leave it to | ||
92 | acquire the rt_mutex as it would open a race window between the | ||
93 | requeue call returning to user space and the waiter waking and | ||
94 | starting to run. This is especially true in the uncontended case. | ||
95 | |||
96 | The solution involves two new rt_mutex helper routines, | ||
97 | rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which | ||
98 | allow the requeue code to acquire an uncontended rt_mutex on behalf | ||
99 | of the waiter and to enqueue the waiter on a contended rt_mutex. | ||
100 | Two new system calls provide the kernel<->user interface to | ||
101 | requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI. | ||
102 | |||
103 | FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait() | ||
104 | and pthread_cond_timedwait()) to block on the initial futex and wait | ||
105 | to be requeued to a PI-aware futex. The implementation is the | ||
106 | result of a high-speed collision between futex_wait() and | ||
107 | futex_lock_pi(), with some extra logic to check for the additional | ||
108 | wake-up scenarios. | ||
109 | |||
110 | FUTEX_REQUEUE_CMP_PI is called by the waker | ||
111 | (pthread_cond_broadcast() and pthread_cond_signal()) to requeue and | ||
112 | possibly wake the waiting tasks. Internally, this system call is | ||
113 | still handled by futex_requeue (by passing requeue_pi=1). Before | ||
114 | requeueing, futex_requeue() attempts to acquire the requeue target | ||
115 | PI futex on behalf of the top waiter. If it can, this waiter is | ||
116 | woken. futex_requeue() then proceeds to requeue the remaining | ||
117 | nr_wake+nr_requeue tasks to the PI futex, calling | ||
118 | rt_mutex_start_proxy_lock() prior to each requeue to prepare the | ||
119 | task as a waiter on the underlying rt_mutex. It is possible that | ||
120 | the lock can be acquired at this stage as well, if so, the next | ||
121 | waiter is woken to finish the acquisition of the lock. | ||
122 | |||
123 | FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but | ||
124 | their sum is all that really matters. futex_requeue() will wake or | ||
125 | requeue up to nr_wake + nr_requeue tasks. It will wake only as many | ||
126 | tasks as it can acquire the lock for, which in the majority of cases | ||
127 | should be 0 as good programming practice dictates that the caller of | ||
128 | either pthread_cond_broadcast() or pthread_cond_signal() acquire the | ||
129 | mutex prior to making the call. FUTEX_REQUEUE_PI requires that | ||
130 | nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for | ||
131 | signal. | ||
diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt new file mode 100644 index 000000000000..e716aadb3a33 --- /dev/null +++ b/Documentation/gcov.txt | |||
@@ -0,0 +1,246 @@ | |||
1 | Using gcov with the Linux kernel | ||
2 | ================================ | ||
3 | |||
4 | 1. Introduction | ||
5 | 2. Preparation | ||
6 | 3. Customization | ||
7 | 4. Files | ||
8 | 5. Modules | ||
9 | 6. Separated build and test machines | ||
10 | 7. Troubleshooting | ||
11 | Appendix A: sample script: gather_on_build.sh | ||
12 | Appendix B: sample script: gather_on_test.sh | ||
13 | |||
14 | |||
15 | 1. Introduction | ||
16 | =============== | ||
17 | |||
18 | gcov profiling kernel support enables the use of GCC's coverage testing | ||
19 | tool gcov [1] with the Linux kernel. Coverage data of a running kernel | ||
20 | is exported in gcov-compatible format via the "gcov" debugfs directory. | ||
21 | To get coverage data for a specific file, change to the kernel build | ||
22 | directory and use gcov with the -o option as follows (requires root): | ||
23 | |||
24 | # cd /tmp/linux-out | ||
25 | # gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c | ||
26 | |||
27 | This will create source code files annotated with execution counts | ||
28 | in the current directory. In addition, graphical gcov front-ends such | ||
29 | as lcov [2] can be used to automate the process of collecting data | ||
30 | for the entire kernel and provide coverage overviews in HTML format. | ||
31 | |||
32 | Possible uses: | ||
33 | |||
34 | * debugging (has this line been reached at all?) | ||
35 | * test improvement (how do I change my test to cover these lines?) | ||
36 | * minimizing kernel configurations (do I need this option if the | ||
37 | associated code is never run?) | ||
38 | |||
39 | -- | ||
40 | |||
41 | [1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html | ||
42 | [2] http://ltp.sourceforge.net/coverage/lcov.php | ||
43 | |||
44 | |||
45 | 2. Preparation | ||
46 | ============== | ||
47 | |||
48 | Configure the kernel with: | ||
49 | |||
50 | CONFIG_DEBUGFS=y | ||
51 | CONFIG_GCOV_KERNEL=y | ||
52 | |||
53 | and to get coverage data for the entire kernel: | ||
54 | |||
55 | CONFIG_GCOV_PROFILE_ALL=y | ||
56 | |||
57 | Note that kernels compiled with profiling flags will be significantly | ||
58 | larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported | ||
59 | on all architectures. | ||
60 | |||
61 | Profiling data will only become accessible once debugfs has been | ||
62 | mounted: | ||
63 | |||
64 | mount -t debugfs none /sys/kernel/debug | ||
65 | |||
66 | |||
67 | 3. Customization | ||
68 | ================ | ||
69 | |||
70 | To enable profiling for specific files or directories, add a line | ||
71 | similar to the following to the respective kernel Makefile: | ||
72 | |||
73 | For a single file (e.g. main.o): | ||
74 | GCOV_PROFILE_main.o := y | ||
75 | |||
76 | For all files in one directory: | ||
77 | GCOV_PROFILE := y | ||
78 | |||
79 | To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL | ||
80 | is specified, use: | ||
81 | |||
82 | GCOV_PROFILE_main.o := n | ||
83 | and: | ||
84 | GCOV_PROFILE := n | ||
85 | |||
86 | Only files which are linked to the main kernel image or are compiled as | ||
87 | kernel modules are supported by this mechanism. | ||
88 | |||
89 | |||
90 | 4. Files | ||
91 | ======== | ||
92 | |||
93 | The gcov kernel support creates the following files in debugfs: | ||
94 | |||
95 | /sys/kernel/debug/gcov | ||
96 | Parent directory for all gcov-related files. | ||
97 | |||
98 | /sys/kernel/debug/gcov/reset | ||
99 | Global reset file: resets all coverage data to zero when | ||
100 | written to. | ||
101 | |||
102 | /sys/kernel/debug/gcov/path/to/compile/dir/file.gcda | ||
103 | The actual gcov data file as understood by the gcov | ||
104 | tool. Resets file coverage data to zero when written to. | ||
105 | |||
106 | /sys/kernel/debug/gcov/path/to/compile/dir/file.gcno | ||
107 | Symbolic link to a static data file required by the gcov | ||
108 | tool. This file is generated by gcc when compiling with | ||
109 | option -ftest-coverage. | ||
110 | |||
111 | |||
112 | 5. Modules | ||
113 | ========== | ||
114 | |||
115 | Kernel modules may contain cleanup code which is only run during | ||
116 | module unload time. The gcov mechanism provides a means to collect | ||
117 | coverage data for such code by keeping a copy of the data associated | ||
118 | with the unloaded module. This data remains available through debugfs. | ||
119 | Once the module is loaded again, the associated coverage counters are | ||
120 | initialized with the data from its previous instantiation. | ||
121 | |||
122 | This behavior can be deactivated by specifying the gcov_persist kernel | ||
123 | parameter: | ||
124 | |||
125 | gcov_persist=0 | ||
126 | |||
127 | At run-time, a user can also choose to discard data for an unloaded | ||
128 | module by writing to its data file or the global reset file. | ||
129 | |||
130 | |||
131 | 6. Separated build and test machines | ||
132 | ==================================== | ||
133 | |||
134 | The gcov kernel profiling infrastructure is designed to work out-of-the | ||
135 | box for setups where kernels are built and run on the same machine. In | ||
136 | cases where the kernel runs on a separate machine, special preparations | ||
137 | must be made, depending on where the gcov tool is used: | ||
138 | |||
139 | a) gcov is run on the TEST machine | ||
140 | |||
141 | The gcov tool version on the test machine must be compatible with the | ||
142 | gcc version used for kernel build. Also the following files need to be | ||
143 | copied from build to test machine: | ||
144 | |||
145 | from the source tree: | ||
146 | - all C source files + headers | ||
147 | |||
148 | from the build tree: | ||
149 | - all C source files + headers | ||
150 | - all .gcda and .gcno files | ||
151 | - all links to directories | ||
152 | |||
153 | It is important to note that these files need to be placed into the | ||
154 | exact same file system location on the test machine as on the build | ||
155 | machine. If any of the path components is symbolic link, the actual | ||
156 | directory needs to be used instead (due to make's CURDIR handling). | ||
157 | |||
158 | b) gcov is run on the BUILD machine | ||
159 | |||
160 | The following files need to be copied after each test case from test | ||
161 | to build machine: | ||
162 | |||
163 | from the gcov directory in sysfs: | ||
164 | - all .gcda files | ||
165 | - all links to .gcno files | ||
166 | |||
167 | These files can be copied to any location on the build machine. gcov | ||
168 | must then be called with the -o option pointing to that directory. | ||
169 | |||
170 | Example directory setup on the build machine: | ||
171 | |||
172 | /tmp/linux: kernel source tree | ||
173 | /tmp/out: kernel build directory as specified by make O= | ||
174 | /tmp/coverage: location of the files copied from the test machine | ||
175 | |||
176 | [user@build] cd /tmp/out | ||
177 | [user@build] gcov -o /tmp/coverage/tmp/out/init main.c | ||
178 | |||
179 | |||
180 | 7. Troubleshooting | ||
181 | ================== | ||
182 | |||
183 | Problem: Compilation aborts during linker step. | ||
184 | Cause: Profiling flags are specified for source files which are not | ||
185 | linked to the main kernel or which are linked by a custom | ||
186 | linker procedure. | ||
187 | Solution: Exclude affected source files from profiling by specifying | ||
188 | GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the | ||
189 | corresponding Makefile. | ||
190 | |||
191 | |||
192 | Appendix A: gather_on_build.sh | ||
193 | ============================== | ||
194 | |||
195 | Sample script to gather coverage meta files on the build machine | ||
196 | (see 6a): | ||
197 | |||
198 | #!/bin/bash | ||
199 | |||
200 | KSRC=$1 | ||
201 | KOBJ=$2 | ||
202 | DEST=$3 | ||
203 | |||
204 | if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then | ||
205 | echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2 | ||
206 | exit 1 | ||
207 | fi | ||
208 | |||
209 | KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -) | ||
210 | KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -) | ||
211 | |||
212 | find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \ | ||
213 | -perm /u+r,g+r | tar cfz $DEST -P -T - | ||
214 | |||
215 | if [ $? -eq 0 ] ; then | ||
216 | echo "$DEST successfully created, copy to test system and unpack with:" | ||
217 | echo " tar xfz $DEST -P" | ||
218 | else | ||
219 | echo "Could not create file $DEST" | ||
220 | fi | ||
221 | |||
222 | |||
223 | Appendix B: gather_on_test.sh | ||
224 | ============================= | ||
225 | |||
226 | Sample script to gather coverage data files on the test machine | ||
227 | (see 6b): | ||
228 | |||
229 | #!/bin/bash | ||
230 | |||
231 | DEST=$1 | ||
232 | GCDA=/sys/kernel/debug/gcov | ||
233 | |||
234 | if [ -z "$DEST" ] ; then | ||
235 | echo "Usage: $0 <output.tar.gz>" >&2 | ||
236 | exit 1 | ||
237 | fi | ||
238 | |||
239 | find $GCDA -name '*.gcno' -o -name '*.gcda' | tar cfz $DEST -T - | ||
240 | |||
241 | if [ $? -eq 0 ] ; then | ||
242 | echo "$DEST successfully created, copy to build system and unpack with:" | ||
243 | echo " tar xfz $DEST" | ||
244 | else | ||
245 | echo "Could not create file $DEST" | ||
246 | fi | ||
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index b1b988701247..e4b6985044a2 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt | |||
@@ -123,7 +123,10 @@ platform-specific implementation issue. | |||
123 | 123 | ||
124 | Using GPIOs | 124 | Using GPIOs |
125 | ----------- | 125 | ----------- |
126 | One of the first things to do with a GPIO, often in board setup code when | 126 | The first thing a system should do with a GPIO is allocate it, using |
127 | the gpio_request() call; see later. | ||
128 | |||
129 | One of the next things to do with a GPIO, often in board setup code when | ||
127 | setting up a platform_device using the GPIO, is mark its direction: | 130 | setting up a platform_device using the GPIO, is mark its direction: |
128 | 131 | ||
129 | /* set as input or output, returning 0 or negative errno */ | 132 | /* set as input or output, returning 0 or negative errno */ |
@@ -141,8 +144,8 @@ This helps avoid signal glitching during system startup. | |||
141 | 144 | ||
142 | For compatibility with legacy interfaces to GPIOs, setting the direction | 145 | For compatibility with legacy interfaces to GPIOs, setting the direction |
143 | of a GPIO implicitly requests that GPIO (see below) if it has not been | 146 | of a GPIO implicitly requests that GPIO (see below) if it has not been |
144 | requested already. That compatibility may be removed in the future; | 147 | requested already. That compatibility is being removed from the optional |
145 | explicitly requesting GPIOs is strongly preferred. | 148 | gpiolib framework. |
146 | 149 | ||
147 | Setting the direction can fail if the GPIO number is invalid, or when | 150 | Setting the direction can fail if the GPIO number is invalid, or when |
148 | that particular GPIO can't be used in that mode. It's generally a bad | 151 | that particular GPIO can't be used in that mode. It's generally a bad |
@@ -195,7 +198,7 @@ This requires sleeping, which can't be done from inside IRQ handlers. | |||
195 | 198 | ||
196 | Platforms that support this type of GPIO distinguish them from other GPIOs | 199 | Platforms that support this type of GPIO distinguish them from other GPIOs |
197 | by returning nonzero from this call (which requires a valid GPIO number, | 200 | by returning nonzero from this call (which requires a valid GPIO number, |
198 | either explicitly or implicitly requested): | 201 | which should have been previously allocated with gpio_request): |
199 | 202 | ||
200 | int gpio_cansleep(unsigned gpio); | 203 | int gpio_cansleep(unsigned gpio); |
201 | 204 | ||
@@ -212,10 +215,9 @@ for GPIOs that can't be accessed from IRQ handlers, these calls act the | |||
212 | same as the spinlock-safe calls. | 215 | same as the spinlock-safe calls. |
213 | 216 | ||
214 | 217 | ||
215 | Claiming and Releasing GPIOs (OPTIONAL) | 218 | Claiming and Releasing GPIOs |
216 | --------------------------------------- | 219 | ---------------------------- |
217 | To help catch system configuration errors, two calls are defined. | 220 | To help catch system configuration errors, two calls are defined. |
218 | However, many platforms don't currently support this mechanism. | ||
219 | 221 | ||
220 | /* request GPIO, returning 0 or negative errno. | 222 | /* request GPIO, returning 0 or negative errno. |
221 | * non-null labels may be useful for diagnostics. | 223 | * non-null labels may be useful for diagnostics. |
@@ -244,13 +246,6 @@ Some platforms may also use knowledge about what GPIOs are active for | |||
244 | power management, such as by powering down unused chip sectors and, more | 246 | power management, such as by powering down unused chip sectors and, more |
245 | easily, gating off unused clocks. | 247 | easily, gating off unused clocks. |
246 | 248 | ||
247 | These two calls are optional because not not all current Linux platforms | ||
248 | offer such functionality in their GPIO support; a valid implementation | ||
249 | could return success for all gpio_request() calls. Unlike the other calls, | ||
250 | the state they represent doesn't normally match anything from a hardware | ||
251 | register; it's just a software bitmap which clearly is not necessary for | ||
252 | correct operation of hardware or (bug free) drivers. | ||
253 | |||
254 | Note that requesting a GPIO does NOT cause it to be configured in any | 249 | Note that requesting a GPIO does NOT cause it to be configured in any |
255 | way; it just marks that GPIO as in use. Separate code must handle any | 250 | way; it just marks that GPIO as in use. Separate code must handle any |
256 | pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). | 251 | pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). |
@@ -463,7 +458,7 @@ debugfs interface, since it provides control over GPIO direction and | |||
463 | value instead of just showing a gpio state summary. Plus, it could be | 458 | value instead of just showing a gpio state summary. Plus, it could be |
464 | present on production systems without debugging support. | 459 | present on production systems without debugging support. |
465 | 460 | ||
466 | Given approprate hardware documentation for the system, userspace could | 461 | Given appropriate hardware documentation for the system, userspace could |
467 | know for example that GPIO #23 controls the write protect line used to | 462 | know for example that GPIO #23 controls the write protect line used to |
468 | protect boot loader segments in flash memory. System upgrade procedures | 463 | protect boot loader segments in flash memory. System upgrade procedures |
469 | may need to temporarily remove that protection, first importing a GPIO, | 464 | may need to temporarily remove that protection, first importing a GPIO, |
diff --git a/Documentation/hwmon/ds1621 b/Documentation/hwmon/ds1621 index 1fee6f1e6bc5..5e97f333c4df 100644 --- a/Documentation/hwmon/ds1621 +++ b/Documentation/hwmon/ds1621 | |||
@@ -49,12 +49,9 @@ of up to +/- 0.5 degrees even when compared against precise temperature | |||
49 | readings. Be sure to have a high vs. low temperature limit gap of al least | 49 | readings. Be sure to have a high vs. low temperature limit gap of al least |
50 | 1.0 degree Celsius to avoid Tout "bouncing", though! | 50 | 1.0 degree Celsius to avoid Tout "bouncing", though! |
51 | 51 | ||
52 | As for alarms, you can read the alarm status of the DS1621 via the 'alarms' | 52 | The alarm bits are set when the high or low limits are met or exceeded and |
53 | /sys file interface. The result consists mainly of bit 6 and 5 of the | 53 | are reset by the module as soon as the respective temperature ranges are |
54 | configuration register of the chip; bit 6 (0x40 or 64) is the high alarm | 54 | left. |
55 | bit and bit 5 (0x20 or 32) the low one. These bits are set when the high or | ||
56 | low limits are met or exceeded and are reset by the module as soon as the | ||
57 | respective temperature ranges are left. | ||
58 | 55 | ||
59 | The alarm registers are in no way suitable to find out about the actual | 56 | The alarm registers are in no way suitable to find out about the actual |
60 | status of Tout. They will only tell you about its history, whether or not | 57 | status of Tout. They will only tell you about its history, whether or not |
@@ -64,45 +61,3 @@ with neither of the alarms set. | |||
64 | 61 | ||
65 | Temperature conversion of the DS1621 takes up to 1000ms; internal access to | 62 | Temperature conversion of the DS1621 takes up to 1000ms; internal access to |
66 | non-volatile registers may last for 10ms or below. | 63 | non-volatile registers may last for 10ms or below. |
67 | |||
68 | High Accuracy Temperature Reading | ||
69 | --------------------------------- | ||
70 | |||
71 | As said before, the temperature issued via the 9-bit i2c-bus data is | ||
72 | somewhat arbitrary. Internally, the temperature conversion is of a | ||
73 | different kind that is explained (not so...) well in the DS1621 data sheet. | ||
74 | To cut the long story short: Inside the DS1621 there are two oscillators, | ||
75 | both of them biassed by a temperature coefficient. | ||
76 | |||
77 | Higher resolution of the temperature reading can be achieved using the | ||
78 | internal projection, which means taking account of REG_COUNT and REG_SLOPE | ||
79 | (the driver manages them): | ||
80 | |||
81 | Taken from Dallas Semiconductors App Note 068: 'Increasing Temperature | ||
82 | Resolution on the DS1620' and App Note 105: 'High Resolution Temperature | ||
83 | Measurement with Dallas Direct-to-Digital Temperature Sensors' | ||
84 | |||
85 | - Read the 9-bit temperature and strip the LSB (Truncate the .5 degs) | ||
86 | - The resulting value is TEMP_READ. | ||
87 | - Then, read REG_COUNT. | ||
88 | - And then, REG_SLOPE. | ||
89 | |||
90 | TEMP = TEMP_READ - 0.25 + ((REG_SLOPE - REG_COUNT) / REG_SLOPE) | ||
91 | |||
92 | Note that this is what the DONE bit in the DS1621 configuration register is | ||
93 | good for: Internally, one temperature conversion takes up to 1000ms. Before | ||
94 | that conversion is complete you will not be able to read valid things out | ||
95 | of REG_COUNT and REG_SLOPE. The DONE bit, as you may have guessed by now, | ||
96 | tells you whether the conversion is complete ("done", in plain English) and | ||
97 | thus, whether the values you read are good or not. | ||
98 | |||
99 | The DS1621 has two modes of operation: "Continuous" conversion, which can | ||
100 | be understood as the default stand-alone mode where the chip gets the | ||
101 | temperature and controls external devices via its Tout pin or tells other | ||
102 | i2c's about it if they care. The other mode is called "1SHOT", that means | ||
103 | that it only figures out about the temperature when it is explicitly told | ||
104 | to do so; this can be seen as power saving mode. | ||
105 | |||
106 | Now if you want to read REG_COUNT and REG_SLOPE, you have to either stop | ||
107 | the continuous conversions until the contents of these registers are valid, | ||
108 | or, in 1SHOT mode, you have to have one conversion made. | ||
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg index a8321267b5b6..bee4c30bc1e2 100644 --- a/Documentation/hwmon/f71882fg +++ b/Documentation/hwmon/f71882fg | |||
@@ -2,14 +2,18 @@ Kernel driver f71882fg | |||
2 | ====================== | 2 | ====================== |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * Fintek F71882FG and F71883FG | 5 | * Fintek F71858FG |
6 | Prefix: 'f71882fg' | 6 | Prefix: 'f71858fg' |
7 | Addresses scanned: none, address read from Super I/O config space | 7 | Addresses scanned: none, address read from Super I/O config space |
8 | Datasheet: Available from the Fintek website | 8 | Datasheet: Available from the Fintek website |
9 | * Fintek F71862FG and F71863FG | 9 | * Fintek F71862FG and F71863FG |
10 | Prefix: 'f71862fg' | 10 | Prefix: 'f71862fg' |
11 | Addresses scanned: none, address read from Super I/O config space | 11 | Addresses scanned: none, address read from Super I/O config space |
12 | Datasheet: Available from the Fintek website | 12 | Datasheet: Available from the Fintek website |
13 | * Fintek F71882FG and F71883FG | ||
14 | Prefix: 'f71882fg' | ||
15 | Addresses scanned: none, address read from Super I/O config space | ||
16 | Datasheet: Available from the Fintek website | ||
13 | * Fintek F8000 | 17 | * Fintek F8000 |
14 | Prefix: 'f8000' | 18 | Prefix: 'f8000' |
15 | Addresses scanned: none, address read from Super I/O config space | 19 | Addresses scanned: none, address read from Super I/O config space |
@@ -66,13 +70,13 @@ printed when loading the driver. | |||
66 | 70 | ||
67 | Three different fan control modes are supported; the mode number is written | 71 | Three different fan control modes are supported; the mode number is written |
68 | to the pwm#_enable file. Note that not all modes are supported on all | 72 | to the pwm#_enable file. Note that not all modes are supported on all |
69 | chips, and some modes may only be available in RPM / PWM mode on the F8000. | 73 | chips, and some modes may only be available in RPM / PWM mode. |
70 | Writing an unsupported mode will result in an invalid parameter error. | 74 | Writing an unsupported mode will result in an invalid parameter error. |
71 | 75 | ||
72 | * 1: Manual mode | 76 | * 1: Manual mode |
73 | You ask for a specific PWM duty cycle / DC voltage or a specific % of | 77 | You ask for a specific PWM duty cycle / DC voltage or a specific % of |
74 | fan#_full_speed by writing to the pwm# file. This mode is only | 78 | fan#_full_speed by writing to the pwm# file. This mode is only |
75 | available on the F8000 if the fan channel is in RPM mode. | 79 | available on the F71858FG / F8000 if the fan channel is in RPM mode. |
76 | 80 | ||
77 | * 2: Normal auto mode | 81 | * 2: Normal auto mode |
78 | You can define a number of temperature/fan speed trip points, which % the | 82 | You can define a number of temperature/fan speed trip points, which % the |
diff --git a/Documentation/hwmon/g760a b/Documentation/hwmon/g760a new file mode 100644 index 000000000000..e032eeb75629 --- /dev/null +++ b/Documentation/hwmon/g760a | |||
@@ -0,0 +1,36 @@ | |||
1 | Kernel driver g760a | ||
2 | =================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Global Mixed-mode Technology Inc. G760A | ||
6 | Prefix: 'g760a' | ||
7 | Datasheet: Publicly available at the GMT website | ||
8 | http://www.gmt.com.tw/datasheet/g760a.pdf | ||
9 | |||
10 | Author: Herbert Valerio Riedel <hvr@gnu.org> | ||
11 | |||
12 | Description | ||
13 | ----------- | ||
14 | |||
15 | The GMT G760A Fan Speed PWM Controller is connected directly to a fan | ||
16 | and performs closed-loop control of the fan speed. | ||
17 | |||
18 | The fan speed is programmed by setting the period via 'pwm1' of two | ||
19 | consecutive speed pulses. The period is defined in terms of clock | ||
20 | cycle counts of an assumed 32kHz clock source. | ||
21 | |||
22 | Setting a period of 0 stops the fan; setting the period to 255 sets | ||
23 | fan to maximum speed. | ||
24 | |||
25 | The measured fan rotation speed returned via 'fan1_input' is derived | ||
26 | from the measured speed pulse period by assuming again a 32kHz clock | ||
27 | source and a 2 pulse-per-revolution fan. | ||
28 | |||
29 | The 'alarms' file provides access to the two alarm bits provided by | ||
30 | the G760A chip's status register: Bit 0 is set when the actual fan | ||
31 | speed differs more than 20% with respect to the programmed fan speed; | ||
32 | bit 1 is set when fan speed is below 1920 RPM. | ||
33 | |||
34 | The g760a driver will not update its values more frequently than every | ||
35 | other second; reading them more often will do no harm, but will return | ||
36 | 'old' values. | ||
diff --git a/Documentation/hwmon/ibmaem b/Documentation/hwmon/ibmaem index e98bdfea3467..1e0d59e000b4 100644 --- a/Documentation/hwmon/ibmaem +++ b/Documentation/hwmon/ibmaem | |||
@@ -7,7 +7,7 @@ henceforth as AEM. | |||
7 | Supported systems: | 7 | Supported systems: |
8 | * Any recent IBM System X server with AEM support. | 8 | * Any recent IBM System X server with AEM support. |
9 | This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2, | 9 | This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2, |
10 | x3950 M2, and certain HS2x/LS2x/QS2x blades. The IPMI host interface | 10 | x3950 M2, and certain HC10/HS2x/LS2x/QS2x blades. The IPMI host interface |
11 | driver ("ipmi-si") needs to be loaded for this driver to do anything. | 11 | driver ("ipmi-si") needs to be loaded for this driver to do anything. |
12 | Prefix: 'ibmaem' | 12 | Prefix: 'ibmaem' |
13 | Datasheet: Not available | 13 | Datasheet: Not available |
diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d index 287f8c902656..effe949a7282 100644 --- a/Documentation/hwmon/lis3lv02d +++ b/Documentation/hwmon/lis3lv02d | |||
@@ -1,11 +1,11 @@ | |||
1 | Kernel driver lis3lv02d | 1 | Kernel driver lis3lv02d |
2 | ================== | 2 | ======================= |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | 5 | ||
6 | * STMicroelectronics LIS3LV02DL and LIS3LV02DQ | 6 | * STMicroelectronics LIS3LV02DL and LIS3LV02DQ |
7 | 7 | ||
8 | Author: | 8 | Authors: |
9 | Yan Burman <burman.yan@gmail.com> | 9 | Yan Burman <burman.yan@gmail.com> |
10 | Eric Piel <eric.piel@tremplin-utc.net> | 10 | Eric Piel <eric.piel@tremplin-utc.net> |
11 | 11 | ||
@@ -15,7 +15,7 @@ Description | |||
15 | 15 | ||
16 | This driver provides support for the accelerometer found in various HP | 16 | This driver provides support for the accelerometer found in various HP |
17 | laptops sporting the feature officially called "HP Mobile Data | 17 | laptops sporting the feature officially called "HP Mobile Data |
18 | Protection System 3D" or "HP 3D DriveGuard". It detect automatically | 18 | Protection System 3D" or "HP 3D DriveGuard". It detects automatically |
19 | laptops with this sensor. Known models (for now the HP 2133, nc6420, | 19 | laptops with this sensor. Known models (for now the HP 2133, nc6420, |
20 | nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis | 20 | nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis |
21 | automatically oriented on standard way (eg: you can directly play | 21 | automatically oriented on standard way (eg: you can directly play |
@@ -27,7 +27,7 @@ position - 3D position that the accelerometer reports. Format: "(x,y,z)" | |||
27 | calibrate - read: values (x, y, z) that are used as the base for input | 27 | calibrate - read: values (x, y, z) that are used as the base for input |
28 | class device operation. | 28 | class device operation. |
29 | write: forces the base to be recalibrated with the current | 29 | write: forces the base to be recalibrated with the current |
30 | position. | 30 | position. |
31 | rate - reports the sampling rate of the accelerometer device in HZ | 31 | rate - reports the sampling rate of the accelerometer device in HZ |
32 | 32 | ||
33 | This driver also provides an absolute input class device, allowing | 33 | This driver also provides an absolute input class device, allowing |
@@ -48,7 +48,7 @@ For better compatibility between the various laptops. The values reported by | |||
48 | the accelerometer are converted into a "standard" organisation of the axes | 48 | the accelerometer are converted into a "standard" organisation of the axes |
49 | (aka "can play neverball out of the box"): | 49 | (aka "can play neverball out of the box"): |
50 | * When the laptop is horizontal the position reported is about 0 for X and Y | 50 | * When the laptop is horizontal the position reported is about 0 for X and Y |
51 | and a positive value for Z | 51 | and a positive value for Z |
52 | * If the left side is elevated, X increases (becomes positive) | 52 | * If the left side is elevated, X increases (becomes positive) |
53 | * If the front side (where the touchpad is) is elevated, Y decreases | 53 | * If the front side (where the touchpad is) is elevated, Y decreases |
54 | (becomes negative) | 54 | (becomes negative) |
@@ -59,3 +59,13 @@ email to the authors to add it to the database. When reporting a new | |||
59 | laptop, please include the output of "dmidecode" plus the value of | 59 | laptop, please include the output of "dmidecode" plus the value of |
60 | /sys/devices/platform/lis3lv02d/position in these four cases. | 60 | /sys/devices/platform/lis3lv02d/position in these four cases. |
61 | 61 | ||
62 | Q&A | ||
63 | --- | ||
64 | |||
65 | Q: How do I safely simulate freefall? I have an HP "portable | ||
66 | workstation" which has about 3.5kg and a plastic case, so letting it | ||
67 | fall to the ground is out of question... | ||
68 | |||
69 | A: The sensor is pretty sensitive, so your hands can do it. Lift it | ||
70 | into free space, follow the fall with your hands for like 10 | ||
71 | centimeters. That should be enough to trigger the detection. | ||
diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215 new file mode 100644 index 000000000000..2e6a21eb656c --- /dev/null +++ b/Documentation/hwmon/ltc4215 | |||
@@ -0,0 +1,50 @@ | |||
1 | Kernel driver ltc4215 | ||
2 | ===================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Linear Technology LTC4215 | ||
6 | Prefix: 'ltc4215' | ||
7 | Addresses scanned: 0x44 | ||
8 | Datasheet: | ||
9 | http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697 | ||
10 | |||
11 | Author: Ira W. Snyder <iws@ovro.caltech.edu> | ||
12 | |||
13 | |||
14 | Description | ||
15 | ----------- | ||
16 | |||
17 | The LTC4215 controller allows a board to be safely inserted and removed | ||
18 | from a live backplane. | ||
19 | |||
20 | |||
21 | Usage Notes | ||
22 | ----------- | ||
23 | |||
24 | This driver does not probe for LTC4215 devices, due to the fact that some | ||
25 | of the possible addresses are unfriendly to probing. You will need to use | ||
26 | the "force" parameter to tell the driver where to find the device. | ||
27 | |||
28 | Example: the following will load the driver for an LTC4215 at address 0x44 | ||
29 | on I2C bus #0: | ||
30 | $ modprobe ltc4215 force=0,0x44 | ||
31 | |||
32 | |||
33 | Sysfs entries | ||
34 | ------------- | ||
35 | |||
36 | The LTC4215 has built-in limits for overvoltage, undervoltage, and | ||
37 | undercurrent warnings. This makes it very likely that the reference | ||
38 | circuit will be used. | ||
39 | |||
40 | in1_input input voltage | ||
41 | in2_input output voltage | ||
42 | |||
43 | in1_min_alarm input undervoltage alarm | ||
44 | in1_max_alarm input overvoltage alarm | ||
45 | |||
46 | curr1_input current | ||
47 | curr1_max_alarm overcurrent alarm | ||
48 | |||
49 | power1_input power usage | ||
50 | power1_alarm power bad alarm | ||
diff --git a/Documentation/i2c/chips/pcf8591 b/Documentation/hwmon/pcf8591 index 5628fcf4207f..5628fcf4207f 100644 --- a/Documentation/i2c/chips/pcf8591 +++ b/Documentation/hwmon/pcf8591 | |||
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index 6dbfd5efd991..dcbd502c8792 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface | |||
@@ -70,6 +70,7 @@ are interpreted as 0! For more on how written strings are interpreted see the | |||
70 | [0-*] denotes any positive number starting from 0 | 70 | [0-*] denotes any positive number starting from 0 |
71 | [1-*] denotes any positive number starting from 1 | 71 | [1-*] denotes any positive number starting from 1 |
72 | RO read only value | 72 | RO read only value |
73 | WO write only value | ||
73 | RW read/write value | 74 | RW read/write value |
74 | 75 | ||
75 | Read/write values may be read-only for some chips, depending on the | 76 | Read/write values may be read-only for some chips, depending on the |
@@ -150,6 +151,11 @@ fan[1-*]_min Fan minimum value | |||
150 | Unit: revolution/min (RPM) | 151 | Unit: revolution/min (RPM) |
151 | RW | 152 | RW |
152 | 153 | ||
154 | fan[1-*]_max Fan maximum value | ||
155 | Unit: revolution/min (RPM) | ||
156 | Only rarely supported by the hardware. | ||
157 | RW | ||
158 | |||
153 | fan[1-*]_input Fan input value. | 159 | fan[1-*]_input Fan input value. |
154 | Unit: revolution/min (RPM) | 160 | Unit: revolution/min (RPM) |
155 | RO | 161 | RO |
@@ -290,6 +296,24 @@ temp[1-*]_label Suggested temperature channel label. | |||
290 | user-space. | 296 | user-space. |
291 | RO | 297 | RO |
292 | 298 | ||
299 | temp[1-*]_lowest | ||
300 | Historical minimum temperature | ||
301 | Unit: millidegree Celsius | ||
302 | RO | ||
303 | |||
304 | temp[1-*]_highest | ||
305 | Historical maximum temperature | ||
306 | Unit: millidegree Celsius | ||
307 | RO | ||
308 | |||
309 | temp[1-*]_reset_history | ||
310 | Reset temp_lowest and temp_highest | ||
311 | WO | ||
312 | |||
313 | temp_reset_history | ||
314 | Reset temp_lowest and temp_highest for all sensors | ||
315 | WO | ||
316 | |||
293 | Some chips measure temperature using external thermistors and an ADC, and | 317 | Some chips measure temperature using external thermistors and an ADC, and |
294 | report the temperature measurement as a voltage. Converting this voltage | 318 | report the temperature measurement as a voltage. Converting this voltage |
295 | back to a temperature (or the other way around for limits) requires | 319 | back to a temperature (or the other way around for limits) requires |
@@ -365,6 +389,7 @@ energy[1-*]_input Cumulative energy use | |||
365 | Unit: microJoule | 389 | Unit: microJoule |
366 | RO | 390 | RO |
367 | 391 | ||
392 | |||
368 | ********** | 393 | ********** |
369 | * Alarms * | 394 | * Alarms * |
370 | ********** | 395 | ********** |
@@ -389,6 +414,7 @@ OR | |||
389 | in[0-*]_min_alarm | 414 | in[0-*]_min_alarm |
390 | in[0-*]_max_alarm | 415 | in[0-*]_max_alarm |
391 | fan[1-*]_min_alarm | 416 | fan[1-*]_min_alarm |
417 | fan[1-*]_max_alarm | ||
392 | temp[1-*]_min_alarm | 418 | temp[1-*]_min_alarm |
393 | temp[1-*]_max_alarm | 419 | temp[1-*]_max_alarm |
394 | temp[1-*]_crit_alarm | 420 | temp[1-*]_crit_alarm |
@@ -453,6 +479,27 @@ beep_mask Bitmask for beep. | |||
453 | RW | 479 | RW |
454 | 480 | ||
455 | 481 | ||
482 | *********************** | ||
483 | * Intrusion detection * | ||
484 | *********************** | ||
485 | |||
486 | intrusion[0-*]_alarm | ||
487 | Chassis intrusion detection | ||
488 | 0: OK | ||
489 | 1: intrusion detected | ||
490 | RW | ||
491 | Contrary to regular alarm flags which clear themselves | ||
492 | automatically when read, this one sticks until cleared by | ||
493 | the user. This is done by writing 0 to the file. Writing | ||
494 | other values is unsupported. | ||
495 | |||
496 | intrusion[0-*]_beep | ||
497 | Chassis intrusion beep | ||
498 | 0: disable | ||
499 | 1: enable | ||
500 | RW | ||
501 | |||
502 | |||
456 | sysfs attribute writes interpretation | 503 | sysfs attribute writes interpretation |
457 | ------------------------------------- | 504 | ------------------------------------- |
458 | 505 | ||
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401 new file mode 100644 index 000000000000..9fc447249212 --- /dev/null +++ b/Documentation/hwmon/tmp401 | |||
@@ -0,0 +1,42 @@ | |||
1 | Kernel driver tmp401 | ||
2 | ==================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Texas Instruments TMP401 | ||
6 | Prefix: 'tmp401' | ||
7 | Addresses scanned: I2C 0x4c | ||
8 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html | ||
9 | * Texas Instruments TMP411 | ||
10 | Prefix: 'tmp411' | ||
11 | Addresses scanned: I2C 0x4c | ||
12 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html | ||
13 | |||
14 | Authors: | ||
15 | Hans de Goede <hdegoede@redhat.com> | ||
16 | Andre Prendel <andre.prendel@gmx.de> | ||
17 | |||
18 | Description | ||
19 | ----------- | ||
20 | |||
21 | This driver implements support for Texas Instruments TMP401 and | ||
22 | TMP411 chips. These chips implements one remote and one local | ||
23 | temperature sensor. Temperature is measured in degrees | ||
24 | Celsius. Resolution of the remote sensor is 0.0625 degree. Local | ||
25 | sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not | ||
26 | supported by the driver so far, so using the default resolution of 0.5 | ||
27 | degree). | ||
28 | |||
29 | The driver provides the common sysfs-interface for temperatures (see | ||
30 | /Documentation/hwmon/sysfs-interface under Temperatures). | ||
31 | |||
32 | The TMP411 chip is compatible with TMP401. It provides some additional | ||
33 | features. | ||
34 | |||
35 | * Minimum and Maximum temperature measured since power-on, chip-reset | ||
36 | |||
37 | Exported via sysfs attributes tempX_lowest and tempX_highest. | ||
38 | |||
39 | * Reset of historical minimum/maximum temperature measurements | ||
40 | |||
41 | Exported via sysfs attribute temp_reset_history. Writing 1 to this | ||
42 | file triggers a reset. | ||
diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf index d6e1ae30fa6e..02b74899edaf 100644 --- a/Documentation/hwmon/w83627ehf +++ b/Documentation/hwmon/w83627ehf | |||
@@ -2,30 +2,44 @@ Kernel driver w83627ehf | |||
2 | ======================= | 2 | ======================= |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * Winbond W83627EHF/EHG/DHG (ISA access ONLY) | 5 | * Winbond W83627EHF/EHG (ISA access ONLY) |
6 | Prefix: 'w83627ehf' | 6 | Prefix: 'w83627ehf' |
7 | Addresses scanned: ISA address retrieved from Super I/O registers | 7 | Addresses scanned: ISA address retrieved from Super I/O registers |
8 | Datasheet: | 8 | Datasheet: |
9 | http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83627EHF_%20W83627EHGb.pdf | 9 | http://www.nuvoton.com.tw/NR/rdonlyres/A6A258F0-F0C9-4F97-81C0-C4D29E7E943E/0/W83627EHF.pdf |
10 | DHG datasheet confidential. | 10 | * Winbond W83627DHG |
11 | Prefix: 'w83627dhg' | ||
12 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
13 | Datasheet: | ||
14 | http://www.nuvoton.com.tw/NR/rdonlyres/7885623D-A487-4CF9-A47F-30C5F73D6FE6/0/W83627DHG.pdf | ||
15 | * Winbond W83627DHG-P | ||
16 | Prefix: 'w83627dhg' | ||
17 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
18 | Datasheet: not available | ||
19 | * Winbond W83667HG | ||
20 | Prefix: 'w83667hg' | ||
21 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
22 | Datasheet: not available | ||
11 | 23 | ||
12 | Authors: | 24 | Authors: |
13 | Jean Delvare <khali@linux-fr.org> | 25 | Jean Delvare <khali@linux-fr.org> |
14 | Yuan Mu (Winbond) | 26 | Yuan Mu (Winbond) |
15 | Rudolf Marek <r.marek@assembler.cz> | 27 | Rudolf Marek <r.marek@assembler.cz> |
16 | David Hubbard <david.c.hubbard@gmail.com> | 28 | David Hubbard <david.c.hubbard@gmail.com> |
29 | Gong Jun <JGong@nuvoton.com> | ||
17 | 30 | ||
18 | Description | 31 | Description |
19 | ----------- | 32 | ----------- |
20 | 33 | ||
21 | This driver implements support for the Winbond W83627EHF, W83627EHG, and | 34 | This driver implements support for the Winbond W83627EHF, W83627EHG, |
22 | W83627DHG super I/O chips. We will refer to them collectively as Winbond chips. | 35 | W83627DHG, W83627DHG-P and W83667HG super I/O chips. We will refer to them |
36 | collectively as Winbond chips. | ||
23 | 37 | ||
24 | The chips implement three temperature sensors, five fan rotation | 38 | The chips implement three temperature sensors, five fan rotation |
25 | speed sensors, ten analog voltage sensors (only nine for the 627DHG), one | 39 | speed sensors, ten analog voltage sensors (only nine for the 627DHG), one |
26 | VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG), alarms with beep | 40 | VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG and 667HG), alarms |
27 | warnings (control unimplemented), and some automatic fan regulation | 41 | with beep warnings (control unimplemented), and some automatic fan |
28 | strategies (plus manual fan control mode). | 42 | regulation strategies (plus manual fan control mode). |
29 | 43 | ||
30 | Temperatures are measured in degrees Celsius and measurement resolution is 1 | 44 | Temperatures are measured in degrees Celsius and measurement resolution is 1 |
31 | degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when | 45 | degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when |
@@ -54,7 +68,8 @@ follows: | |||
54 | temp1 -> pwm1 | 68 | temp1 -> pwm1 |
55 | temp2 -> pwm2 | 69 | temp2 -> pwm2 |
56 | temp3 -> pwm3 | 70 | temp3 -> pwm3 |
57 | prog -> pwm4 (the programmable setting is not supported by the driver) | 71 | prog -> pwm4 (not on 667HG; the programmable setting is not supported by |
72 | the driver) | ||
58 | 73 | ||
59 | /sys files | 74 | /sys files |
60 | ---------- | 75 | ---------- |
@@ -124,3 +139,6 @@ done in the driver for all register addresses. | |||
124 | The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and | 139 | The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and |
125 | the ICH8 southbridge gets that data via PECI from the DHG, so that the | 140 | the ICH8 southbridge gets that data via PECI from the DHG, so that the |
126 | southbridge drives the fans. And the DHG supports SST, a one-wire serial bus. | 141 | southbridge drives the fans. And the DHG supports SST, a one-wire serial bus. |
142 | |||
143 | The DHG-P has an additional automatic fan speed control mode named Smart Fan | ||
144 | (TM) III+. This mode is not yet supported by the driver. | ||
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2 index fae3495bcbaf..9698c396b830 100644 --- a/Documentation/i2c/busses/i2c-nforce2 +++ b/Documentation/i2c/busses/i2c-nforce2 | |||
@@ -7,10 +7,14 @@ Supported adapters: | |||
7 | * nForce3 250Gb MCP 10de:00E4 | 7 | * nForce3 250Gb MCP 10de:00E4 |
8 | * nForce4 MCP 10de:0052 | 8 | * nForce4 MCP 10de:0052 |
9 | * nForce4 MCP-04 10de:0034 | 9 | * nForce4 MCP-04 10de:0034 |
10 | * nForce4 MCP51 10de:0264 | 10 | * nForce MCP51 10de:0264 |
11 | * nForce4 MCP55 10de:0368 | 11 | * nForce MCP55 10de:0368 |
12 | * nForce4 MCP61 10de:03EB | 12 | * nForce MCP61 10de:03EB |
13 | * nForce4 MCP65 10de:0446 | 13 | * nForce MCP65 10de:0446 |
14 | * nForce MCP67 10de:0542 | ||
15 | * nForce MCP73 10de:07D8 | ||
16 | * nForce MCP78S 10de:0752 | ||
17 | * nForce MCP79 10de:0AA2 | ||
14 | 18 | ||
15 | Datasheet: not publicly available, but seems to be similar to the | 19 | Datasheet: not publicly available, but seems to be similar to the |
16 | AMD-8111 SMBus 2.0 adapter. | 20 | AMD-8111 SMBus 2.0 adapter. |
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores index cfcebb10d14e..c269aaa2f26a 100644 --- a/Documentation/i2c/busses/i2c-ocores +++ b/Documentation/i2c/busses/i2c-ocores | |||
@@ -20,6 +20,8 @@ platform_device with the base address and interrupt number. The | |||
20 | dev.platform_data of the device should also point to a struct | 20 | dev.platform_data of the device should also point to a struct |
21 | ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the | 21 | ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the |
22 | distance between registers and the input clock speed. | 22 | distance between registers and the input clock speed. |
23 | There is also a possibility to attach a list of i2c_board_info which | ||
24 | the i2c-ocores driver will add to the bus upon creation. | ||
23 | 25 | ||
24 | E.G. something like: | 26 | E.G. something like: |
25 | 27 | ||
@@ -36,9 +38,24 @@ static struct resource ocores_resources[] = { | |||
36 | }, | 38 | }, |
37 | }; | 39 | }; |
38 | 40 | ||
41 | /* optional board info */ | ||
42 | struct i2c_board_info ocores_i2c_board_info[] = { | ||
43 | { | ||
44 | I2C_BOARD_INFO("tsc2003", 0x48), | ||
45 | .platform_data = &tsc2003_platform_data, | ||
46 | .irq = TSC_IRQ | ||
47 | }, | ||
48 | { | ||
49 | I2C_BOARD_INFO("adv7180", 0x42 >> 1), | ||
50 | .irq = ADV_IRQ | ||
51 | } | ||
52 | }; | ||
53 | |||
39 | static struct ocores_i2c_platform_data myi2c_data = { | 54 | static struct ocores_i2c_platform_data myi2c_data = { |
40 | .regstep = 2, /* two bytes between registers */ | 55 | .regstep = 2, /* two bytes between registers */ |
41 | .clock_khz = 50000, /* input clock of 50MHz */ | 56 | .clock_khz = 50000, /* input clock of 50MHz */ |
57 | .devices = ocores_i2c_board_info, /* optional table of devices */ | ||
58 | .num_devices = ARRAY_SIZE(ocores_i2c_board_info), /* table size */ | ||
42 | }; | 59 | }; |
43 | 60 | ||
44 | static struct platform_device myi2c = { | 61 | static struct platform_device myi2c = { |
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index ef1efa79b1df..f889481762b5 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 | |||
@@ -4,7 +4,7 @@ Supported adapters: | |||
4 | * Intel 82371AB PIIX4 and PIIX4E | 4 | * Intel 82371AB PIIX4 and PIIX4E |
5 | * Intel 82443MX (440MX) | 5 | * Intel 82443MX (440MX) |
6 | Datasheet: Publicly available at the Intel website | 6 | Datasheet: Publicly available at the Intel website |
7 | * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges | 7 | * ServerWorks OSB4, CSB5, CSB6, HT-1000 and HT-1100 southbridges |
8 | Datasheet: Only available via NDA from ServerWorks | 8 | Datasheet: Only available via NDA from ServerWorks |
9 | * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges | 9 | * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges |
10 | Datasheet: Not publicly available | 10 | Datasheet: Not publicly available |
diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro index 22efedf60c87..2e758b0e9456 100644 --- a/Documentation/i2c/busses/i2c-viapro +++ b/Documentation/i2c/busses/i2c-viapro | |||
@@ -19,6 +19,9 @@ Supported adapters: | |||
19 | * VIA Technologies, Inc. VX800/VX820 | 19 | * VIA Technologies, Inc. VX800/VX820 |
20 | Datasheet: available on http://linux.via.com.tw | 20 | Datasheet: available on http://linux.via.com.tw |
21 | 21 | ||
22 | * VIA Technologies, Inc. VX855/VX875 | ||
23 | Datasheet: Availability unknown | ||
24 | |||
22 | Authors: | 25 | Authors: |
23 | Kyösti Mälkki <kmalkki@cc.hut.fi>, | 26 | Kyösti Mälkki <kmalkki@cc.hut.fi>, |
24 | Mark D. Studebaker <mdsxyz123@yahoo.com>, | 27 | Mark D. Studebaker <mdsxyz123@yahoo.com>, |
@@ -53,6 +56,7 @@ Your lspci -n listing must show one of these : | |||
53 | device 1106:3287 (VT8251) | 56 | device 1106:3287 (VT8251) |
54 | device 1106:8324 (CX700) | 57 | device 1106:8324 (CX700) |
55 | device 1106:8353 (VX800/VX820) | 58 | device 1106:8353 (VX800/VX820) |
59 | device 1106:8409 (VX855/VX875) | ||
56 | 60 | ||
57 | If none of these show up, you should look in the BIOS for settings like | 61 | If none of these show up, you should look in the BIOS for settings like |
58 | enable ACPI / SMBus or even USB. | 62 | enable ACPI / SMBus or even USB. |
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices new file mode 100644 index 000000000000..c740b7b41088 --- /dev/null +++ b/Documentation/i2c/instantiating-devices | |||
@@ -0,0 +1,211 @@ | |||
1 | How to instantiate I2C devices | ||
2 | ============================== | ||
3 | |||
4 | Unlike PCI or USB devices, I2C devices are not enumerated at the hardware | ||
5 | level. Instead, the software must know which devices are connected on each | ||
6 | I2C bus segment, and what address these devices are using. For this | ||
7 | reason, the kernel code must instantiate I2C devices explicitly. There are | ||
8 | several ways to achieve this, depending on the context and requirements. | ||
9 | |||
10 | |||
11 | Method 1: Declare the I2C devices by bus number | ||
12 | ----------------------------------------------- | ||
13 | |||
14 | This method is appropriate when the I2C bus is a system bus as is the case | ||
15 | for many embedded systems. On such systems, each I2C bus has a number | ||
16 | which is known in advance. It is thus possible to pre-declare the I2C | ||
17 | devices which live on this bus. This is done with an array of struct | ||
18 | i2c_board_info which is registered by calling i2c_register_board_info(). | ||
19 | |||
20 | Example (from omap2 h4): | ||
21 | |||
22 | static struct i2c_board_info __initdata h4_i2c_board_info[] = { | ||
23 | { | ||
24 | I2C_BOARD_INFO("isp1301_omap", 0x2d), | ||
25 | .irq = OMAP_GPIO_IRQ(125), | ||
26 | }, | ||
27 | { /* EEPROM on mainboard */ | ||
28 | I2C_BOARD_INFO("24c01", 0x52), | ||
29 | .platform_data = &m24c01, | ||
30 | }, | ||
31 | { /* EEPROM on cpu card */ | ||
32 | I2C_BOARD_INFO("24c01", 0x57), | ||
33 | .platform_data = &m24c01, | ||
34 | }, | ||
35 | }; | ||
36 | |||
37 | static void __init omap_h4_init(void) | ||
38 | { | ||
39 | (...) | ||
40 | i2c_register_board_info(1, h4_i2c_board_info, | ||
41 | ARRAY_SIZE(h4_i2c_board_info)); | ||
42 | (...) | ||
43 | } | ||
44 | |||
45 | The above code declares 3 devices on I2C bus 1, including their respective | ||
46 | addresses and custom data needed by their drivers. When the I2C bus in | ||
47 | question is registered, the I2C devices will be instantiated automatically | ||
48 | by i2c-core. | ||
49 | |||
50 | The devices will be automatically unbound and destroyed when the I2C bus | ||
51 | they sit on goes away (if ever.) | ||
52 | |||
53 | |||
54 | Method 2: Instantiate the devices explicitly | ||
55 | -------------------------------------------- | ||
56 | |||
57 | This method is appropriate when a larger device uses an I2C bus for | ||
58 | internal communication. A typical case is TV adapters. These can have a | ||
59 | tuner, a video decoder, an audio decoder, etc. usually connected to the | ||
60 | main chip by the means of an I2C bus. You won't know the number of the I2C | ||
61 | bus in advance, so the method 1 described above can't be used. Instead, | ||
62 | you can instantiate your I2C devices explicitly. This is done by filling | ||
63 | a struct i2c_board_info and calling i2c_new_device(). | ||
64 | |||
65 | Example (from the sfe4001 network driver): | ||
66 | |||
67 | static struct i2c_board_info sfe4001_hwmon_info = { | ||
68 | I2C_BOARD_INFO("max6647", 0x4e), | ||
69 | }; | ||
70 | |||
71 | int sfe4001_init(struct efx_nic *efx) | ||
72 | { | ||
73 | (...) | ||
74 | efx->board_info.hwmon_client = | ||
75 | i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info); | ||
76 | |||
77 | (...) | ||
78 | } | ||
79 | |||
80 | The above code instantiates 1 I2C device on the I2C bus which is on the | ||
81 | network adapter in question. | ||
82 | |||
83 | A variant of this is when you don't know for sure if an I2C device is | ||
84 | present or not (for example for an optional feature which is not present | ||
85 | on cheap variants of a board but you have no way to tell them apart), or | ||
86 | it may have different addresses from one board to the next (manufacturer | ||
87 | changing its design without notice). In this case, you can call | ||
88 | i2c_new_probed_device() instead of i2c_new_device(). | ||
89 | |||
90 | Example (from the pnx4008 OHCI driver): | ||
91 | |||
92 | static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END }; | ||
93 | |||
94 | static int __devinit usb_hcd_pnx4008_probe(struct platform_device *pdev) | ||
95 | { | ||
96 | (...) | ||
97 | struct i2c_adapter *i2c_adap; | ||
98 | struct i2c_board_info i2c_info; | ||
99 | |||
100 | (...) | ||
101 | i2c_adap = i2c_get_adapter(2); | ||
102 | memset(&i2c_info, 0, sizeof(struct i2c_board_info)); | ||
103 | strlcpy(i2c_info.name, "isp1301_pnx", I2C_NAME_SIZE); | ||
104 | isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info, | ||
105 | normal_i2c); | ||
106 | i2c_put_adapter(i2c_adap); | ||
107 | (...) | ||
108 | } | ||
109 | |||
110 | The above code instantiates up to 1 I2C device on the I2C bus which is on | ||
111 | the OHCI adapter in question. It first tries at address 0x2c, if nothing | ||
112 | is found there it tries address 0x2d, and if still nothing is found, it | ||
113 | simply gives up. | ||
114 | |||
115 | The driver which instantiated the I2C device is responsible for destroying | ||
116 | it on cleanup. This is done by calling i2c_unregister_device() on the | ||
117 | pointer that was earlier returned by i2c_new_device() or | ||
118 | i2c_new_probed_device(). | ||
119 | |||
120 | |||
121 | Method 3: Probe an I2C bus for certain devices | ||
122 | ---------------------------------------------- | ||
123 | |||
124 | Sometimes you do not have enough information about an I2C device, not even | ||
125 | to call i2c_new_probed_device(). The typical case is hardware monitoring | ||
126 | chips on PC mainboards. There are several dozen models, which can live | ||
127 | at 25 different addresses. Given the huge number of mainboards out there, | ||
128 | it is next to impossible to build an exhaustive list of the hardware | ||
129 | monitoring chips being used. Fortunately, most of these chips have | ||
130 | manufacturer and device ID registers, so they can be identified by | ||
131 | probing. | ||
132 | |||
133 | In that case, I2C devices are neither declared nor instantiated | ||
134 | explicitly. Instead, i2c-core will probe for such devices as soon as their | ||
135 | drivers are loaded, and if any is found, an I2C device will be | ||
136 | instantiated automatically. In order to prevent any misbehavior of this | ||
137 | mechanism, the following restrictions apply: | ||
138 | * The I2C device driver must implement the detect() method, which | ||
139 | identifies a supported device by reading from arbitrary registers. | ||
140 | * Only buses which are likely to have a supported device and agree to be | ||
141 | probed, will be probed. For example this avoids probing for hardware | ||
142 | monitoring chips on a TV adapter. | ||
143 | |||
144 | Example: | ||
145 | See lm90_driver and lm90_detect() in drivers/hwmon/lm90.c | ||
146 | |||
147 | I2C devices instantiated as a result of such a successful probe will be | ||
148 | destroyed automatically when the driver which detected them is removed, | ||
149 | or when the underlying I2C bus is itself destroyed, whichever happens | ||
150 | first. | ||
151 | |||
152 | Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6 | ||
153 | kernels will find out that this method 3 is essentially similar to what | ||
154 | was done there. Two significant differences are: | ||
155 | * Probing is only one way to instantiate I2C devices now, while it was the | ||
156 | only way back then. Where possible, methods 1 and 2 should be preferred. | ||
157 | Method 3 should only be used when there is no other way, as it can have | ||
158 | undesirable side effects. | ||
159 | * I2C buses must now explicitly say which I2C driver classes can probe | ||
160 | them (by the means of the class bitfield), while all I2C buses were | ||
161 | probed by default back then. The default is an empty class which means | ||
162 | that no probing happens. The purpose of the class bitfield is to limit | ||
163 | the aforementioned undesirable side effects. | ||
164 | |||
165 | Once again, method 3 should be avoided wherever possible. Explicit device | ||
166 | instantiation (methods 1 and 2) is much preferred for it is safer and | ||
167 | faster. | ||
168 | |||
169 | |||
170 | Method 4: Instantiate from user-space | ||
171 | ------------------------------------- | ||
172 | |||
173 | In general, the kernel should know which I2C devices are connected and | ||
174 | what addresses they live at. However, in certain cases, it does not, so a | ||
175 | sysfs interface was added to let the user provide the information. This | ||
176 | interface is made of 2 attribute files which are created in every I2C bus | ||
177 | directory: new_device and delete_device. Both files are write only and you | ||
178 | must write the right parameters to them in order to properly instantiate, | ||
179 | respectively delete, an I2C device. | ||
180 | |||
181 | File new_device takes 2 parameters: the name of the I2C device (a string) | ||
182 | and the address of the I2C device (a number, typically expressed in | ||
183 | hexadecimal starting with 0x, but can also be expressed in decimal.) | ||
184 | |||
185 | File delete_device takes a single parameter: the address of the I2C | ||
186 | device. As no two devices can live at the same address on a given I2C | ||
187 | segment, the address is sufficient to uniquely identify the device to be | ||
188 | deleted. | ||
189 | |||
190 | Example: | ||
191 | # echo eeprom 0x50 > /sys/class/i2c-adapter/i2c-3/new_device | ||
192 | |||
193 | While this interface should only be used when in-kernel device declaration | ||
194 | can't be done, there is a variety of cases where it can be helpful: | ||
195 | * The I2C driver usually detects devices (method 3 above) but the bus | ||
196 | segment your device lives on doesn't have the proper class bit set and | ||
197 | thus detection doesn't trigger. | ||
198 | * The I2C driver usually detects devices, but your device lives at an | ||
199 | unexpected address. | ||
200 | * The I2C driver usually detects devices, but your device is not detected, | ||
201 | either because the detection routine is too strict, or because your | ||
202 | device is not officially supported yet but you know it is compatible. | ||
203 | * You are developing a driver on a test board, where you soldered the I2C | ||
204 | device yourself. | ||
205 | |||
206 | This interface is a replacement for the force_* module parameters some I2C | ||
207 | drivers implement. Being implemented in i2c-core rather than in each | ||
208 | device driver individually, it is much more efficient, and also has the | ||
209 | advantage that you do not have to reload the driver to change a setting. | ||
210 | You can also instantiate the device before the driver is loaded or even | ||
211 | available, and you don't need to know what driver the device needs. | ||
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 6b9af7d479c2..7860aafb483d 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients | |||
@@ -126,19 +126,9 @@ different) configuration information, as do drivers handling chip variants | |||
126 | that can't be distinguished by protocol probing, or which need some board | 126 | that can't be distinguished by protocol probing, or which need some board |
127 | specific information to operate correctly. | 127 | specific information to operate correctly. |
128 | 128 | ||
129 | Accordingly, the I2C stack now has two models for associating I2C devices | ||
130 | with their drivers: the original "legacy" model, and a newer one that's | ||
131 | fully compatible with the Linux 2.6 driver model. These models do not mix, | ||
132 | since the "legacy" model requires drivers to create "i2c_client" device | ||
133 | objects after SMBus style probing, while the Linux driver model expects | ||
134 | drivers to be given such device objects in their probe() routines. | ||
135 | 129 | ||
136 | The legacy model is deprecated now and will soon be removed, so we no | 130 | Device/Driver Binding |
137 | longer document it here. | 131 | --------------------- |
138 | |||
139 | |||
140 | Standard Driver Model Binding ("New Style") | ||
141 | ------------------------------------------- | ||
142 | 132 | ||
143 | System infrastructure, typically board-specific initialization code or | 133 | System infrastructure, typically board-specific initialization code or |
144 | boot firmware, reports what I2C devices exist. For example, there may be | 134 | boot firmware, reports what I2C devices exist. For example, there may be |
@@ -201,21 +191,32 @@ a given I2C bus. This is for example the case of hardware monitoring | |||
201 | devices on a PC's SMBus. In that case, you may want to let your driver | 191 | devices on a PC's SMBus. In that case, you may want to let your driver |
202 | detect supported devices automatically. This is how the legacy model | 192 | detect supported devices automatically. This is how the legacy model |
203 | was working, and is now available as an extension to the standard | 193 | was working, and is now available as an extension to the standard |
204 | driver model (so that we can finally get rid of the legacy model.) | 194 | driver model. |
205 | 195 | ||
206 | You simply have to define a detect callback which will attempt to | 196 | You simply have to define a detect callback which will attempt to |
207 | identify supported devices (returning 0 for supported ones and -ENODEV | 197 | identify supported devices (returning 0 for supported ones and -ENODEV |
208 | for unsupported ones), a list of addresses to probe, and a device type | 198 | for unsupported ones), a list of addresses to probe, and a device type |
209 | (or class) so that only I2C buses which may have that type of device | 199 | (or class) so that only I2C buses which may have that type of device |
210 | connected (and not otherwise enumerated) will be probed. The i2c | 200 | connected (and not otherwise enumerated) will be probed. For example, |
211 | core will then call you back as needed and will instantiate a device | 201 | a driver for a hardware monitoring chip for which auto-detection is |
212 | for you for every successful detection. | 202 | needed would set its class to I2C_CLASS_HWMON, and only I2C adapters |
203 | with a class including I2C_CLASS_HWMON would be probed by this driver. | ||
204 | Note that the absence of matching classes does not prevent the use of | ||
205 | a device of that type on the given I2C adapter. All it prevents is | ||
206 | auto-detection; explicit instantiation of devices is still possible. | ||
213 | 207 | ||
214 | Note that this mechanism is purely optional and not suitable for all | 208 | Note that this mechanism is purely optional and not suitable for all |
215 | devices. You need some reliable way to identify the supported devices | 209 | devices. You need some reliable way to identify the supported devices |
216 | (typically using device-specific, dedicated identification registers), | 210 | (typically using device-specific, dedicated identification registers), |
217 | otherwise misdetections are likely to occur and things can get wrong | 211 | otherwise misdetections are likely to occur and things can get wrong |
218 | quickly. | 212 | quickly. Keep in mind that the I2C protocol doesn't include any |
213 | standard way to detect the presence of a chip at a given address, let | ||
214 | alone a standard way to identify devices. Even worse is the lack of | ||
215 | semantics associated to bus transfers, which means that the same | ||
216 | transfer can be seen as a read operation by a chip and as a write | ||
217 | operation by another chip. For these reasons, explicit device | ||
218 | instantiation should always be preferred to auto-detection where | ||
219 | possible. | ||
219 | 220 | ||
220 | 221 | ||
221 | Device Deletion | 222 | Device Deletion |
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt index 84f7cb3d5bec..ffb5c80bec3e 100644 --- a/Documentation/ia64/kvm.txt +++ b/Documentation/ia64/kvm.txt | |||
@@ -42,7 +42,7 @@ Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qe | |||
42 | hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg | 42 | hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg |
43 | you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. | 43 | you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. |
44 | 44 | ||
45 | (3) Rename the firware you owned to Flash.fd, and copy it to /usr/local/share/qemu | 45 | (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu |
46 | 46 | ||
47 | 4. Boot up Linux or Windows guests: | 47 | 4. Boot up Linux or Windows guests: |
48 | 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. | 48 | 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. |
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt index 0c78f4b1d9d9..e77bebfa7b0d 100644 --- a/Documentation/ide/ide.txt +++ b/Documentation/ide/ide.txt | |||
@@ -216,6 +216,8 @@ Other kernel parameters for ide_core are: | |||
216 | 216 | ||
217 | * "noflush=[interface_number.device_number]" to disable flush requests | 217 | * "noflush=[interface_number.device_number]" to disable flush requests |
218 | 218 | ||
219 | * "nohpa=[interface_number.device_number]" to disable Host Protected Area | ||
220 | |||
219 | * "noprobe=[interface_number.device_number]" to skip probing | 221 | * "noprobe=[interface_number.device_number]" to skip probing |
220 | 222 | ||
221 | * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit | 223 | * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit |
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt index 864ff3283780..6d40f00b358c 100644 --- a/Documentation/infiniband/ipoib.txt +++ b/Documentation/infiniband/ipoib.txt | |||
@@ -24,6 +24,49 @@ Partitions and P_Keys | |||
24 | The P_Key for any interface is given by the "pkey" file, and the | 24 | The P_Key for any interface is given by the "pkey" file, and the |
25 | main interface for a subinterface is in "parent." | 25 | main interface for a subinterface is in "parent." |
26 | 26 | ||
27 | Datagram vs Connected modes | ||
28 | |||
29 | The IPoIB driver supports two modes of operation: datagram and | ||
30 | connected. The mode is set and read through an interface's | ||
31 | /sys/class/net/<intf name>/mode file. | ||
32 | |||
33 | In datagram mode, the IB UD (Unreliable Datagram) transport is used | ||
34 | and so the interface MTU has is equal to the IB L2 MTU minus the | ||
35 | IPoIB encapsulation header (4 bytes). For example, in a typical IB | ||
36 | fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes. | ||
37 | |||
38 | In connected mode, the IB RC (Reliable Connected) transport is used. | ||
39 | Connected mode is to takes advantage of the connected nature of the | ||
40 | IB transport and allows an MTU up to the maximal IP packet size of | ||
41 | 64K, which reduces the number of IP packets needed for handling | ||
42 | large UDP datagrams, TCP segments, etc and increases the performance | ||
43 | for large messages. | ||
44 | |||
45 | In connected mode, the interface's UD QP is still used for multicast | ||
46 | and communication with peers that don't support connected mode. In | ||
47 | this case, RX emulation of ICMP PMTU packets is used to cause the | ||
48 | networking stack to use the smaller UD MTU for these neighbours. | ||
49 | |||
50 | Stateless offloads | ||
51 | |||
52 | If the IB HW supports IPoIB stateless offloads, IPoIB advertises | ||
53 | TCP/IP checksum and/or Large Send (LSO) offloading capability to the | ||
54 | network stack. | ||
55 | |||
56 | Large Receive (LRO) offloading is also implemented and may be turned | ||
57 | on/off using ethtool calls. Currently LRO is supported only for | ||
58 | checksum offload capable devices. | ||
59 | |||
60 | Stateless offloads are supported only in datagram mode. | ||
61 | |||
62 | Interrupt moderation | ||
63 | |||
64 | If the underlying IB device supports CQ event moderation, one can | ||
65 | use ethtool to set interrupt mitigation parameters and thus reduce | ||
66 | the overhead incurred by handling interrupts. The main code path of | ||
67 | IPoIB doesn't use events for TX completion signaling so only RX | ||
68 | moderation is supported. | ||
69 | |||
27 | Debugging Information | 70 | Debugging Information |
28 | 71 | ||
29 | By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set | 72 | By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set |
@@ -55,3 +98,5 @@ References | |||
55 | http://ietf.org/rfc/rfc4391.txt | 98 | http://ietf.org/rfc/rfc4391.txt |
56 | IP over InfiniBand (IPoIB) Architecture (RFC 4392) | 99 | IP over InfiniBand (IPoIB) Architecture (RFC 4392) |
57 | http://ietf.org/rfc/rfc4392.txt | 100 | http://ietf.org/rfc/rfc4392.txt |
101 | IP over InfiniBand: Connected Mode (RFC 4755) | ||
102 | http://ietf.org/rfc/rfc4755.txt | ||
diff --git a/Documentation/input/bcm5974.txt b/Documentation/input/bcm5974.txt new file mode 100644 index 000000000000..5e22dcf6d48d --- /dev/null +++ b/Documentation/input/bcm5974.txt | |||
@@ -0,0 +1,65 @@ | |||
1 | BCM5974 Driver (bcm5974) | ||
2 | ------------------------ | ||
3 | Copyright (C) 2008-2009 Henrik Rydberg <rydberg@euromail.se> | ||
4 | |||
5 | The USB initialization and package decoding was made by Scott Shawcroft as | ||
6 | part of the touchd user-space driver project: | ||
7 | Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com) | ||
8 | |||
9 | The BCM5974 driver is based on the appletouch driver: | ||
10 | Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) | ||
11 | Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net) | ||
12 | Copyright (C) 2005 Stelian Pop (stelian@popies.net) | ||
13 | Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) | ||
14 | Copyright (C) 2005 Peter Osterlund (petero2@telia.com) | ||
15 | Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) | ||
16 | Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) | ||
17 | |||
18 | This driver adds support for the multi-touch trackpad on the new Apple | ||
19 | Macbook Air and Macbook Pro laptops. It replaces the appletouch driver on | ||
20 | those computers, and integrates well with the synaptics driver of the Xorg | ||
21 | system. | ||
22 | |||
23 | Known to work on Macbook Air, Macbook Pro Penryn and the new unibody | ||
24 | Macbook 5 and Macbook Pro 5. | ||
25 | |||
26 | Usage | ||
27 | ----- | ||
28 | |||
29 | The driver loads automatically for the supported usb device ids, and | ||
30 | becomes available both as an event device (/dev/input/event*) and as a | ||
31 | mouse via the mousedev driver (/dev/input/mice). | ||
32 | |||
33 | USB Race | ||
34 | -------- | ||
35 | |||
36 | The Apple multi-touch trackpads report both mouse and keyboard events via | ||
37 | different interfaces of the same usb device. This creates a race condition | ||
38 | with the HID driver, which, if not told otherwise, will find the standard | ||
39 | HID mouse and keyboard, and claim the whole device. To remedy, the usb | ||
40 | product id must be listed in the mouse_ignore list of the hid driver. | ||
41 | |||
42 | Debug output | ||
43 | ------------ | ||
44 | |||
45 | To ease the development for new hardware version, verbose packet output can | ||
46 | be switched on with the debug kernel module parameter. The range [1-9] | ||
47 | yields different levels of verbosity. Example (as root): | ||
48 | |||
49 | echo -n 9 > /sys/module/bcm5974/parameters/debug | ||
50 | |||
51 | tail -f /var/log/debug | ||
52 | |||
53 | echo -n 0 > /sys/module/bcm5974/parameters/debug | ||
54 | |||
55 | Trivia | ||
56 | ------ | ||
57 | |||
58 | The driver was developed at the ubuntu forums in June 2008 [1], and now has | ||
59 | a more permanent home at bitmath.org [2]. | ||
60 | |||
61 | Links | ||
62 | ----- | ||
63 | |||
64 | [1] http://ubuntuforums.org/showthread.php?t=840040 | ||
65 | [2] http://http://bitmath.org/code/ | ||
diff --git a/Documentation/input/input.txt b/Documentation/input/input.txt index 686ee9932dff..b93c08442e3c 100644 --- a/Documentation/input/input.txt +++ b/Documentation/input/input.txt | |||
@@ -278,7 +278,7 @@ struct input_event { | |||
278 | }; | 278 | }; |
279 | 279 | ||
280 | 'time' is the timestamp, it returns the time at which the event happened. | 280 | 'time' is the timestamp, it returns the time at which the event happened. |
281 | Type is for example EV_REL for relative moment, REL_KEY for a keypress or | 281 | Type is for example EV_REL for relative moment, EV_KEY for a keypress or |
282 | release. More types are defined in include/linux/input.h. | 282 | release. More types are defined in include/linux/input.h. |
283 | 283 | ||
284 | 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete | 284 | 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete |
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt new file mode 100644 index 000000000000..a12ea3b586e6 --- /dev/null +++ b/Documentation/input/multi-touch-protocol.txt | |||
@@ -0,0 +1,195 @@ | |||
1 | Multi-touch (MT) Protocol | ||
2 | ------------------------- | ||
3 | Copyright (C) 2009 Henrik Rydberg <rydberg@euromail.se> | ||
4 | |||
5 | |||
6 | Introduction | ||
7 | ------------ | ||
8 | |||
9 | In order to utilize the full power of the new multi-touch devices, a way to | ||
10 | report detailed finger data to user space is needed. This document | ||
11 | describes the multi-touch (MT) protocol which allows kernel drivers to | ||
12 | report details for an arbitrary number of fingers. | ||
13 | |||
14 | |||
15 | Usage | ||
16 | ----- | ||
17 | |||
18 | Anonymous finger details are sent sequentially as separate packets of ABS | ||
19 | events. Only the ABS_MT events are recognized as part of a finger | ||
20 | packet. The end of a packet is marked by calling the input_mt_sync() | ||
21 | function, which generates a SYN_MT_REPORT event. This instructs the | ||
22 | receiver to accept the data for the current finger and prepare to receive | ||
23 | another. The end of a multi-touch transfer is marked by calling the usual | ||
24 | input_sync() function. This instructs the receiver to act upon events | ||
25 | accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new | ||
26 | set of events/packets. | ||
27 | |||
28 | A set of ABS_MT events with the desired properties is defined. The events | ||
29 | are divided into categories, to allow for partial implementation. The | ||
30 | minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and | ||
31 | ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the | ||
32 | device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size | ||
33 | of the approaching finger. Anisotropy and direction may be specified with | ||
34 | ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. The | ||
35 | ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a | ||
36 | finger or a pen or something else. Devices with more granular information | ||
37 | may specify general shapes as blobs, i.e., as a sequence of rectangular | ||
38 | shapes grouped together by an ABS_MT_BLOB_ID. Finally, for the few devices | ||
39 | that currently support it, the ABS_MT_TRACKING_ID event may be used to | ||
40 | report finger tracking from hardware [5]. | ||
41 | |||
42 | Here is what a minimal event sequence for a two-finger touch would look | ||
43 | like: | ||
44 | |||
45 | ABS_MT_TOUCH_MAJOR | ||
46 | ABS_MT_POSITION_X | ||
47 | ABS_MT_POSITION_Y | ||
48 | SYN_MT_REPORT | ||
49 | ABS_MT_TOUCH_MAJOR | ||
50 | ABS_MT_POSITION_X | ||
51 | ABS_MT_POSITION_Y | ||
52 | SYN_MT_REPORT | ||
53 | SYN_REPORT | ||
54 | |||
55 | |||
56 | Event Semantics | ||
57 | --------------- | ||
58 | |||
59 | The word "contact" is used to describe a tool which is in direct contact | ||
60 | with the surface. A finger, a pen or a rubber all classify as contacts. | ||
61 | |||
62 | ABS_MT_TOUCH_MAJOR | ||
63 | |||
64 | The length of the major axis of the contact. The length should be given in | ||
65 | surface units. If the surface has an X times Y resolution, the largest | ||
66 | possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4]. | ||
67 | |||
68 | ABS_MT_TOUCH_MINOR | ||
69 | |||
70 | The length, in surface units, of the minor axis of the contact. If the | ||
71 | contact is circular, this event can be omitted [4]. | ||
72 | |||
73 | ABS_MT_WIDTH_MAJOR | ||
74 | |||
75 | The length, in surface units, of the major axis of the approaching | ||
76 | tool. This should be understood as the size of the tool itself. The | ||
77 | orientation of the contact and the approaching tool are assumed to be the | ||
78 | same [4]. | ||
79 | |||
80 | ABS_MT_WIDTH_MINOR | ||
81 | |||
82 | The length, in surface units, of the minor axis of the approaching | ||
83 | tool. Omit if circular [4]. | ||
84 | |||
85 | The above four values can be used to derive additional information about | ||
86 | the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates | ||
87 | the notion of pressure. The fingers of the hand and the palm all have | ||
88 | different characteristic widths [1]. | ||
89 | |||
90 | ABS_MT_ORIENTATION | ||
91 | |||
92 | The orientation of the ellipse. The value should describe a signed quarter | ||
93 | of a revolution clockwise around the touch center. The signed value range | ||
94 | is arbitrary, but zero should be returned for a finger aligned along the Y | ||
95 | axis of the surface, a negative value when finger is turned to the left, and | ||
96 | a positive value when finger turned to the right. When completely aligned with | ||
97 | the X axis, the range max should be returned. Orientation can be omitted | ||
98 | if the touching object is circular, or if the information is not available | ||
99 | in the kernel driver. Partial orientation support is possible if the device | ||
100 | can distinguish between the two axis, but not (uniquely) any values in | ||
101 | between. In such cases, the range of ABS_MT_ORIENTATION should be [0, 1] | ||
102 | [4]. | ||
103 | |||
104 | ABS_MT_POSITION_X | ||
105 | |||
106 | The surface X coordinate of the center of the touching ellipse. | ||
107 | |||
108 | ABS_MT_POSITION_Y | ||
109 | |||
110 | The surface Y coordinate of the center of the touching ellipse. | ||
111 | |||
112 | ABS_MT_TOOL_TYPE | ||
113 | |||
114 | The type of approaching tool. A lot of kernel drivers cannot distinguish | ||
115 | between different tool types, such as a finger or a pen. In such cases, the | ||
116 | event should be omitted. The protocol currently supports MT_TOOL_FINGER and | ||
117 | MT_TOOL_PEN [2]. | ||
118 | |||
119 | ABS_MT_BLOB_ID | ||
120 | |||
121 | The BLOB_ID groups several packets together into one arbitrarily shaped | ||
122 | contact. This is a low-level anonymous grouping, and should not be confused | ||
123 | with the high-level trackingID [5]. Most kernel drivers will not have blob | ||
124 | capability, and can safely omit the event. | ||
125 | |||
126 | ABS_MT_TRACKING_ID | ||
127 | |||
128 | The TRACKING_ID identifies an initiated contact throughout its life cycle | ||
129 | [5]. There are currently only a few devices that support it, so this event | ||
130 | should normally be omitted. | ||
131 | |||
132 | |||
133 | Event Computation | ||
134 | ----------------- | ||
135 | |||
136 | The flora of different hardware unavoidably leads to some devices fitting | ||
137 | better to the MT protocol than others. To simplify and unify the mapping, | ||
138 | this section gives recipes for how to compute certain events. | ||
139 | |||
140 | For devices reporting contacts as rectangular shapes, signed orientation | ||
141 | cannot be obtained. Assuming X and Y are the lengths of the sides of the | ||
142 | touching rectangle, here is a simple formula that retains the most | ||
143 | information possible: | ||
144 | |||
145 | ABS_MT_TOUCH_MAJOR := max(X, Y) | ||
146 | ABS_MT_TOUCH_MINOR := min(X, Y) | ||
147 | ABS_MT_ORIENTATION := bool(X > Y) | ||
148 | |||
149 | The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that | ||
150 | the device can distinguish between a finger along the Y axis (0) and a | ||
151 | finger along the X axis (1). | ||
152 | |||
153 | |||
154 | Finger Tracking | ||
155 | --------------- | ||
156 | |||
157 | The kernel driver should generate an arbitrary enumeration of the set of | ||
158 | anonymous contacts currently on the surface. The order in which the packets | ||
159 | appear in the event stream is not important. | ||
160 | |||
161 | The process of finger tracking, i.e., to assign a unique trackingID to each | ||
162 | initiated contact on the surface, is left to user space; preferably the | ||
163 | multi-touch X driver [3]. In that driver, the trackingID stays the same and | ||
164 | unique until the contact vanishes (when the finger leaves the surface). The | ||
165 | problem of assigning a set of anonymous fingers to a set of identified | ||
166 | fingers is a euclidian bipartite matching problem at each event update, and | ||
167 | relies on a sufficiently rapid update rate. | ||
168 | |||
169 | There are a few devices that support trackingID in hardware. User space can | ||
170 | make use of these native identifiers to reduce bandwidth and cpu usage. | ||
171 | |||
172 | |||
173 | Notes | ||
174 | ----- | ||
175 | |||
176 | In order to stay compatible with existing applications, the data | ||
177 | reported in a finger packet must not be recognized as single-touch | ||
178 | events. In addition, all finger data must bypass input filtering, | ||
179 | since subsequent events of the same type refer to different fingers. | ||
180 | |||
181 | The first kernel driver to utilize the MT protocol is the bcm5974 driver, | ||
182 | where examples can be found. | ||
183 | |||
184 | [1] With the extension ABS_MT_APPROACH_X and ABS_MT_APPROACH_Y, the | ||
185 | difference between the contact position and the approaching tool position | ||
186 | could be used to derive tilt. | ||
187 | [2] The list can of course be extended. | ||
188 | [3] The multi-touch X driver is currently in the prototyping stage. At the | ||
189 | time of writing (April 2009), the MT protocol is not yet merged, and the | ||
190 | prototype implements finger matching, basic mouse support and two-finger | ||
191 | scrolling. The project aims at improving the quality of current multi-touch | ||
192 | functionality available in the Synaptics X driver, and in addition | ||
193 | implement more advanced gestures. | ||
194 | [4] See the section on event computation. | ||
195 | [5] See the section on finger tracking. | ||
diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt new file mode 100644 index 000000000000..3a6aec40c0b0 --- /dev/null +++ b/Documentation/input/rotary-encoder.txt | |||
@@ -0,0 +1,108 @@ | |||
1 | rotary-encoder - a generic driver for GPIO connected devices | ||
2 | Daniel Mack <daniel@caiaq.de>, Feb 2009 | ||
3 | |||
4 | 0. Function | ||
5 | ----------- | ||
6 | |||
7 | Rotary encoders are devices which are connected to the CPU or other | ||
8 | peripherals with two wires. The outputs are phase-shifted by 90 degrees | ||
9 | and by triggering on falling and rising edges, the turn direction can | ||
10 | be determined. | ||
11 | |||
12 | The phase diagram of these two outputs look like this: | ||
13 | |||
14 | _____ _____ _____ | ||
15 | | | | | | | | ||
16 | Channel A ____| |_____| |_____| |____ | ||
17 | |||
18 | : : : : : : : : : : : : | ||
19 | __ _____ _____ _____ | ||
20 | | | | | | | | | ||
21 | Channel B |_____| |_____| |_____| |__ | ||
22 | |||
23 | : : : : : : : : : : : : | ||
24 | Event a b c d a b c d a b c d | ||
25 | |||
26 | |<-------->| | ||
27 | one step | ||
28 | |||
29 | |||
30 | For more information, please see | ||
31 | http://en.wikipedia.org/wiki/Rotary_encoder | ||
32 | |||
33 | |||
34 | 1. Events / state machine | ||
35 | ------------------------- | ||
36 | |||
37 | a) Rising edge on channel A, channel B in low state | ||
38 | This state is used to recognize a clockwise turn | ||
39 | |||
40 | b) Rising edge on channel B, channel A in high state | ||
41 | When entering this state, the encoder is put into 'armed' state, | ||
42 | meaning that there it has seen half the way of a one-step transition. | ||
43 | |||
44 | c) Falling edge on channel A, channel B in high state | ||
45 | This state is used to recognize a counter-clockwise turn | ||
46 | |||
47 | d) Falling edge on channel B, channel A in low state | ||
48 | Parking position. If the encoder enters this state, a full transition | ||
49 | should have happend, unless it flipped back on half the way. The | ||
50 | 'armed' state tells us about that. | ||
51 | |||
52 | 2. Platform requirements | ||
53 | ------------------------ | ||
54 | |||
55 | As there is no hardware dependent call in this driver, the platform it is | ||
56 | used with must support gpiolib. Another requirement is that IRQs must be | ||
57 | able to fire on both edges. | ||
58 | |||
59 | |||
60 | 3. Board integration | ||
61 | -------------------- | ||
62 | |||
63 | To use this driver in your system, register a platform_device with the | ||
64 | name 'rotary-encoder' and associate the IRQs and some specific platform | ||
65 | data with it. | ||
66 | |||
67 | struct rotary_encoder_platform_data is declared in | ||
68 | include/linux/rotary-encoder.h and needs to be filled with the number of | ||
69 | steps the encoder has and can carry information about externally inverted | ||
70 | signals (because of an inverting buffer or other reasons). The encoder | ||
71 | can be set up to deliver input information as either an absolute or relative | ||
72 | axes. For relative axes the input event returns +/-1 for each step. For | ||
73 | absolute axes the position of the encoder can either roll over between zero | ||
74 | and the number of steps or will clamp at the maximum and zero depending on | ||
75 | the configuration. | ||
76 | |||
77 | Because GPIO to IRQ mapping is platform specific, this information must | ||
78 | be given in seperately to the driver. See the example below. | ||
79 | |||
80 | ---------<snip>--------- | ||
81 | |||
82 | /* board support file example */ | ||
83 | |||
84 | #include <linux/input.h> | ||
85 | #include <linux/rotary_encoder.h> | ||
86 | |||
87 | #define GPIO_ROTARY_A 1 | ||
88 | #define GPIO_ROTARY_B 2 | ||
89 | |||
90 | static struct rotary_encoder_platform_data my_rotary_encoder_info = { | ||
91 | .steps = 24, | ||
92 | .axis = ABS_X, | ||
93 | .relative_axis = false, | ||
94 | .rollover = false, | ||
95 | .gpio_a = GPIO_ROTARY_A, | ||
96 | .gpio_b = GPIO_ROTARY_B, | ||
97 | .inverted_a = 0, | ||
98 | .inverted_b = 0, | ||
99 | }; | ||
100 | |||
101 | static struct platform_device rotary_encoder_device = { | ||
102 | .name = "rotary-encoder", | ||
103 | .id = 0, | ||
104 | .dev = { | ||
105 | .platform_data = &my_rotary_encoder_info, | ||
106 | } | ||
107 | }; | ||
108 | |||
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index f1d639903325..7bb0d934b6d8 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt | |||
@@ -122,10 +122,8 @@ Code Seq# Include File Comments | |||
122 | 'c' 00-7F linux/coda.h conflict! | 122 | 'c' 00-7F linux/coda.h conflict! |
123 | 'c' 80-9F arch/s390/include/asm/chsc.h | 123 | 'c' 80-9F arch/s390/include/asm/chsc.h |
124 | 'd' 00-FF linux/char/drm/drm/h conflict! | 124 | 'd' 00-FF linux/char/drm/drm/h conflict! |
125 | 'd' 00-DF linux/video_decoder.h conflict! | ||
126 | 'd' F0-FF linux/digi1.h | 125 | 'd' F0-FF linux/digi1.h |
127 | 'e' all linux/digi1.h conflict! | 126 | 'e' all linux/digi1.h conflict! |
128 | 'e' 00-1F linux/video_encoder.h conflict! | ||
129 | 'e' 00-1F net/irda/irtty.h conflict! | 127 | 'e' 00-1F net/irda/irtty.h conflict! |
130 | 'f' 00-1F linux/ext2_fs.h | 128 | 'f' 00-1F linux/ext2_fs.h |
131 | 'h' 00-7F Charon filesystem | 129 | 'h' 00-7F Charon filesystem |
@@ -151,6 +149,8 @@ Code Seq# Include File Comments | |||
151 | 'p' 40-7F linux/nvram.h | 149 | 'p' 40-7F linux/nvram.h |
152 | 'p' 80-9F user-space parport | 150 | 'p' 80-9F user-space parport |
153 | <mailto:tim@cyberelk.net> | 151 | <mailto:tim@cyberelk.net> |
152 | 'p' a1-a4 linux/pps.h LinuxPPS | ||
153 | <mailto:giometti@linux.it> | ||
154 | 'q' 00-1F linux/serio.h | 154 | 'q' 00-1F linux/serio.h |
155 | 'q' 80-FF Internet PhoneJACK, Internet LineJACK | 155 | 'q' 80-FF Internet PhoneJACK, Internet LineJACK |
156 | <http://www.quicknet.net> | 156 | <http://www.quicknet.net> |
diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX index 9fee5f2e5c62..e87e336f590e 100644 --- a/Documentation/isdn/00-INDEX +++ b/Documentation/isdn/00-INDEX | |||
@@ -2,42 +2,49 @@ | |||
2 | - this file (info on ISDN implementation for Linux) | 2 | - this file (info on ISDN implementation for Linux) |
3 | CREDITS | 3 | CREDITS |
4 | - list of the kind folks that brought you this stuff. | 4 | - list of the kind folks that brought you this stuff. |
5 | HiSax.cert | ||
6 | - information about the ITU approval certification of the HiSax driver. | ||
5 | INTERFACE | 7 | INTERFACE |
6 | - description of Linklevel and Hardwarelevel ISDN interface. | 8 | - description of isdn4linux Link Level and Hardware Level interfaces. |
9 | INTERFACE.fax | ||
10 | - description of the fax subinterface of isdn4linux. | ||
11 | INTERFACE.CAPI | ||
12 | - description of kernel CAPI Link Level to Hardware Level interface. | ||
7 | README | 13 | README |
8 | - general info on what you need and what to do for Linux ISDN. | 14 | - general info on what you need and what to do for Linux ISDN. |
9 | README.FAQ | 15 | README.FAQ |
10 | - general info for FAQ. | 16 | - general info for FAQ. |
17 | README.HiSax | ||
18 | - info on the HiSax driver which replaces the old teles. | ||
19 | README.act2000 | ||
20 | - info on driver for IBM ACT-2000 card. | ||
11 | README.audio | 21 | README.audio |
12 | - info for running audio over ISDN. | 22 | - info for running audio over ISDN. |
23 | README.avmb1 | ||
24 | - info on driver for AVM-B1 ISDN card. | ||
25 | README.concap | ||
26 | - info on "CONCAP" encapsulation protocol interface used for X.25. | ||
27 | README.diversion | ||
28 | - info on module for isdn diversion services. | ||
13 | README.fax | 29 | README.fax |
14 | - info for using Fax over ISDN. | 30 | - info for using Fax over ISDN. |
15 | README.icn | 31 | README.gigaset |
16 | - info on the ICN-ISDN-card and its driver. | 32 | - info on the drivers for Siemens Gigaset ISDN adapters |
17 | README.HiSax | ||
18 | - info on the HiSax driver which replaces the old teles. | ||
19 | README.hfc-pci | 33 | README.hfc-pci |
20 | - info on hfc-pci based cards. | 34 | - info on hfc-pci based cards. |
35 | README.hysdn | ||
36 | - info on driver for Hypercope active HYSDN cards | ||
37 | README.icn | ||
38 | - info on the ICN-ISDN-card and its driver. | ||
39 | README.mISDN | ||
40 | - info on the Modular ISDN subsystem (mISDN) | ||
21 | README.pcbit | 41 | README.pcbit |
22 | - info on the PCBIT-D ISDN adapter and driver. | 42 | - info on the PCBIT-D ISDN adapter and driver. |
43 | README.sc | ||
44 | - info on driver for Spellcaster cards. | ||
23 | README.syncppp | 45 | README.syncppp |
24 | - info on running Sync PPP over ISDN. | 46 | - info on running Sync PPP over ISDN. |
47 | README.x25 | ||
48 | - info for running X.25 over ISDN. | ||
25 | syncPPP.FAQ | 49 | syncPPP.FAQ |
26 | - frequently asked questions about running PPP over ISDN. | 50 | - frequently asked questions about running PPP over ISDN. |
27 | README.avmb1 | ||
28 | - info on driver for AVM-B1 ISDN card. | ||
29 | README.act2000 | ||
30 | - info on driver for IBM ACT-2000 card. | ||
31 | README.eicon | ||
32 | - info on driver for Eicon active cards. | ||
33 | README.concap | ||
34 | - info on "CONCAP" encapsulation protocol interface used for X.25. | ||
35 | README.diversion | ||
36 | - info on module for isdn diversion services. | ||
37 | README.sc | ||
38 | - info on driver for Spellcaster cards. | ||
39 | README.x25 | ||
40 | _ info for running X.25 over ISDN. | ||
41 | README.hysdn | ||
42 | - info on driver for Hypercope active HYSDN cards | ||
43 | |||
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI new file mode 100644 index 000000000000..686e107923ec --- /dev/null +++ b/Documentation/isdn/INTERFACE.CAPI | |||
@@ -0,0 +1,299 @@ | |||
1 | Kernel CAPI Interface to Hardware Drivers | ||
2 | ----------------------------------------- | ||
3 | |||
4 | 1. Overview | ||
5 | |||
6 | From the CAPI 2.0 specification: | ||
7 | COMMON-ISDN-API (CAPI) is an application programming interface standard used | ||
8 | to access ISDN equipment connected to basic rate interfaces (BRI) and primary | ||
9 | rate interfaces (PRI). | ||
10 | |||
11 | Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI | ||
12 | hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI | ||
13 | lingo) with Kernel CAPI to indicate their readiness to provide their service | ||
14 | to CAPI applications. CAPI applications also register with Kernel CAPI, | ||
15 | requesting association with a CAPI device. Kernel CAPI then dispatches the | ||
16 | application registration to an available device, forwarding it to the | ||
17 | corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both | ||
18 | directions between the application and the hardware driver. | ||
19 | |||
20 | Format and semantics of CAPI messages are specified in the CAPI 2.0 standard. | ||
21 | This standard is freely available from http://www.capi.org. | ||
22 | |||
23 | |||
24 | 2. Driver and Device Registration | ||
25 | |||
26 | CAPI drivers optionally register themselves with Kernel CAPI by calling the | ||
27 | Kernel CAPI function register_capi_driver() with a pointer to a struct | ||
28 | capi_driver. This structure must be filled with the name and revision of the | ||
29 | driver, and optionally a pointer to a callback function, add_card(). The | ||
30 | registration can be revoked by calling the function unregister_capi_driver() | ||
31 | with a pointer to the same struct capi_driver. | ||
32 | |||
33 | CAPI drivers must register each of the ISDN devices they control with Kernel | ||
34 | CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a | ||
35 | struct capi_ctr before they can be used. This structure must be filled with | ||
36 | the names of the driver and controller, and a number of callback function | ||
37 | pointers which are subsequently used by Kernel CAPI for communicating with the | ||
38 | driver. The registration can be revoked by calling the function | ||
39 | detach_capi_ctr() with a pointer to the same struct capi_ctr. | ||
40 | |||
41 | Before the device can be actually used, the driver must fill in the device | ||
42 | information fields 'manu', 'version', 'profile' and 'serial' in the capi_ctr | ||
43 | structure of the device, and signal its readiness by calling capi_ctr_ready(). | ||
44 | From then on, Kernel CAPI may call the registered callback functions for the | ||
45 | device. | ||
46 | |||
47 | If the device becomes unusable for any reason (shutdown, disconnect ...), the | ||
48 | driver has to call capi_ctr_down(). This will prevent further calls to the | ||
49 | callback functions by Kernel CAPI. | ||
50 | |||
51 | |||
52 | 3. Application Registration and Communication | ||
53 | |||
54 | Kernel CAPI forwards registration requests from applications (calls to CAPI | ||
55 | operation CAPI_REGISTER) to an appropriate hardware driver by calling its | ||
56 | register_appl() callback function. A unique Application ID (ApplID, u16) is | ||
57 | allocated by Kernel CAPI and passed to register_appl() along with the | ||
58 | parameter structure provided by the application. This is analogous to the | ||
59 | open() operation on regular files or character devices. | ||
60 | |||
61 | After a successful return from register_appl(), CAPI messages from the | ||
62 | application may be passed to the driver for the device via calls to the | ||
63 | send_message() callback function. The CAPI message to send is stored in the | ||
64 | data portion of an skb. Conversely, the driver may call Kernel CAPI's | ||
65 | capi_ctr_handle_message() function to pass a received CAPI message to Kernel | ||
66 | CAPI for forwarding to an application, specifying its ApplID. | ||
67 | |||
68 | Deregistration requests (CAPI operation CAPI_RELEASE) from applications are | ||
69 | forwarded as calls to the release_appl() callback function, passing the same | ||
70 | ApplID as with register_appl(). After return from release_appl(), no CAPI | ||
71 | messages for that application may be passed to or from the device anymore. | ||
72 | |||
73 | |||
74 | 4. Data Structures | ||
75 | |||
76 | 4.1 struct capi_driver | ||
77 | |||
78 | This structure describes a Kernel CAPI driver itself. It is used in the | ||
79 | register_capi_driver() and unregister_capi_driver() functions, and contains | ||
80 | the following non-private fields, all to be set by the driver before calling | ||
81 | register_capi_driver(): | ||
82 | |||
83 | char name[32] | ||
84 | the name of the driver, as a zero-terminated ASCII string | ||
85 | char revision[32] | ||
86 | the revision number of the driver, as a zero-terminated ASCII string | ||
87 | int (*add_card)(struct capi_driver *driver, capicardparams *data) | ||
88 | a callback function pointer (may be NULL) | ||
89 | |||
90 | |||
91 | 4.2 struct capi_ctr | ||
92 | |||
93 | This structure describes an ISDN device (controller) handled by a Kernel CAPI | ||
94 | driver. After registration via the attach_capi_ctr() function it is passed to | ||
95 | all controller specific lower layer interface and callback functions to | ||
96 | identify the controller to operate on. | ||
97 | |||
98 | It contains the following non-private fields: | ||
99 | |||
100 | - to be set by the driver before calling attach_capi_ctr(): | ||
101 | |||
102 | struct module *owner | ||
103 | pointer to the driver module owning the device | ||
104 | |||
105 | void *driverdata | ||
106 | an opaque pointer to driver specific data, not touched by Kernel CAPI | ||
107 | |||
108 | char name[32] | ||
109 | the name of the controller, as a zero-terminated ASCII string | ||
110 | |||
111 | char *driver_name | ||
112 | the name of the driver, as a zero-terminated ASCII string | ||
113 | |||
114 | int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata) | ||
115 | (optional) pointer to a callback function for sending firmware and | ||
116 | configuration data to the device | ||
117 | Return value: 0 on success, error code on error | ||
118 | Called in process context. | ||
119 | |||
120 | void (*reset_ctr)(struct capi_ctr *ctrlr) | ||
121 | (optional) pointer to a callback function for performing a reset on | ||
122 | the device, releasing all registered applications | ||
123 | Called in process context. | ||
124 | |||
125 | void (*register_appl)(struct capi_ctr *ctrlr, u16 applid, | ||
126 | capi_register_params *rparam) | ||
127 | void (*release_appl)(struct capi_ctr *ctrlr, u16 applid) | ||
128 | pointers to callback functions for registration and deregistration of | ||
129 | applications with the device | ||
130 | Calls to these functions are serialized by Kernel CAPI so that only | ||
131 | one call to any of them is active at any time. | ||
132 | |||
133 | u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb) | ||
134 | pointer to a callback function for sending a CAPI message to the | ||
135 | device | ||
136 | Return value: CAPI error code | ||
137 | If the method returns 0 (CAPI_NOERROR) the driver has taken ownership | ||
138 | of the skb and the caller may no longer access it. If it returns a | ||
139 | non-zero (error) value then ownership of the skb returns to the caller | ||
140 | who may reuse or free it. | ||
141 | The return value should only be used to signal problems with respect | ||
142 | to accepting or queueing the message. Errors occurring during the | ||
143 | actual processing of the message should be signaled with an | ||
144 | appropriate reply message. | ||
145 | Calls to this function are not serialized by Kernel CAPI, ie. it must | ||
146 | be prepared to be re-entered. | ||
147 | |||
148 | char *(*procinfo)(struct capi_ctr *ctrlr) | ||
149 | pointer to a callback function returning the entry for the device in | ||
150 | the CAPI controller info table, /proc/capi/controller | ||
151 | |||
152 | read_proc_t *ctr_read_proc | ||
153 | pointer to the read_proc callback function for the device's proc file | ||
154 | system entry, /proc/capi/controllers/<n>; will be called with a | ||
155 | pointer to the device's capi_ctr structure as the last (data) argument | ||
156 | |||
157 | Note: Callback functions are never called in interrupt context. | ||
158 | |||
159 | - to be filled in before calling capi_ctr_ready(): | ||
160 | |||
161 | u8 manu[CAPI_MANUFACTURER_LEN] | ||
162 | value to return for CAPI_GET_MANUFACTURER | ||
163 | |||
164 | capi_version version | ||
165 | value to return for CAPI_GET_VERSION | ||
166 | |||
167 | capi_profile profile | ||
168 | value to return for CAPI_GET_PROFILE | ||
169 | |||
170 | u8 serial[CAPI_SERIAL_LEN] | ||
171 | value to return for CAPI_GET_SERIAL | ||
172 | |||
173 | |||
174 | 4.3 The _cmsg Structure | ||
175 | |||
176 | (declared in <linux/isdn/capiutil.h>) | ||
177 | |||
178 | The _cmsg structure stores the contents of a CAPI 2.0 message in an easily | ||
179 | accessible form. It contains members for all possible CAPI 2.0 parameters, of | ||
180 | which only those appearing in the message type currently being processed are | ||
181 | actually used. Unused members should be set to zero. | ||
182 | |||
183 | Members are named after the CAPI 2.0 standard names of the parameters they | ||
184 | represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data | ||
185 | types are: | ||
186 | |||
187 | u8 for CAPI parameters of type 'byte' | ||
188 | |||
189 | u16 for CAPI parameters of type 'word' | ||
190 | |||
191 | u32 for CAPI parameters of type 'dword' | ||
192 | |||
193 | _cstruct for CAPI parameters of type 'struct' not containing any | ||
194 | variably-sized (struct) subparameters (eg. 'Called Party Number') | ||
195 | The member is a pointer to a buffer containing the parameter in | ||
196 | CAPI encoding (length + content). It may also be NULL, which will | ||
197 | be taken to represent an empty (zero length) parameter. | ||
198 | |||
199 | _cmstruct for CAPI parameters of type 'struct' containing 'struct' | ||
200 | subparameters ('Additional Info' and 'B Protocol') | ||
201 | The representation is a single byte containing one of the values: | ||
202 | CAPI_DEFAULT: the parameter is empty | ||
203 | CAPI_COMPOSE: the values of the subparameters are stored | ||
204 | individually in the corresponding _cmsg structure members | ||
205 | |||
206 | Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert | ||
207 | messages between their transport encoding described in the CAPI 2.0 standard | ||
208 | and their _cmsg structure representation. Note that capi_cmsg2message() does | ||
209 | not know or check the size of its destination buffer. The caller must make | ||
210 | sure it is big enough to accomodate the resulting CAPI message. | ||
211 | |||
212 | |||
213 | 5. Lower Layer Interface Functions | ||
214 | |||
215 | (declared in <linux/isdn/capilli.h>) | ||
216 | |||
217 | void register_capi_driver(struct capi_driver *drvr) | ||
218 | void unregister_capi_driver(struct capi_driver *drvr) | ||
219 | register/unregister a driver with Kernel CAPI | ||
220 | |||
221 | int attach_capi_ctr(struct capi_ctr *ctrlr) | ||
222 | int detach_capi_ctr(struct capi_ctr *ctrlr) | ||
223 | register/unregister a device (controller) with Kernel CAPI | ||
224 | |||
225 | void capi_ctr_ready(struct capi_ctr *ctrlr) | ||
226 | void capi_ctr_down(struct capi_ctr *ctrlr) | ||
227 | signal controller ready/not ready | ||
228 | |||
229 | void capi_ctr_suspend_output(struct capi_ctr *ctrlr) | ||
230 | void capi_ctr_resume_output(struct capi_ctr *ctrlr) | ||
231 | signal suspend/resume | ||
232 | |||
233 | void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid, | ||
234 | struct sk_buff *skb) | ||
235 | pass a received CAPI message to Kernel CAPI | ||
236 | for forwarding to the specified application | ||
237 | |||
238 | |||
239 | 6. Helper Functions and Macros | ||
240 | |||
241 | Library functions (from <linux/isdn/capilli.h>): | ||
242 | |||
243 | void capilib_new_ncci(struct list_head *head, u16 applid, | ||
244 | u32 ncci, u32 winsize) | ||
245 | void capilib_free_ncci(struct list_head *head, u16 applid, u32 ncci) | ||
246 | void capilib_release_appl(struct list_head *head, u16 applid) | ||
247 | void capilib_release(struct list_head *head) | ||
248 | void capilib_data_b3_conf(struct list_head *head, u16 applid, | ||
249 | u32 ncci, u16 msgid) | ||
250 | u16 capilib_data_b3_req(struct list_head *head, u16 applid, | ||
251 | u32 ncci, u16 msgid) | ||
252 | |||
253 | |||
254 | Macros to extract/set element values from/in a CAPI message header | ||
255 | (from <linux/isdn/capiutil.h>): | ||
256 | |||
257 | Get Macro Set Macro Element (Type) | ||
258 | |||
259 | CAPIMSG_LEN(m) CAPIMSG_SETLEN(m, len) Total Length (u16) | ||
260 | CAPIMSG_APPID(m) CAPIMSG_SETAPPID(m, applid) ApplID (u16) | ||
261 | CAPIMSG_COMMAND(m) CAPIMSG_SETCOMMAND(m,cmd) Command (u8) | ||
262 | CAPIMSG_SUBCOMMAND(m) CAPIMSG_SETSUBCOMMAND(m, cmd) Subcommand (u8) | ||
263 | CAPIMSG_CMD(m) - Command*256 | ||
264 | + Subcommand (u16) | ||
265 | CAPIMSG_MSGID(m) CAPIMSG_SETMSGID(m, msgid) Message Number (u16) | ||
266 | |||
267 | CAPIMSG_CONTROL(m) CAPIMSG_SETCONTROL(m, contr) Controller/PLCI/NCCI | ||
268 | (u32) | ||
269 | CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16) | ||
270 | |||
271 | |||
272 | Library functions for working with _cmsg structures | ||
273 | (from <linux/isdn/capiutil.h>): | ||
274 | |||
275 | unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg) | ||
276 | Assembles a CAPI 2.0 message from the parameters in *cmsg, storing the | ||
277 | result in *msg. | ||
278 | |||
279 | unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg) | ||
280 | Disassembles the CAPI 2.0 message in *msg, storing the parameters in | ||
281 | *cmsg. | ||
282 | |||
283 | unsigned capi_cmsg_header(_cmsg *cmsg, u16 ApplId, u8 Command, u8 Subcommand, | ||
284 | u16 Messagenumber, u32 Controller) | ||
285 | Fills the header part and address field of the _cmsg structure *cmsg | ||
286 | with the given values, zeroing the remainder of the structure so only | ||
287 | parameters with non-default values need to be changed before sending | ||
288 | the message. | ||
289 | |||
290 | void capi_cmsg_answer(_cmsg *cmsg) | ||
291 | Sets the low bit of the Subcommand field in *cmsg, thereby converting | ||
292 | _REQ to _CONF and _IND to _RESP. | ||
293 | |||
294 | char *capi_cmd2str(u8 Command, u8 Subcommand) | ||
295 | Returns the CAPI 2.0 message name corresponding to the given command | ||
296 | and subcommand values, as a static ASCII string. The return value may | ||
297 | be NULL if the command/subcommand is not one of those defined in the | ||
298 | CAPI 2.0 standard. | ||
299 | |||
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset index 55b2852904a4..f9963103ae3d 100644 --- a/Documentation/isdn/README.gigaset +++ b/Documentation/isdn/README.gigaset | |||
@@ -61,24 +61,28 @@ GigaSet 307x Device Driver | |||
61 | --------------------- | 61 | --------------------- |
62 | 2.1. Modules | 62 | 2.1. Modules |
63 | ------- | 63 | ------- |
64 | To get the device working, you have to load the proper kernel module. You | 64 | For the devices to work, the proper kernel modules have to be loaded. |
65 | can do this using | 65 | This normally happens automatically when the system detects the USB |
66 | modprobe modulename | 66 | device (base, M105) or when the line discipline is attached (M101). It |
67 | where modulename is ser_gigaset (M101), usb_gigaset (M105), or | 67 | can also be triggered manually using the modprobe(8) command, for example |
68 | bas_gigaset (direct USB connection to the base). | 68 | for troubleshooting or to pass module parameters. |
69 | 69 | ||
70 | The module ser_gigaset provides a serial line discipline N_GIGASET_M101 | 70 | The module ser_gigaset provides a serial line discipline N_GIGASET_M101 |
71 | which drives the device through the regular serial line driver. To use it, | 71 | which drives the device through the regular serial line driver. It must |
72 | run the Gigaset M101 daemon "gigasetm101d" (also available from | 72 | be attached to the serial line to which the M101 is connected with the |
73 | http://sourceforge.net/projects/gigaset307x/) with the device file of the | 73 | ldattach(8) command (requires util-linux-ng release 2.14 or later), for |
74 | RS232 port to the M101 as an argument, for example: | 74 | example: |
75 | gigasetm101d /dev/ttyS1 | 75 | ldattach GIGASET_M101 /dev/ttyS1 |
76 | This will open the device file, set its line discipline to N_GIGASET_M101, | 76 | This will open the device file, attach the line discipline to it, and |
77 | and then sleep in the background, keeping the device open so that the | 77 | then sleep in the background, keeping the device open so that the line |
78 | line discipline remains active. To deactivate it, kill the daemon, for | 78 | discipline remains active. To deactivate it, kill the daemon, for example |
79 | example with | 79 | with |
80 | killall gigasetm101d | 80 | killall ldattach |
81 | before disconnecting the device. | 81 | before disconnecting the device. To have this happen automatically at |
82 | system startup/shutdown on an LSB compatible system, create and activate | ||
83 | an appropriate LSB startup script /etc/init.d/gigaset. (The init name | ||
84 | 'gigaset' is officially assigned to this project by LANANA.) | ||
85 | Alternatively, just add the 'ldattach' command line to /etc/rc.local. | ||
82 | 86 | ||
83 | 2.2. Device nodes for user space programs | 87 | 2.2. Device nodes for user space programs |
84 | ------------------------------------ | 88 | ------------------------------------ |
@@ -145,10 +149,8 @@ GigaSet 307x Device Driver | |||
145 | configuration files and chat scripts in the gigaset-VERSION/ppp directory | 149 | configuration files and chat scripts in the gigaset-VERSION/ppp directory |
146 | in the driver packages from http://sourceforge.net/projects/gigaset307x/. | 150 | in the driver packages from http://sourceforge.net/projects/gigaset307x/. |
147 | Please note that the USB drivers are not able to change the state of the | 151 | Please note that the USB drivers are not able to change the state of the |
148 | control lines (the M105 driver can be configured to use some undocumented | 152 | control lines. This means you must use "Stupid Mode" if you are using |
149 | control requests, if you really need the control lines, though). This means | 153 | wvdial or you should use the nocrtscts option of pppd. |
150 | you must use "Stupid Mode" if you are using wvdial or you should use the | ||
151 | nocrtscts option of pppd. | ||
152 | You must also assure that the ppp_async module is loaded with the parameter | 154 | You must also assure that the ppp_async module is loaded with the parameter |
153 | flag_time=0. You can do this e.g. by adding a line like | 155 | flag_time=0. You can do this e.g. by adding a line like |
154 | 156 | ||
@@ -186,19 +188,19 @@ GigaSet 307x Device Driver | |||
186 | You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode | 188 | You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode |
187 | setting (ttyGxy is ttyGU0 or ttyGB0). | 189 | setting (ttyGxy is ttyGU0 or ttyGB0). |
188 | 190 | ||
189 | 2.6. M105 Undocumented USB Requests | 191 | 2.6. Unregistered Wireless Devices (M101/M105) |
190 | ------------------------------ | 192 | ----------------------------------------- |
191 | 193 | The main purpose of the ser_gigaset and usb_gigaset drivers is to allow | |
192 | The Gigaset M105 USB data box understands a couple of useful, but | 194 | the M101 and M105 wireless devices to be used as ISDN devices for ISDN |
193 | undocumented USB commands. These requests are not used in normal | 195 | connections through a Gigaset base. Therefore they assume that the device |
194 | operation (for wireless access to the base), but are needed for access | 196 | is registered to a DECT base. |
195 | to the M105's own configuration mode (registration to the base, baudrate | 197 | |
196 | and line format settings, device status queries) via the gigacontr | 198 | If the M101/M105 device is not registered to a base, initialization of |
197 | utility. Their use is disabled in the driver by default for safety | 199 | the device fails, and a corresponding error message is logged by the |
198 | reasons but can be enabled by setting the kernel configuration option | 200 | driver. In that situation, a restricted set of functions is available |
199 | "Support for undocumented USB requests" (GIGASET_UNDOCREQ) to "Y" and | 201 | which includes, in particular, those necessary for registering the device |
200 | recompiling. | 202 | to a base or for switching it between Fixed Part and Portable Part |
201 | 203 | modes. | |
202 | 204 | ||
203 | 3. Troubleshooting | 205 | 3. Troubleshooting |
204 | --------------- | 206 | --------------- |
@@ -228,6 +230,14 @@ GigaSet 307x Device Driver | |||
228 | Solution: | 230 | Solution: |
229 | Select Unimodem mode for all DECT data adapters. (see section 2.4.) | 231 | Select Unimodem mode for all DECT data adapters. (see section 2.4.) |
230 | 232 | ||
233 | Problem: | ||
234 | Messages like this: | ||
235 | usb_gigaset 3-2:1.0: Could not initialize the device. | ||
236 | appear in your syslog. | ||
237 | Solution: | ||
238 | Check whether your M10x wireless device is correctly registered to the | ||
239 | Gigaset base. (see section 2.6.) | ||
240 | |||
231 | 3.2. Telling the driver to provide more information | 241 | 3.2. Telling the driver to provide more information |
232 | ---------------------------------------------- | 242 | ---------------------------------------------- |
233 | Building the driver with the "Gigaset debugging" kernel configuration | 243 | Building the driver with the "Gigaset debugging" kernel configuration |
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist index 6c42e071d723..2df4576f1173 100644 --- a/Documentation/ja_JP/SubmitChecklist +++ b/Documentation/ja_JP/SubmitChecklist | |||
@@ -75,7 +75,7 @@ Linux カーネルパッチ投稿者向けチェックリスト | |||
75 | ビルドした上、動作確認を行ってください。 | 75 | ビルドした上、動作確認を行ってください。 |
76 | 76 | ||
77 | 14: もしパッチがディスクのI/O性能などに影響を与えるようであれば、 | 77 | 14: もしパッチがディスクのI/O性能などに影響を与えるようであれば、 |
78 | 'CONFIG_LBD'オプションを有効にした場合と無効にした場合の両方で | 78 | 'CONFIG_LBDAF'オプションを有効にした場合と無効にした場合の両方で |
79 | テストを実施してみてください。 | 79 | テストを実施してみてください。 |
80 | 80 | ||
81 | 15: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。 | 81 | 15: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。 |
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt index 26a7c0a93193..849b5e56d06f 100644 --- a/Documentation/kbuild/kconfig.txt +++ b/Documentation/kbuild/kconfig.txt | |||
@@ -35,48 +35,26 @@ new .config files to see the differences: | |||
35 | 35 | ||
36 | (Yes, we need something better here.) | 36 | (Yes, we need something better here.) |
37 | 37 | ||
38 | |||
39 | ====================================================================== | ||
40 | menuconfig | ||
41 | -------------------------------------------------- | ||
42 | |||
43 | SEARCHING for CONFIG symbols | ||
44 | |||
45 | Searching in menuconfig: | ||
46 | |||
47 | The Search function searches for kernel configuration symbol | ||
48 | names, so you have to know something close to what you are | ||
49 | looking for. | ||
50 | |||
51 | Example: | ||
52 | /hotplug | ||
53 | This lists all config symbols that contain "hotplug", | ||
54 | e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG. | ||
55 | |||
56 | For search help, enter / followed TAB-TAB-TAB (to highlight | ||
57 | <Help>) and Enter. This will tell you that you can also use | ||
58 | regular expressions (regexes) in the search string, so if you | ||
59 | are not interested in MEMORY_HOTPLUG, you could try | ||
60 | |||
61 | /^hotplug | ||
62 | |||
63 | |||
64 | ______________________________________________________________________ | 38 | ______________________________________________________________________ |
65 | Color Themes for 'menuconfig' | 39 | Environment variables for '*config' |
66 | 40 | ||
67 | It is possible to select different color themes using the variable | 41 | KCONFIG_CONFIG |
68 | MENUCONFIG_COLOR. To select a theme use: | 42 | -------------------------------------------------- |
43 | This environment variable can be used to specify a default kernel config | ||
44 | file name to override the default name of ".config". | ||
69 | 45 | ||
70 | make MENUCONFIG_COLOR=<theme> menuconfig | 46 | KCONFIG_OVERWRITECONFIG |
47 | -------------------------------------------------- | ||
48 | If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not | ||
49 | break symlinks when .config is a symlink to somewhere else. | ||
71 | 50 | ||
72 | Available themes are: | 51 | KCONFIG_NOTIMESTAMP |
73 | mono => selects colors suitable for monochrome displays | 52 | -------------------------------------------------- |
74 | blackbg => selects a color scheme with black background | 53 | If this environment variable exists and is non-null, the timestamp line |
75 | classic => theme with blue background. The classic look | 54 | in generated .config files is omitted. |
76 | bluetitle => a LCD friendly version of classic. (default) | ||
77 | 55 | ||
78 | ______________________________________________________________________ | 56 | ______________________________________________________________________ |
79 | Environment variables in 'menuconfig' | 57 | Environment variables for '{allyes/allmod/allno/rand}config' |
80 | 58 | ||
81 | KCONFIG_ALLCONFIG | 59 | KCONFIG_ALLCONFIG |
82 | -------------------------------------------------- | 60 | -------------------------------------------------- |
@@ -95,8 +73,7 @@ values. | |||
95 | This enables you to create "miniature" config (miniconfig) or custom | 73 | This enables you to create "miniature" config (miniconfig) or custom |
96 | config files containing just the config symbols that you are interested | 74 | config files containing just the config symbols that you are interested |
97 | in. Then the kernel config system generates the full .config file, | 75 | in. Then the kernel config system generates the full .config file, |
98 | including dependencies of your miniconfig file, based on the miniconfig | 76 | including symbols of your miniconfig file. |
99 | file. | ||
100 | 77 | ||
101 | This 'KCONFIG_ALLCONFIG' file is a config file which contains | 78 | This 'KCONFIG_ALLCONFIG' file is a config file which contains |
102 | (usually a subset of all) preset config symbols. These variable | 79 | (usually a subset of all) preset config symbols. These variable |
@@ -113,26 +90,14 @@ These examples will disable most options (allnoconfig) but enable or | |||
113 | disable the options that are explicitly listed in the specified | 90 | disable the options that are explicitly listed in the specified |
114 | mini-config files. | 91 | mini-config files. |
115 | 92 | ||
93 | ______________________________________________________________________ | ||
94 | Environment variables for 'silentoldconfig' | ||
95 | |||
116 | KCONFIG_NOSILENTUPDATE | 96 | KCONFIG_NOSILENTUPDATE |
117 | -------------------------------------------------- | 97 | -------------------------------------------------- |
118 | If this variable has a non-blank value, it prevents silent kernel | 98 | If this variable has a non-blank value, it prevents silent kernel |
119 | config udpates (requires explicit updates). | 99 | config udpates (requires explicit updates). |
120 | 100 | ||
121 | KCONFIG_CONFIG | ||
122 | -------------------------------------------------- | ||
123 | This environment variable can be used to specify a default kernel config | ||
124 | file name to override the default name of ".config". | ||
125 | |||
126 | KCONFIG_OVERWRITECONFIG | ||
127 | -------------------------------------------------- | ||
128 | If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not | ||
129 | break symlinks when .config is a symlink to somewhere else. | ||
130 | |||
131 | KCONFIG_NOTIMESTAMP | ||
132 | -------------------------------------------------- | ||
133 | If this environment variable exists and is non-null, the timestamp line | ||
134 | in generated .config files is omitted. | ||
135 | |||
136 | KCONFIG_AUTOCONFIG | 101 | KCONFIG_AUTOCONFIG |
137 | -------------------------------------------------- | 102 | -------------------------------------------------- |
138 | This environment variable can be set to specify the path & name of the | 103 | This environment variable can be set to specify the path & name of the |
@@ -143,15 +108,54 @@ KCONFIG_AUTOHEADER | |||
143 | This environment variable can be set to specify the path & name of the | 108 | This environment variable can be set to specify the path & name of the |
144 | "autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". | 109 | "autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". |
145 | 110 | ||
111 | |||
112 | ====================================================================== | ||
113 | menuconfig | ||
114 | -------------------------------------------------- | ||
115 | |||
116 | SEARCHING for CONFIG symbols | ||
117 | |||
118 | Searching in menuconfig: | ||
119 | |||
120 | The Search function searches for kernel configuration symbol | ||
121 | names, so you have to know something close to what you are | ||
122 | looking for. | ||
123 | |||
124 | Example: | ||
125 | /hotplug | ||
126 | This lists all config symbols that contain "hotplug", | ||
127 | e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG. | ||
128 | |||
129 | For search help, enter / followed TAB-TAB-TAB (to highlight | ||
130 | <Help>) and Enter. This will tell you that you can also use | ||
131 | regular expressions (regexes) in the search string, so if you | ||
132 | are not interested in MEMORY_HOTPLUG, you could try | ||
133 | |||
134 | /^hotplug | ||
135 | |||
146 | ______________________________________________________________________ | 136 | ______________________________________________________________________ |
147 | menuconfig User Interface Options | 137 | User interface options for 'menuconfig' |
148 | ---------------------------------------------------------------------- | 138 | |
139 | MENUCONFIG_COLOR | ||
140 | -------------------------------------------------- | ||
141 | It is possible to select different color themes using the variable | ||
142 | MENUCONFIG_COLOR. To select a theme use: | ||
143 | |||
144 | make MENUCONFIG_COLOR=<theme> menuconfig | ||
145 | |||
146 | Available themes are: | ||
147 | mono => selects colors suitable for monochrome displays | ||
148 | blackbg => selects a color scheme with black background | ||
149 | classic => theme with blue background. The classic look | ||
150 | bluetitle => a LCD friendly version of classic. (default) | ||
151 | |||
149 | MENUCONFIG_MODE | 152 | MENUCONFIG_MODE |
150 | -------------------------------------------------- | 153 | -------------------------------------------------- |
151 | This mode shows all sub-menus in one large tree. | 154 | This mode shows all sub-menus in one large tree. |
152 | 155 | ||
153 | Example: | 156 | Example: |
154 | MENUCONFIG_MODE=single_menu make menuconfig | 157 | make MENUCONFIG_MODE=single_menu menuconfig |
158 | |||
155 | 159 | ||
156 | ====================================================================== | 160 | ====================================================================== |
157 | xconfig | 161 | xconfig |
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 51104f9194a5..d76cfd8712e1 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt | |||
@@ -40,10 +40,16 @@ This document describes the Linux kernel Makefiles. | |||
40 | --- 6.7 Custom kbuild commands | 40 | --- 6.7 Custom kbuild commands |
41 | --- 6.8 Preprocessing linker scripts | 41 | --- 6.8 Preprocessing linker scripts |
42 | 42 | ||
43 | === 7 Kbuild Variables | 43 | === 7 Kbuild syntax for exported headers |
44 | === 8 Makefile language | 44 | --- 7.1 header-y |
45 | === 9 Credits | 45 | --- 7.2 objhdr-y |
46 | === 10 TODO | 46 | --- 7.3 destination-y |
47 | --- 7.4 unifdef-y (deprecated) | ||
48 | |||
49 | === 8 Kbuild Variables | ||
50 | === 9 Makefile language | ||
51 | === 10 Credits | ||
52 | === 11 TODO | ||
47 | 53 | ||
48 | === 1 Overview | 54 | === 1 Overview |
49 | 55 | ||
@@ -310,6 +316,16 @@ more details, with real examples. | |||
310 | #arch/m68k/fpsp040/Makefile | 316 | #arch/m68k/fpsp040/Makefile |
311 | ldflags-y := -x | 317 | ldflags-y := -x |
312 | 318 | ||
319 | subdir-ccflags-y, subdir-asflags-y | ||
320 | The two flags listed above are similar to ccflags-y and as-falgs-y. | ||
321 | The difference is that the subdir- variants has effect for the kbuild | ||
322 | file where tey are present and all subdirectories. | ||
323 | Options specified using subdir-* are added to the commandline before | ||
324 | the options specified using the non-subdir variants. | ||
325 | |||
326 | Example: | ||
327 | subdir-ccflags-y := -Werror | ||
328 | |||
313 | CFLAGS_$@, AFLAGS_$@ | 329 | CFLAGS_$@, AFLAGS_$@ |
314 | 330 | ||
315 | CFLAGS_$@ and AFLAGS_$@ only apply to commands in current | 331 | CFLAGS_$@ and AFLAGS_$@ only apply to commands in current |
@@ -1143,8 +1159,69 @@ When kbuild executes, the following steps are followed (roughly): | |||
1143 | The kbuild infrastructure for *lds file are used in several | 1159 | The kbuild infrastructure for *lds file are used in several |
1144 | architecture-specific files. | 1160 | architecture-specific files. |
1145 | 1161 | ||
1162 | === 7 Kbuild syntax for exported headers | ||
1163 | |||
1164 | The kernel include a set of headers that is exported to userspace. | ||
1165 | Many headers can be exported as-is but other headers requires a | ||
1166 | minimal pre-processing before they are ready for user-space. | ||
1167 | The pre-processing does: | ||
1168 | - drop kernel specific annotations | ||
1169 | - drop include of compiler.h | ||
1170 | - drop all sections that is kernel internat (guarded by ifdef __KERNEL__) | ||
1171 | |||
1172 | Each relevant directory contain a file name "Kbuild" which specify the | ||
1173 | headers to be exported. | ||
1174 | See subsequent chapter for the syntax of the Kbuild file. | ||
1175 | |||
1176 | --- 7.1 header-y | ||
1177 | |||
1178 | header-y specify header files to be exported. | ||
1179 | |||
1180 | Example: | ||
1181 | #include/linux/Kbuild | ||
1182 | header-y += usb/ | ||
1183 | header-y += aio_abi.h | ||
1184 | |||
1185 | The convention is to list one file per line and | ||
1186 | preferably in alphabetic order. | ||
1187 | |||
1188 | header-y also specify which subdirectories to visit. | ||
1189 | A subdirectory is identified by a trailing '/' which | ||
1190 | can be seen in the example above for the usb subdirectory. | ||
1191 | |||
1192 | Subdirectories are visited before their parent directories. | ||
1193 | |||
1194 | --- 7.2 objhdr-y | ||
1195 | |||
1196 | objhdr-y specifies generated files to be exported. | ||
1197 | Generated files are special as they need to be looked | ||
1198 | up in another directory when doing 'make O=...' builds. | ||
1199 | |||
1200 | Example: | ||
1201 | #include/linux/Kbuild | ||
1202 | objhdr-y += version.h | ||
1203 | |||
1204 | --- 7.3 destination-y | ||
1205 | |||
1206 | When an architecture have a set of exported headers that needs to be | ||
1207 | exported to a different directory destination-y is used. | ||
1208 | destination-y specify the destination directory for all exported | ||
1209 | headers in the file where it is present. | ||
1210 | |||
1211 | Example: | ||
1212 | #arch/xtensa/platforms/s6105/include/platform/Kbuild | ||
1213 | destination-y := include/linux | ||
1214 | |||
1215 | In the example above all exported headers in the Kbuild file | ||
1216 | will be located in the directory "include/linux" when exported. | ||
1217 | |||
1218 | |||
1219 | --- 7.4 unifdef-y (deprecated) | ||
1220 | |||
1221 | unifdef-y is deprecated. A direct replacement is header-y. | ||
1222 | |||
1146 | 1223 | ||
1147 | === 7 Kbuild Variables | 1224 | === 8 Kbuild Variables |
1148 | 1225 | ||
1149 | The top Makefile exports the following variables: | 1226 | The top Makefile exports the following variables: |
1150 | 1227 | ||
@@ -1206,7 +1283,7 @@ The top Makefile exports the following variables: | |||
1206 | INSTALL_MOD_STRIP will used as the option(s) to the strip command. | 1283 | INSTALL_MOD_STRIP will used as the option(s) to the strip command. |
1207 | 1284 | ||
1208 | 1285 | ||
1209 | === 8 Makefile language | 1286 | === 9 Makefile language |
1210 | 1287 | ||
1211 | The kernel Makefiles are designed to be run with GNU Make. The Makefiles | 1288 | The kernel Makefiles are designed to be run with GNU Make. The Makefiles |
1212 | use only the documented features of GNU Make, but they do use many | 1289 | use only the documented features of GNU Make, but they do use many |
@@ -1225,14 +1302,14 @@ time the left-hand side is used. | |||
1225 | There are some cases where "=" is appropriate. Usually, though, ":=" | 1302 | There are some cases where "=" is appropriate. Usually, though, ":=" |
1226 | is the right choice. | 1303 | is the right choice. |
1227 | 1304 | ||
1228 | === 9 Credits | 1305 | === 10 Credits |
1229 | 1306 | ||
1230 | Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net> | 1307 | Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net> |
1231 | Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de> | 1308 | Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de> |
1232 | Updates by Sam Ravnborg <sam@ravnborg.org> | 1309 | Updates by Sam Ravnborg <sam@ravnborg.org> |
1233 | Language QA by Jan Engelhardt <jengelh@gmx.de> | 1310 | Language QA by Jan Engelhardt <jengelh@gmx.de> |
1234 | 1311 | ||
1235 | === 10 TODO | 1312 | === 11 TODO |
1236 | 1313 | ||
1237 | - Describe how kbuild supports shipped files with _shipped. | 1314 | - Describe how kbuild supports shipped files with _shipped. |
1238 | - Generating offset header files. | 1315 | - Generating offset header files. |
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index b1096da953c8..0767cf69c69e 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt | |||
@@ -275,7 +275,7 @@ following files: | |||
275 | 275 | ||
276 | KERNELDIR := /lib/modules/`uname -r`/build | 276 | KERNELDIR := /lib/modules/`uname -r`/build |
277 | all:: | 277 | all:: |
278 | $(MAKE) -C $KERNELDIR M=`pwd` $@ | 278 | $(MAKE) -C $(KERNELDIR) M=`pwd` $@ |
279 | 279 | ||
280 | # Module specific targets | 280 | # Module specific targets |
281 | genbin: | 281 | genbin: |
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 3f4bc840da8b..cab61d842259 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt | |||
@@ -108,7 +108,7 @@ There are two possible methods of using Kdump. | |||
108 | 108 | ||
109 | 2) Or use the system kernel binary itself as dump-capture kernel and there is | 109 | 2) Or use the system kernel binary itself as dump-capture kernel and there is |
110 | no need to build a separate dump-capture kernel. This is possible | 110 | no need to build a separate dump-capture kernel. This is possible |
111 | only with the architecutres which support a relocatable kernel. As | 111 | only with the architectures which support a relocatable kernel. As |
112 | of today, i386, x86_64, ppc64 and ia64 architectures support relocatable | 112 | of today, i386, x86_64, ppc64 and ia64 architectures support relocatable |
113 | kernel. | 113 | kernel. |
114 | 114 | ||
@@ -222,7 +222,7 @@ Dump-capture kernel config options (Arch Dependent, ia64) | |||
222 | ---------------------------------------------------------- | 222 | ---------------------------------------------------------- |
223 | 223 | ||
224 | - No specific options are required to create a dump-capture kernel | 224 | - No specific options are required to create a dump-capture kernel |
225 | for ia64, other than those specified in the arch idependent section | 225 | for ia64, other than those specified in the arch independent section |
226 | above. This means that it is possible to use the system kernel | 226 | above. This means that it is possible to use the system kernel |
227 | as a dump-capture kernel if desired. | 227 | as a dump-capture kernel if desired. |
228 | 228 | ||
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 026ec7d57384..4d04572b6549 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt | |||
@@ -269,7 +269,10 @@ Use the argument mechanism to document members or constants. | |||
269 | 269 | ||
270 | Inside a struct description, you can use the "private:" and "public:" | 270 | Inside a struct description, you can use the "private:" and "public:" |
271 | comment tags. Structure fields that are inside a "private:" area | 271 | comment tags. Structure fields that are inside a "private:" area |
272 | are not listed in the generated output documentation. | 272 | are not listed in the generated output documentation. The "private:" |
273 | and "public:" tags must begin immediately following a "/*" comment | ||
274 | marker. They may optionally include comments between the ":" and the | ||
275 | ending "*/" marker. | ||
273 | 276 | ||
274 | Example: | 277 | Example: |
275 | 278 | ||
@@ -283,7 +286,7 @@ Example: | |||
283 | struct my_struct { | 286 | struct my_struct { |
284 | int a; | 287 | int a; |
285 | int b; | 288 | int b; |
286 | /* private: */ | 289 | /* private: internal use only */ |
287 | int c; | 290 | int c; |
288 | }; | 291 | }; |
289 | 292 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54f21a5c262b..54ebf100e4e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -17,6 +17,12 @@ are specified on the kernel command line with the module name plus | |||
17 | 17 | ||
18 | usbcore.blinkenlights=1 | 18 | usbcore.blinkenlights=1 |
19 | 19 | ||
20 | Hyphens (dashes) and underscores are equivalent in parameter names, so | ||
21 | log_buf_len=1M print-fatal-signals=1 | ||
22 | can also be entered as | ||
23 | log-buf-len=1M print_fatal_signals=1 | ||
24 | |||
25 | |||
20 | This document may not be entirely up to date and comprehensive. The command | 26 | This document may not be entirely up to date and comprehensive. The command |
21 | "modinfo -p ${modulename}" shows a current list of all parameters of a loadable | 27 | "modinfo -p ${modulename}" shows a current list of all parameters of a loadable |
22 | module. Loadable modules, after being loaded into the running kernel, also | 28 | module. Loadable modules, after being loaded into the running kernel, also |
@@ -42,8 +48,10 @@ parameter is applicable: | |||
42 | EFI EFI Partitioning (GPT) is enabled | 48 | EFI EFI Partitioning (GPT) is enabled |
43 | EIDE EIDE/ATAPI support is enabled. | 49 | EIDE EIDE/ATAPI support is enabled. |
44 | FB The frame buffer device is enabled. | 50 | FB The frame buffer device is enabled. |
51 | GCOV GCOV profiling is enabled. | ||
45 | HW Appropriate hardware is enabled. | 52 | HW Appropriate hardware is enabled. |
46 | IA-64 IA-64 architecture is enabled. | 53 | IA-64 IA-64 architecture is enabled. |
54 | IMA Integrity measurement architecture is enabled. | ||
47 | IOSCHED More than one I/O scheduler is enabled. | 55 | IOSCHED More than one I/O scheduler is enabled. |
48 | IP_PNP IP DHCP, BOOTP, or RARP is enabled. | 56 | IP_PNP IP DHCP, BOOTP, or RARP is enabled. |
49 | ISAPNP ISA PnP code is enabled. | 57 | ISAPNP ISA PnP code is enabled. |
@@ -132,7 +140,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
132 | ./include/asm/setup.h as COMMAND_LINE_SIZE. | 140 | ./include/asm/setup.h as COMMAND_LINE_SIZE. |
133 | 141 | ||
134 | 142 | ||
135 | acpi= [HW,ACPI,X86-64,i386] | 143 | acpi= [HW,ACPI,X86] |
136 | Advanced Configuration and Power Interface | 144 | Advanced Configuration and Power Interface |
137 | Format: { force | off | ht | strict | noirq | rsdt } | 145 | Format: { force | off | ht | strict | noirq | rsdt } |
138 | force -- enable ACPI if default was off | 146 | force -- enable ACPI if default was off |
@@ -151,60 +159,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
151 | 1,0: use 1st APIC table | 159 | 1,0: use 1st APIC table |
152 | default: 0 | 160 | default: 0 |
153 | 161 | ||
154 | acpi_sleep= [HW,ACPI] Sleep options | ||
155 | Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, | ||
156 | old_ordering, s4_nonvs } | ||
157 | See Documentation/power/video.txt for information on | ||
158 | s3_bios and s3_mode. | ||
159 | s3_beep is for debugging; it makes the PC's speaker beep | ||
160 | as soon as the kernel's real-mode entry point is called. | ||
161 | s4_nohwsig prevents ACPI hardware signature from being | ||
162 | used during resume from hibernation. | ||
163 | old_ordering causes the ACPI 1.0 ordering of the _PTS | ||
164 | control method, with respect to putting devices into | ||
165 | low power states, to be enforced (the ACPI 2.0 ordering | ||
166 | of _PTS is used by default). | ||
167 | s4_nonvs prevents the kernel from saving/restoring the | ||
168 | ACPI NVS memory during hibernation. | ||
169 | |||
170 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode | ||
171 | Format: { level | edge | high | low } | ||
172 | |||
173 | acpi_irq_balance [HW,ACPI] | ||
174 | ACPI will balance active IRQs | ||
175 | default in APIC mode | ||
176 | |||
177 | acpi_irq_nobalance [HW,ACPI] | ||
178 | ACPI will not move active IRQs (default) | ||
179 | default in PIC mode | ||
180 | |||
181 | acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for | ||
182 | use by PCI | ||
183 | Format: <irq>,<irq>... | ||
184 | |||
185 | acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA | ||
186 | Format: <irq>,<irq>... | ||
187 | |||
188 | acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT | ||
189 | |||
190 | acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS | ||
191 | Format: To spoof as Windows 98: ="Microsoft Windows" | ||
192 | |||
193 | acpi_osi= [HW,ACPI] Modify list of supported OS interface strings | ||
194 | acpi_osi="string1" # add string1 -- only one string | ||
195 | acpi_osi="!string2" # remove built-in string2 | ||
196 | acpi_osi= # disable all strings | ||
197 | |||
198 | acpi_serialize [HW,ACPI] force serialization of AML methods | ||
199 | |||
200 | acpi_skip_timer_override [HW,ACPI] | ||
201 | Recognize and ignore IRQ0/pin2 Interrupt Override. | ||
202 | For broken nForce2 BIOS resulting in XT-PIC timer. | ||
203 | acpi_use_timer_override [HW,ACPI] | ||
204 | Use timer override. For some broken Nvidia NF5 boards | ||
205 | that require a timer override, but don't have | ||
206 | HPET | ||
207 | |||
208 | acpi_backlight= [HW,ACPI] | 162 | acpi_backlight= [HW,ACPI] |
209 | acpi_backlight=vendor | 163 | acpi_backlight=vendor |
210 | acpi_backlight=video | 164 | acpi_backlight=video |
@@ -212,11 +166,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
212 | (e.g. thinkpad_acpi, sony_acpi, etc.) instead | 166 | (e.g. thinkpad_acpi, sony_acpi, etc.) instead |
213 | of the ACPI video.ko driver. | 167 | of the ACPI video.ko driver. |
214 | 168 | ||
215 | acpi_display_output= [HW,ACPI] | ||
216 | acpi_display_output=vendor | ||
217 | acpi_display_output=video | ||
218 | See above. | ||
219 | |||
220 | acpi.debug_layer= [HW,ACPI,ACPI_DEBUG] | 169 | acpi.debug_layer= [HW,ACPI,ACPI_DEBUG] |
221 | acpi.debug_level= [HW,ACPI,ACPI_DEBUG] | 170 | acpi.debug_level= [HW,ACPI,ACPI_DEBUG] |
222 | Format: <int> | 171 | Format: <int> |
@@ -245,6 +194,41 @@ and is between 256 and 4096 characters. It is defined in the file | |||
245 | unusable. The "log_buf_len" parameter may be useful | 194 | unusable. The "log_buf_len" parameter may be useful |
246 | if you need to capture more output. | 195 | if you need to capture more output. |
247 | 196 | ||
197 | acpi_display_output= [HW,ACPI] | ||
198 | acpi_display_output=vendor | ||
199 | acpi_display_output=video | ||
200 | See above. | ||
201 | |||
202 | acpi_irq_balance [HW,ACPI] | ||
203 | ACPI will balance active IRQs | ||
204 | default in APIC mode | ||
205 | |||
206 | acpi_irq_nobalance [HW,ACPI] | ||
207 | ACPI will not move active IRQs (default) | ||
208 | default in PIC mode | ||
209 | |||
210 | acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA | ||
211 | Format: <irq>,<irq>... | ||
212 | |||
213 | acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for | ||
214 | use by PCI | ||
215 | Format: <irq>,<irq>... | ||
216 | |||
217 | acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT | ||
218 | |||
219 | acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS | ||
220 | Format: To spoof as Windows 98: ="Microsoft Windows" | ||
221 | |||
222 | acpi_osi= [HW,ACPI] Modify list of supported OS interface strings | ||
223 | acpi_osi="string1" # add string1 -- only one string | ||
224 | acpi_osi="!string2" # remove built-in string2 | ||
225 | acpi_osi= # disable all strings | ||
226 | |||
227 | acpi_pm_good [X86] | ||
228 | Override the pmtimer bug detection: force the kernel | ||
229 | to assume that this machine's pmtimer latches its value | ||
230 | and always returns good values. | ||
231 | |||
248 | acpi.power_nocheck= [HW,ACPI] | 232 | acpi.power_nocheck= [HW,ACPI] |
249 | Format: 1/0 enable/disable the check of power state. | 233 | Format: 1/0 enable/disable the check of power state. |
250 | On some bogus BIOS the _PSC object/_STA object of | 234 | On some bogus BIOS the _PSC object/_STA object of |
@@ -253,30 +237,57 @@ and is between 256 and 4096 characters. It is defined in the file | |||
253 | power state again in power transition. | 237 | power state again in power transition. |
254 | 1 : disable the power state check | 238 | 1 : disable the power state check |
255 | 239 | ||
256 | acpi_pm_good [X86-32,X86-64] | 240 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode |
257 | Override the pmtimer bug detection: force the kernel | 241 | Format: { level | edge | high | low } |
258 | to assume that this machine's pmtimer latches its value | ||
259 | and always returns good values. | ||
260 | 242 | ||
261 | agp= [AGP] | 243 | acpi_serialize [HW,ACPI] force serialization of AML methods |
262 | { off | try_unsupported } | ||
263 | off: disable AGP support | ||
264 | try_unsupported: try to drive unsupported chipsets | ||
265 | (may crash computer or cause data corruption) | ||
266 | 244 | ||
267 | enable_timer_pin_1 [i386,x86-64] | 245 | acpi_skip_timer_override [HW,ACPI] |
268 | Enable PIN 1 of APIC timer | 246 | Recognize and ignore IRQ0/pin2 Interrupt Override. |
269 | Can be useful to work around chipset bugs | 247 | For broken nForce2 BIOS resulting in XT-PIC timer. |
270 | (in particular on some ATI chipsets). | ||
271 | The kernel tries to set a reasonable default. | ||
272 | 248 | ||
273 | disable_timer_pin_1 [i386,x86-64] | 249 | acpi_sleep= [HW,ACPI] Sleep options |
274 | Disable PIN 1 of APIC timer | 250 | Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, |
275 | Can be useful to work around chipset bugs. | 251 | old_ordering, s4_nonvs } |
252 | See Documentation/power/video.txt for information on | ||
253 | s3_bios and s3_mode. | ||
254 | s3_beep is for debugging; it makes the PC's speaker beep | ||
255 | as soon as the kernel's real-mode entry point is called. | ||
256 | s4_nohwsig prevents ACPI hardware signature from being | ||
257 | used during resume from hibernation. | ||
258 | old_ordering causes the ACPI 1.0 ordering of the _PTS | ||
259 | control method, with respect to putting devices into | ||
260 | low power states, to be enforced (the ACPI 2.0 ordering | ||
261 | of _PTS is used by default). | ||
262 | s4_nonvs prevents the kernel from saving/restoring the | ||
263 | ACPI NVS memory during hibernation. | ||
264 | |||
265 | acpi_use_timer_override [HW,ACPI] | ||
266 | Use timer override. For some broken Nvidia NF5 boards | ||
267 | that require a timer override, but don't have HPET | ||
268 | |||
269 | acpi_enforce_resources= [ACPI] | ||
270 | { strict | lax | no } | ||
271 | Check for resource conflicts between native drivers | ||
272 | and ACPI OperationRegions (SystemIO and SystemMemory | ||
273 | only). IO ports and memory declared in ACPI might be | ||
274 | used by the ACPI subsystem in arbitrary AML code and | ||
275 | can interfere with legacy drivers. | ||
276 | strict (default): access to resources claimed by ACPI | ||
277 | is denied; legacy drivers trying to access reserved | ||
278 | resources will fail to bind to device using them. | ||
279 | lax: access to resources claimed by ACPI is allowed; | ||
280 | legacy drivers trying to access reserved resources | ||
281 | will bind successfully but a warning message is logged. | ||
282 | no: ACPI OperationRegions are not marked as reserved, | ||
283 | no further checks are performed. | ||
276 | 284 | ||
277 | ad1848= [HW,OSS] | 285 | ad1848= [HW,OSS] |
278 | Format: <io>,<irq>,<dma>,<dma2>,<type> | 286 | Format: <io>,<irq>,<dma>,<dma2>,<type> |
279 | 287 | ||
288 | add_efi_memmap [EFI; X86] Include EFI memory map in | ||
289 | kernel's map of available physical RAM. | ||
290 | |||
280 | advansys= [HW,SCSI] | 291 | advansys= [HW,SCSI] |
281 | See header of drivers/scsi/advansys.c. | 292 | See header of drivers/scsi/advansys.c. |
282 | 293 | ||
@@ -287,6 +298,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
287 | Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq> | 298 | Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq> |
288 | See also header of sound/oss/aedsp16.c. | 299 | See also header of sound/oss/aedsp16.c. |
289 | 300 | ||
301 | agp= [AGP] | ||
302 | { off | try_unsupported } | ||
303 | off: disable AGP support | ||
304 | try_unsupported: try to drive unsupported chipsets | ||
305 | (may crash computer or cause data corruption) | ||
306 | |||
290 | aha152x= [HW,SCSI] | 307 | aha152x= [HW,SCSI] |
291 | See Documentation/scsi/aha152x.txt. | 308 | See Documentation/scsi/aha152x.txt. |
292 | 309 | ||
@@ -312,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
312 | flushed before they will be reused, which | 329 | flushed before they will be reused, which |
313 | is a lot of faster | 330 | is a lot of faster |
314 | 331 | ||
315 | amd_iommu_size= [HW,X86-64] | ||
316 | Define the size of the aperture for the AMD IOMMU | ||
317 | driver. Possible values are: | ||
318 | '32M', '64M' (default), '128M', '256M', '512M', '1G' | ||
319 | |||
320 | amijoy.map= [HW,JOY] Amiga joystick support | 332 | amijoy.map= [HW,JOY] Amiga joystick support |
321 | Map of devices attached to JOY0DAT and JOY1DAT | 333 | Map of devices attached to JOY0DAT and JOY1DAT |
322 | Format: <a>,<b> | 334 | Format: <a>,<b> |
@@ -334,7 +346,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
334 | not play well with APC CPU idle - disable it if you have | 346 | not play well with APC CPU idle - disable it if you have |
335 | APC and your system crashes randomly. | 347 | APC and your system crashes randomly. |
336 | 348 | ||
337 | apic= [APIC,i386] Advanced Programmable Interrupt Controller | 349 | apic= [APIC,X86-32] Advanced Programmable Interrupt Controller |
338 | Change the output verbosity whilst booting | 350 | Change the output verbosity whilst booting |
339 | Format: { quiet (default) | verbose | debug } | 351 | Format: { quiet (default) | verbose | debug } |
340 | Change the amount of debugging information output | 352 | Change the amount of debugging information output |
@@ -414,12 +426,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
414 | possible to determine what the correct size should be. | 426 | possible to determine what the correct size should be. |
415 | This option provides an override for these situations. | 427 | This option provides an override for these situations. |
416 | 428 | ||
417 | security= [SECURITY] Choose a security module to enable at boot. | ||
418 | If this boot parameter is not specified, only the first | ||
419 | security module asking for security registration will be | ||
420 | loaded. An invalid security module name will be treated | ||
421 | as if no module has been chosen. | ||
422 | |||
423 | capability.disable= | 429 | capability.disable= |
424 | [SECURITY] Disable capabilities. This would normally | 430 | [SECURITY] Disable capabilities. This would normally |
425 | be used only if an alternative security model is to be | 431 | be used only if an alternative security model is to be |
@@ -486,17 +492,18 @@ and is between 256 and 4096 characters. It is defined in the file | |||
486 | Also note the kernel might malfunction if you disable | 492 | Also note the kernel might malfunction if you disable |
487 | some critical bits. | 493 | some critical bits. |
488 | 494 | ||
489 | code_bytes [IA32/X86_64] How many bytes of object code to print | 495 | cmo_free_hint= [PPC] Format: { yes | no } |
496 | Specify whether pages are marked as being inactive | ||
497 | when they are freed. This is used in CMO environments | ||
498 | to determine OS memory pressure for page stealing by | ||
499 | a hypervisor. | ||
500 | Default: yes | ||
501 | |||
502 | code_bytes [X86] How many bytes of object code to print | ||
490 | in an oops report. | 503 | in an oops report. |
491 | Range: 0 - 8192 | 504 | Range: 0 - 8192 |
492 | Default: 64 | 505 | Default: 64 |
493 | 506 | ||
494 | hpet= [X86-32,HPET] option to control HPET usage | ||
495 | Format: { enable (default) | disable | force } | ||
496 | disable: disable HPET and use PIT instead | ||
497 | force: allow force enabled of undocumented chips (ICH4, | ||
498 | VIA, nVidia) | ||
499 | |||
500 | com20020= [HW,NET] ARCnet - COM20020 chipset | 507 | com20020= [HW,NET] ARCnet - COM20020 chipset |
501 | Format: | 508 | Format: |
502 | <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]] | 509 | <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]] |
@@ -540,22 +547,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
540 | console=brl,ttyS0 | 547 | console=brl,ttyS0 |
541 | For now, only VisioBraille is supported. | 548 | For now, only VisioBraille is supported. |
542 | 549 | ||
543 | earlycon= [KNL] Output early console device and options. | 550 | consoleblank= [KNL] The console blank (screen saver) timeout in |
544 | uart[8250],io,<addr>[,options] | 551 | seconds. Defaults to 10*60 = 10mins. A value of 0 |
545 | uart[8250],mmio,<addr>[,options] | 552 | disables the blank timer. |
546 | Start an early, polled-mode console on the 8250/16550 | ||
547 | UART at the specified I/O port or MMIO address. | ||
548 | The options are the same as for ttyS, above. | ||
549 | |||
550 | no_console_suspend | ||
551 | [HW] Never suspend the console | ||
552 | Disable suspending of consoles during suspend and | ||
553 | hibernate operations. Once disabled, debugging | ||
554 | messages can reach various consoles while the rest | ||
555 | of the system is being put to sleep (ie, while | ||
556 | debugging driver suspend/resume hooks). This may | ||
557 | not work reliably with all consoles, but is known | ||
558 | to work with serial and VGA consoles. | ||
559 | 553 | ||
560 | coredump_filter= | 554 | coredump_filter= |
561 | [KNL] Change the default value for | 555 | [KNL] Change the default value for |
@@ -604,36 +598,22 @@ and is between 256 and 4096 characters. It is defined in the file | |||
604 | 598 | ||
605 | debug_objects [KNL] Enable object debugging | 599 | debug_objects [KNL] Enable object debugging |
606 | 600 | ||
601 | no_debug_objects | ||
602 | [KNL] Disable object debugging | ||
603 | |||
607 | debugpat [X86] Enable PAT debugging | 604 | debugpat [X86] Enable PAT debugging |
608 | 605 | ||
609 | decnet.addr= [HW,NET] | 606 | decnet.addr= [HW,NET] |
610 | Format: <area>[,<node>] | 607 | Format: <area>[,<node>] |
611 | See also Documentation/networking/decnet.txt. | 608 | See also Documentation/networking/decnet.txt. |
612 | 609 | ||
613 | vt.default_blu= [VT] | 610 | default_hugepagesz= |
614 | Format: <blue0>,<blue1>,<blue2>,...,<blue15> | 611 | [same as hugepagesz=] The size of the default |
615 | Change the default blue palette of the console. | 612 | HugeTLB page size. This is the size represented by |
616 | This is a 16-member array composed of values | 613 | the legacy /proc/ hugepages APIs, used for SHM, and |
617 | ranging from 0-255. | 614 | default size when mounting hugetlbfs filesystems. |
618 | 615 | Defaults to the default architecture's huge page size | |
619 | vt.default_grn= [VT] | 616 | if not specified. |
620 | Format: <green0>,<green1>,<green2>,...,<green15> | ||
621 | Change the default green palette of the console. | ||
622 | This is a 16-member array composed of values | ||
623 | ranging from 0-255. | ||
624 | |||
625 | vt.default_red= [VT] | ||
626 | Format: <red0>,<red1>,<red2>,...,<red15> | ||
627 | Change the default red palette of the console. | ||
628 | This is a 16-member array composed of values | ||
629 | ranging from 0-255. | ||
630 | |||
631 | vt.default_utf8= | ||
632 | [VT] | ||
633 | Format=<0|1> | ||
634 | Set system-wide default UTF-8 mode for all tty's. | ||
635 | Default is 1, i.e. UTF-8 mode is enabled for all | ||
636 | newly opened terminals. | ||
637 | 617 | ||
638 | dhash_entries= [KNL] | 618 | dhash_entries= [KNL] |
639 | Set number of hash buckets for dentry cache. | 619 | Set number of hash buckets for dentry cache. |
@@ -646,27 +626,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
646 | Documentation/serial/digiepca.txt. | 626 | Documentation/serial/digiepca.txt. |
647 | 627 | ||
648 | disable_mtrr_cleanup [X86] | 628 | disable_mtrr_cleanup [X86] |
649 | enable_mtrr_cleanup [X86] | ||
650 | The kernel tries to adjust MTRR layout from continuous | 629 | The kernel tries to adjust MTRR layout from continuous |
651 | to discrete, to make X server driver able to add WB | 630 | to discrete, to make X server driver able to add WB |
652 | entry later. This parameter enables/disables that. | 631 | entry later. This parameter disables that. |
653 | |||
654 | mtrr_chunk_size=nn[KMG] [X86] | ||
655 | used for mtrr cleanup. It is largest continous chunk | ||
656 | that could hold holes aka. UC entries. | ||
657 | |||
658 | mtrr_gran_size=nn[KMG] [X86] | ||
659 | Used for mtrr cleanup. It is granularity of mtrr block. | ||
660 | Default is 1. | ||
661 | Large value could prevent small alignment from | ||
662 | using up MTRRs. | ||
663 | |||
664 | mtrr_spare_reg_nr=n [X86] | ||
665 | Format: <integer> | ||
666 | Range: 0,7 : spare reg number | ||
667 | Default : 1 | ||
668 | Used for mtrr cleanup. It is spare mtrr entries number. | ||
669 | Set to 2 or more if your graphical card needs more. | ||
670 | 632 | ||
671 | disable_mtrr_trim [X86, Intel and AMD only] | 633 | disable_mtrr_trim [X86, Intel and AMD only] |
672 | By default the kernel will trim any uncacheable | 634 | By default the kernel will trim any uncacheable |
@@ -674,13 +636,46 @@ and is between 256 and 4096 characters. It is defined in the file | |||
674 | MTRR settings. This parameter disables that behavior, | 636 | MTRR settings. This parameter disables that behavior, |
675 | possibly causing your machine to run very slowly. | 637 | possibly causing your machine to run very slowly. |
676 | 638 | ||
639 | disable_timer_pin_1 [X86] | ||
640 | Disable PIN 1 of APIC timer | ||
641 | Can be useful to work around chipset bugs. | ||
642 | |||
677 | dmasound= [HW,OSS] Sound subsystem buffers | 643 | dmasound= [HW,OSS] Sound subsystem buffers |
678 | 644 | ||
645 | dma_debug=off If the kernel is compiled with DMA_API_DEBUG support, | ||
646 | this option disables the debugging code at boot. | ||
647 | |||
648 | dma_debug_entries=<number> | ||
649 | This option allows to tune the number of preallocated | ||
650 | entries for DMA-API debugging code. One entry is | ||
651 | required per DMA-API allocation. Use this if the | ||
652 | DMA-API debugging code disables itself because the | ||
653 | architectural default is too low. | ||
654 | |||
655 | dma_debug_driver=<driver_name> | ||
656 | With this option the DMA-API debugging driver | ||
657 | filter feature can be enabled at boot time. Just | ||
658 | pass the driver to filter for as the parameter. | ||
659 | The filter can be disabled or changed to another | ||
660 | driver later using sysfs. | ||
661 | |||
679 | dscc4.setup= [NET] | 662 | dscc4.setup= [NET] |
680 | 663 | ||
681 | dtc3181e= [HW,SCSI] | 664 | dtc3181e= [HW,SCSI] |
682 | 665 | ||
683 | earlyprintk= [X86-32,X86-64,SH,BLACKFIN] | 666 | dynamic_printk Enables pr_debug()/dev_dbg() calls if |
667 | CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. | ||
668 | These can also be switched on/off via | ||
669 | <debugfs>/dynamic_printk/modules | ||
670 | |||
671 | earlycon= [KNL] Output early console device and options. | ||
672 | uart[8250],io,<addr>[,options] | ||
673 | uart[8250],mmio,<addr>[,options] | ||
674 | Start an early, polled-mode console on the 8250/16550 | ||
675 | UART at the specified I/O port or MMIO address. | ||
676 | The options are the same as for ttyS, above. | ||
677 | |||
678 | earlyprintk= [X86,SH,BLACKFIN] | ||
684 | earlyprintk=vga | 679 | earlyprintk=vga |
685 | earlyprintk=serial[,ttySn[,baudrate]] | 680 | earlyprintk=serial[,ttySn[,baudrate]] |
686 | earlyprintk=dbgp | 681 | earlyprintk=dbgp |
@@ -715,12 +710,23 @@ and is between 256 and 4096 characters. It is defined in the file | |||
715 | See Documentation/block/as-iosched.txt and | 710 | See Documentation/block/as-iosched.txt and |
716 | Documentation/block/deadline-iosched.txt for details. | 711 | Documentation/block/deadline-iosched.txt for details. |
717 | 712 | ||
718 | elfcorehdr= [IA64,PPC,SH,X86-32,X86_64] | 713 | elfcorehdr= [IA64,PPC,SH,X86] |
719 | Specifies physical address of start of kernel core | 714 | Specifies physical address of start of kernel core |
720 | image elf header. Generally kexec loader will | 715 | image elf header. Generally kexec loader will |
721 | pass this option to capture kernel. | 716 | pass this option to capture kernel. |
722 | See Documentation/kdump/kdump.txt for details. | 717 | See Documentation/kdump/kdump.txt for details. |
723 | 718 | ||
719 | enable_mtrr_cleanup [X86] | ||
720 | The kernel tries to adjust MTRR layout from continuous | ||
721 | to discrete, to make X server driver able to add WB | ||
722 | entry later. This parameter enables that. | ||
723 | |||
724 | enable_timer_pin_1 [X86] | ||
725 | Enable PIN 1 of APIC timer | ||
726 | Can be useful to work around chipset bugs | ||
727 | (in particular on some ATI chipsets). | ||
728 | The kernel tries to set a reasonable default. | ||
729 | |||
724 | enforcing [SELINUX] Set initial enforcing status. | 730 | enforcing [SELINUX] Set initial enforcing status. |
725 | Format: {"0" | "1"} | 731 | Format: {"0" | "1"} |
726 | See security/selinux/Kconfig help text. | 732 | See security/selinux/Kconfig help text. |
@@ -759,12 +765,25 @@ and is between 256 and 4096 characters. It is defined in the file | |||
759 | ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. | 765 | ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. |
760 | 766 | ||
761 | ftrace=[tracer] | 767 | ftrace=[tracer] |
762 | [ftrace] will set and start the specified tracer | 768 | [FTRACE] will set and start the specified tracer |
763 | as early as possible in order to facilitate early | 769 | as early as possible in order to facilitate early |
764 | boot debugging. | 770 | boot debugging. |
765 | 771 | ||
766 | ftrace_dump_on_oops | 772 | ftrace_dump_on_oops |
767 | [ftrace] will dump the trace buffers on oops. | 773 | [FTRACE] will dump the trace buffers on oops. |
774 | |||
775 | ftrace_filter=[function-list] | ||
776 | [FTRACE] Limit the functions traced by the function | ||
777 | tracer at boot up. function-list is a comma separated | ||
778 | list of functions. This list can be changed at run | ||
779 | time by the set_ftrace_filter file in the debugfs | ||
780 | tracing directory. | ||
781 | |||
782 | ftrace_notrace=[function-list] | ||
783 | [FTRACE] Do not trace the functions specified in | ||
784 | function-list. This list can be changed at run time | ||
785 | by the set_ftrace_notrace file in the debugfs | ||
786 | tracing directory. | ||
768 | 787 | ||
769 | gamecon.map[2|3]= | 788 | gamecon.map[2|3]= |
770 | [HW,JOY] Multisystem joystick and NES/SNES/PSX pad | 789 | [HW,JOY] Multisystem joystick and NES/SNES/PSX pad |
@@ -778,6 +797,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
778 | Format: off | on | 797 | Format: off | on |
779 | default: on | 798 | default: on |
780 | 799 | ||
800 | gcov_persist= [GCOV] When non-zero (default), profiling data for | ||
801 | kernel modules is saved and remains accessible via | ||
802 | debugfs, even when the module is unloaded/reloaded. | ||
803 | When zero, profiling data is discarded and associated | ||
804 | debugfs files are removed at module unload time. | ||
805 | |||
781 | gdth= [HW,SCSI] | 806 | gdth= [HW,SCSI] |
782 | See header of drivers/scsi/gdth.c. | 807 | See header of drivers/scsi/gdth.c. |
783 | 808 | ||
@@ -788,7 +813,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
788 | 813 | ||
789 | hashdist= [KNL,NUMA] Large hashes allocated during boot | 814 | hashdist= [KNL,NUMA] Large hashes allocated during boot |
790 | are distributed across NUMA nodes. Defaults on | 815 | are distributed across NUMA nodes. Defaults on |
791 | for IA-64, off otherwise. | 816 | for 64bit NUMA, off otherwise. |
792 | Format: 0 | 1 (for off | on) | 817 | Format: 0 | 1 (for off | on) |
793 | 818 | ||
794 | hcl= [IA-64] SGI's Hardware Graph compatibility layer | 819 | hcl= [IA-64] SGI's Hardware Graph compatibility layer |
@@ -808,6 +833,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
808 | hisax= [HW,ISDN] | 833 | hisax= [HW,ISDN] |
809 | See Documentation/isdn/README.HiSax. | 834 | See Documentation/isdn/README.HiSax. |
810 | 835 | ||
836 | hlt [BUGS=ARM,SH] | ||
837 | |||
838 | hpet= [X86-32,HPET] option to control HPET usage | ||
839 | Format: { enable (default) | disable | force | | ||
840 | verbose } | ||
841 | disable: disable HPET and use PIT instead | ||
842 | force: allow force enabled of undocumented chips (ICH4, | ||
843 | VIA, nVidia) | ||
844 | verbose: show contents of HPET registers during setup | ||
845 | |||
811 | hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. | 846 | hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. |
812 | hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. | 847 | hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. |
813 | On x86-64 and powerpc, this option can be specified | 848 | On x86-64 and powerpc, this option can be specified |
@@ -817,18 +852,18 @@ and is between 256 and 4096 characters. It is defined in the file | |||
817 | (when the CPU supports the "pdpe1gb" cpuinfo flag) | 852 | (when the CPU supports the "pdpe1gb" cpuinfo flag) |
818 | Note that 1GB pages can only be allocated at boot time | 853 | Note that 1GB pages can only be allocated at boot time |
819 | using hugepages= and not freed afterwards. | 854 | using hugepages= and not freed afterwards. |
820 | default_hugepagesz= | ||
821 | [same as hugepagesz=] The size of the default | ||
822 | HugeTLB page size. This is the size represented by | ||
823 | the legacy /proc/ hugepages APIs, used for SHM, and | ||
824 | default size when mounting hugetlbfs filesystems. | ||
825 | Defaults to the default architecture's huge page size | ||
826 | if not specified. | ||
827 | |||
828 | hlt [BUGS=ARM,SH] | ||
829 | 855 | ||
830 | hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC) | 856 | hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC) |
831 | terminal devices. Valid values: 0..8 | 857 | terminal devices. Valid values: 0..8 |
858 | hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs. | ||
859 | If specified, z/VM IUCV HVC accepts connections | ||
860 | from listed z/VM user IDs only. | ||
861 | |||
862 | i2c_bus= [HW] Override the default board specific I2C bus speed | ||
863 | or register an additional I2C bus that is not | ||
864 | registered from board initialization code. | ||
865 | Format: | ||
866 | <bus_id>,<clkrate> | ||
832 | 867 | ||
833 | i8042.debug [HW] Toggle i8042 debug mode | 868 | i8042.debug [HW] Toggle i8042 debug mode |
834 | i8042.direct [HW] Put keyboard port into non-translated mode | 869 | i8042.direct [HW] Put keyboard port into non-translated mode |
@@ -870,12 +905,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
870 | 905 | ||
871 | ide-core.nodma= [HW] (E)IDE subsystem | 906 | ide-core.nodma= [HW] (E)IDE subsystem |
872 | Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc | 907 | Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc |
873 | .vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom | 908 | .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr |
874 | .chs .ignore_cable are additional options | 909 | .cdrom .chs .ignore_cable are additional options |
875 | See Documentation/ide/ide.txt. | 910 | See Documentation/ide/ide.txt. |
876 | 911 | ||
877 | idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed | 912 | ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem |
878 | See Documentation/ide/ide.txt. | 913 | Claim all unknown PCI IDE storage controllers. |
879 | 914 | ||
880 | idle= [X86] | 915 | idle= [X86] |
881 | Format: idle=poll, idle=mwait, idle=halt, idle=nomwait | 916 | Format: idle=poll, idle=mwait, idle=halt, idle=nomwait |
@@ -892,9 +927,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
892 | In such case C2/C3 won't be used again. | 927 | In such case C2/C3 won't be used again. |
893 | idle=nomwait: Disable mwait for CPU C-states | 928 | idle=nomwait: Disable mwait for CPU C-states |
894 | 929 | ||
895 | ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem | ||
896 | Claim all unknown PCI IDE storage controllers. | ||
897 | |||
898 | ignore_loglevel [KNL] | 930 | ignore_loglevel [KNL] |
899 | Ignore loglevel setting - this will print /all/ | 931 | Ignore loglevel setting - this will print /all/ |
900 | kernel messages to the console. Useful for debugging. | 932 | kernel messages to the console. Useful for debugging. |
@@ -902,6 +934,21 @@ and is between 256 and 4096 characters. It is defined in the file | |||
902 | ihash_entries= [KNL] | 934 | ihash_entries= [KNL] |
903 | Set number of hash buckets for inode cache. | 935 | Set number of hash buckets for inode cache. |
904 | 936 | ||
937 | ima_audit= [IMA] | ||
938 | Format: { "0" | "1" } | ||
939 | 0 -- integrity auditing messages. (Default) | ||
940 | 1 -- enable informational integrity auditing messages. | ||
941 | |||
942 | ima_hash= [IMA] | ||
943 | Formt: { "sha1" | "md5" } | ||
944 | default: "sha1" | ||
945 | |||
946 | ima_tcb [IMA] | ||
947 | Load a policy which meets the needs of the Trusted | ||
948 | Computing Base. This means IMA will measure all | ||
949 | programs exec'd, files mmap'd for exec, and all files | ||
950 | opened for read by uid=0. | ||
951 | |||
905 | in2000= [HW,SCSI] | 952 | in2000= [HW,SCSI] |
906 | See header of drivers/scsi/in2000.c. | 953 | See header of drivers/scsi/in2000.c. |
907 | 954 | ||
@@ -919,25 +966,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
919 | inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver | 966 | inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver |
920 | Format: <irq> | 967 | Format: <irq> |
921 | 968 | ||
922 | inttest= [IA64] | ||
923 | |||
924 | iomem= Disable strict checking of access to MMIO memory | ||
925 | strict regions from userspace. | ||
926 | relaxed | ||
927 | |||
928 | iommu= [x86] | ||
929 | off | ||
930 | force | ||
931 | noforce | ||
932 | biomerge | ||
933 | panic | ||
934 | nopanic | ||
935 | merge | ||
936 | nomerge | ||
937 | forcesac | ||
938 | soft | ||
939 | |||
940 | |||
941 | intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option | 969 | intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option |
942 | on | 970 | on |
943 | Enable intel iommu driver. | 971 | Enable intel iommu driver. |
@@ -961,7 +989,29 @@ and is between 256 and 4096 characters. It is defined in the file | |||
961 | result in a hardware IOTLB flush operation as opposed | 989 | result in a hardware IOTLB flush operation as opposed |
962 | to batching them for performance. | 990 | to batching them for performance. |
963 | 991 | ||
964 | io_delay= [X86-32,X86-64] I/O delay method | 992 | inttest= [IA64] |
993 | |||
994 | iomem= Disable strict checking of access to MMIO memory | ||
995 | strict regions from userspace. | ||
996 | relaxed | ||
997 | |||
998 | iommu= [x86] | ||
999 | off | ||
1000 | force | ||
1001 | noforce | ||
1002 | biomerge | ||
1003 | panic | ||
1004 | nopanic | ||
1005 | merge | ||
1006 | nomerge | ||
1007 | forcesac | ||
1008 | soft | ||
1009 | |||
1010 | io7= [HW] IO7 for Marvel based alpha systems | ||
1011 | See comment before marvel_specify_io7 in | ||
1012 | arch/alpha/kernel/core_marvel.c. | ||
1013 | |||
1014 | io_delay= [X86] I/O delay method | ||
965 | 0x80 | 1015 | 0x80 |
966 | Standard port 0x80 based delay | 1016 | Standard port 0x80 based delay |
967 | 0xed | 1017 | 0xed |
@@ -971,10 +1021,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
971 | none | 1021 | none |
972 | No delay | 1022 | No delay |
973 | 1023 | ||
974 | io7= [HW] IO7 for Marvel based alpha systems | ||
975 | See comment before marvel_specify_io7 in | ||
976 | arch/alpha/kernel/core_marvel.c. | ||
977 | |||
978 | ip= [IP_PNP] | 1024 | ip= [IP_PNP] |
979 | See Documentation/filesystems/nfsroot.txt. | 1025 | See Documentation/filesystems/nfsroot.txt. |
980 | 1026 | ||
@@ -985,12 +1031,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
985 | ips= [HW,SCSI] Adaptec / IBM ServeRAID controller | 1031 | ips= [HW,SCSI] Adaptec / IBM ServeRAID controller |
986 | See header of drivers/scsi/ips.c. | 1032 | See header of drivers/scsi/ips.c. |
987 | 1033 | ||
988 | ports= [IP_VS_FTP] IPVS ftp helper module | ||
989 | Default is 21. | ||
990 | Up to 8 (IP_VS_APP_MAX_PORTS) ports | ||
991 | may be specified. | ||
992 | Format: <port>,<port>.... | ||
993 | |||
994 | irqfixup [HW] | 1034 | irqfixup [HW] |
995 | When an interrupt is not handled search all handlers | 1035 | When an interrupt is not handled search all handlers |
996 | for it. Intended to get systems with badly broken | 1036 | for it. Intended to get systems with badly broken |
@@ -1031,7 +1071,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1031 | js= [HW,JOY] Analog joystick | 1071 | js= [HW,JOY] Analog joystick |
1032 | See Documentation/input/joystick.txt. | 1072 | See Documentation/input/joystick.txt. |
1033 | 1073 | ||
1034 | kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter | 1074 | keepinitrd [HW,ARM] |
1075 | |||
1076 | kernelcore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter | ||
1035 | specifies the amount of memory usable by the kernel | 1077 | specifies the amount of memory usable by the kernel |
1036 | for non-movable allocations. The requested amount is | 1078 | for non-movable allocations. The requested amount is |
1037 | spread evenly throughout all nodes in the system. The | 1079 | spread evenly throughout all nodes in the system. The |
@@ -1047,30 +1089,22 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1047 | use the HighMem zone if it exists, and the Normal | 1089 | use the HighMem zone if it exists, and the Normal |
1048 | zone if it does not. | 1090 | zone if it does not. |
1049 | 1091 | ||
1050 | movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter | ||
1051 | is similar to kernelcore except it specifies the | ||
1052 | amount of memory used for migratable allocations. | ||
1053 | If both kernelcore and movablecore is specified, | ||
1054 | then kernelcore will be at *least* the specified | ||
1055 | value but may be more. If movablecore on its own | ||
1056 | is specified, the administrator must be careful | ||
1057 | that the amount of memory usable for all allocations | ||
1058 | is not too small. | ||
1059 | |||
1060 | keepinitrd [HW,ARM] | ||
1061 | |||
1062 | kstack=N [X86-32,X86-64] Print N words from the kernel stack | ||
1063 | in oops dumps. | ||
1064 | |||
1065 | kgdboc= [HW] kgdb over consoles. | 1092 | kgdboc= [HW] kgdb over consoles. |
1066 | Requires a tty driver that supports console polling. | 1093 | Requires a tty driver that supports console polling. |
1067 | (only serial suported for now) | 1094 | (only serial supported for now) |
1068 | Format: <serial_device>[,baud] | 1095 | Format: <serial_device>[,baud] |
1069 | 1096 | ||
1070 | kmac= [MIPS] korina ethernet MAC address. | 1097 | kmac= [MIPS] korina ethernet MAC address. |
1071 | Configure the RouterBoard 532 series on-chip | 1098 | Configure the RouterBoard 532 series on-chip |
1072 | Ethernet adapter MAC address. | 1099 | Ethernet adapter MAC address. |
1073 | 1100 | ||
1101 | kmemleak= [KNL] Boot-time kmemleak enable/disable | ||
1102 | Valid arguments: on, off | ||
1103 | Default: on | ||
1104 | |||
1105 | kstack=N [X86] Print N words from the kernel stack | ||
1106 | in oops dumps. | ||
1107 | |||
1074 | l2cr= [PPC] | 1108 | l2cr= [PPC] |
1075 | 1109 | ||
1076 | l3cr= [PPC] | 1110 | l3cr= [PPC] |
@@ -1078,7 +1112,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1078 | lapic [X86-32,APIC] Enable the local APIC even if BIOS | 1112 | lapic [X86-32,APIC] Enable the local APIC even if BIOS |
1079 | disabled it. | 1113 | disabled it. |
1080 | 1114 | ||
1081 | lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer | 1115 | lapic_timer_c2_ok [X86,APIC] trust the local apic timer |
1082 | in C2 power state. | 1116 | in C2 power state. |
1083 | 1117 | ||
1084 | libata.dma= [LIBATA] DMA control | 1118 | libata.dma= [LIBATA] DMA control |
@@ -1216,9 +1250,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1216 | (machvec) in a generic kernel. | 1250 | (machvec) in a generic kernel. |
1217 | Example: machvec=hpzx1_swiotlb | 1251 | Example: machvec=hpzx1_swiotlb |
1218 | 1252 | ||
1219 | max_loop= [LOOP] Maximum number of loopback devices that can | 1253 | max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater |
1220 | be mounted | 1254 | than or equal to this physical address is ignored. |
1221 | Format: <1-256> | ||
1222 | 1255 | ||
1223 | maxcpus= [SMP] Maximum number of processors that an SMP kernel | 1256 | maxcpus= [SMP] Maximum number of processors that an SMP kernel |
1224 | should make use of. maxcpus=n : n >= 0 limits the | 1257 | should make use of. maxcpus=n : n >= 0 limits the |
@@ -1226,8 +1259,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1226 | it is equivalent to "nosmp", which also disables | 1259 | it is equivalent to "nosmp", which also disables |
1227 | the IO APIC. | 1260 | the IO APIC. |
1228 | 1261 | ||
1229 | max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater than | 1262 | max_loop= [LOOP] Maximum number of loopback devices that can |
1230 | or equal to this physical address is ignored. | 1263 | be mounted |
1264 | Format: <1-256> | ||
1231 | 1265 | ||
1232 | max_luns= [SCSI] Maximum number of LUNs to probe. | 1266 | max_luns= [SCSI] Maximum number of LUNs to probe. |
1233 | Should be between 1 and 2^32-1. | 1267 | Should be between 1 and 2^32-1. |
@@ -1263,7 +1297,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1263 | [KNL,SH] Allow user to override the default size for | 1297 | [KNL,SH] Allow user to override the default size for |
1264 | per-device physically contiguous DMA buffers. | 1298 | per-device physically contiguous DMA buffers. |
1265 | 1299 | ||
1266 | memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact | 1300 | memmap=exactmap [KNL,X86] Enable setting of an exact |
1267 | E820 memory map, as specified by the user. | 1301 | E820 memory map, as specified by the user. |
1268 | Such memmap=exactmap lines can be constructed based on | 1302 | Such memmap=exactmap lines can be constructed based on |
1269 | BIOS output or other requirements. See the memmap=nn@ss | 1303 | BIOS output or other requirements. See the memmap=nn@ss |
@@ -1310,8 +1344,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1310 | 1344 | ||
1311 | memtest= [KNL,X86] Enable memtest | 1345 | memtest= [KNL,X86] Enable memtest |
1312 | Format: <integer> | 1346 | Format: <integer> |
1313 | range: 0,4 : pattern number | ||
1314 | default : 0 <disable> | 1347 | default : 0 <disable> |
1348 | Specifies the number of memtest passes to be | ||
1349 | performed. Each pass selects another test | ||
1350 | pattern from a given set of patterns. Memtest | ||
1351 | fills the memory with this pattern, validates | ||
1352 | memory contents and reserves bad memory | ||
1353 | regions that are detected. | ||
1315 | 1354 | ||
1316 | meye.*= [HW] Set MotionEye Camera parameters | 1355 | meye.*= [HW] Set MotionEye Camera parameters |
1317 | See Documentation/video4linux/meye.txt. | 1356 | See Documentation/video4linux/meye.txt. |
@@ -1330,6 +1369,27 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1330 | min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this | 1369 | min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this |
1331 | physical address is ignored. | 1370 | physical address is ignored. |
1332 | 1371 | ||
1372 | mini2440= [ARM,HW,KNL] | ||
1373 | Format:[0..2][b][c][t] | ||
1374 | Default: "0tb" | ||
1375 | MINI2440 configuration specification: | ||
1376 | 0 - The attached screen is the 3.5" TFT | ||
1377 | 1 - The attached screen is the 7" TFT | ||
1378 | 2 - The VGA Shield is attached (1024x768) | ||
1379 | Leaving out the screen size parameter will not load | ||
1380 | the TFT driver, and the framebuffer will be left | ||
1381 | unconfigured. | ||
1382 | b - Enable backlight. The TFT backlight pin will be | ||
1383 | linked to the kernel VESA blanking code and a GPIO | ||
1384 | LED. This parameter is not necessary when using the | ||
1385 | VGA shield. | ||
1386 | c - Enable the s3c camera interface. | ||
1387 | t - Reserved for enabling touchscreen support. The | ||
1388 | touchscreen support is not enabled in the mainstream | ||
1389 | kernel as of 2.6.30, a preliminary port can be found | ||
1390 | in the "bleeding edge" mini2440 support kernel at | ||
1391 | http://repo.or.cz/w/linux-2.6/mini2440.git | ||
1392 | |||
1333 | mminit_loglevel= | 1393 | mminit_loglevel= |
1334 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this | 1394 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this |
1335 | parameter allows control of the logging verbosity for | 1395 | parameter allows control of the logging verbosity for |
@@ -1349,6 +1409,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1349 | mousedev.yres= [MOUSE] Vertical screen resolution, used for devices | 1409 | mousedev.yres= [MOUSE] Vertical screen resolution, used for devices |
1350 | reporting absolute coordinates, such as tablets | 1410 | reporting absolute coordinates, such as tablets |
1351 | 1411 | ||
1412 | movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter | ||
1413 | is similar to kernelcore except it specifies the | ||
1414 | amount of memory used for migratable allocations. | ||
1415 | If both kernelcore and movablecore is specified, | ||
1416 | then kernelcore will be at *least* the specified | ||
1417 | value but may be more. If movablecore on its own | ||
1418 | is specified, the administrator must be careful | ||
1419 | that the amount of memory usable for all allocations | ||
1420 | is not too small. | ||
1421 | |||
1352 | mpu401= [HW,OSS] | 1422 | mpu401= [HW,OSS] |
1353 | Format: <io>,<irq> | 1423 | Format: <io>,<irq> |
1354 | 1424 | ||
@@ -1361,6 +1431,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1361 | mtdparts= [MTD] | 1431 | mtdparts= [MTD] |
1362 | See drivers/mtd/cmdlinepart.c. | 1432 | See drivers/mtd/cmdlinepart.c. |
1363 | 1433 | ||
1434 | onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration | ||
1435 | |||
1436 | Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock] | ||
1437 | |||
1438 | boundary - index of last SLC block on Flex-OneNAND. | ||
1439 | The remaining blocks are configured as MLC blocks. | ||
1440 | lock - Configure if Flex-OneNAND boundary should be locked. | ||
1441 | Once locked, the boundary cannot be changed. | ||
1442 | 1 indicates lock status, 0 indicates unlock status. | ||
1443 | |||
1364 | mtdset= [ARM] | 1444 | mtdset= [ARM] |
1365 | ARM/S3C2412 JIVE boot control | 1445 | ARM/S3C2412 JIVE boot control |
1366 | 1446 | ||
@@ -1370,6 +1450,23 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1370 | [HW] Make the MicroTouch USB driver use raw coordinates | 1450 | [HW] Make the MicroTouch USB driver use raw coordinates |
1371 | ('y', default) or cooked coordinates ('n') | 1451 | ('y', default) or cooked coordinates ('n') |
1372 | 1452 | ||
1453 | mtrr_chunk_size=nn[KMG] [X86] | ||
1454 | used for mtrr cleanup. It is largest continuous chunk | ||
1455 | that could hold holes aka. UC entries. | ||
1456 | |||
1457 | mtrr_gran_size=nn[KMG] [X86] | ||
1458 | Used for mtrr cleanup. It is granularity of mtrr block. | ||
1459 | Default is 1. | ||
1460 | Large value could prevent small alignment from | ||
1461 | using up MTRRs. | ||
1462 | |||
1463 | mtrr_spare_reg_nr=n [X86] | ||
1464 | Format: <integer> | ||
1465 | Range: 0,7 : spare reg number | ||
1466 | Default : 1 | ||
1467 | Used for mtrr cleanup. It is spare mtrr entries number. | ||
1468 | Set to 2 or more if your graphical card needs more. | ||
1469 | |||
1373 | n2= [NET] SDL Inc. RISCom/N2 synchronous serial card | 1470 | n2= [NET] SDL Inc. RISCom/N2 synchronous serial card |
1374 | 1471 | ||
1375 | NCR_D700= [HW,SCSI] | 1472 | NCR_D700= [HW,SCSI] |
@@ -1424,17 +1521,19 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1424 | when a NMI is triggered. | 1521 | when a NMI is triggered. |
1425 | Format: [state][,regs][,debounce][,die] | 1522 | Format: [state][,regs][,debounce][,die] |
1426 | 1523 | ||
1427 | nmi_watchdog= [KNL,BUGS=X86-32,X86-64] Debugging features for SMP kernels | 1524 | nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels |
1428 | Format: [panic,][num] | 1525 | Format: [panic,][num] |
1429 | Valid num: 0,1,2 | 1526 | Valid num: 0,1,2 |
1430 | 0 - turn nmi_watchdog off | 1527 | 0 - turn nmi_watchdog off |
1431 | 1 - use the IO-APIC timer for the NMI watchdog | 1528 | 1 - use the IO-APIC timer for the NMI watchdog |
1432 | 2 - use the local APIC for the NMI watchdog using | 1529 | 2 - use the local APIC for the NMI watchdog using |
1433 | a performance counter. Note: This will use one performance | 1530 | a performance counter. Note: This will use one |
1434 | counter and the local APIC's performance vector. | 1531 | performance counter and the local APIC's performance |
1435 | When panic is specified panic when an NMI watchdog timeout occurs. | 1532 | vector. |
1436 | This is useful when you use a panic=... timeout and need the box | 1533 | When panic is specified, panic when an NMI watchdog |
1437 | quickly up again. | 1534 | timeout occurs. |
1535 | This is useful when you use a panic=... timeout and | ||
1536 | need the box quickly up again. | ||
1438 | Instead of 1 and 2 it is possible to use the following | 1537 | Instead of 1 and 2 it is possible to use the following |
1439 | symbolic names: lapic and ioapic | 1538 | symbolic names: lapic and ioapic |
1440 | Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic | 1539 | Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic |
@@ -1443,6 +1542,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1443 | emulation library even if a 387 maths coprocessor | 1542 | emulation library even if a 387 maths coprocessor |
1444 | is present. | 1543 | is present. |
1445 | 1544 | ||
1545 | no_console_suspend | ||
1546 | [HW] Never suspend the console | ||
1547 | Disable suspending of consoles during suspend and | ||
1548 | hibernate operations. Once disabled, debugging | ||
1549 | messages can reach various consoles while the rest | ||
1550 | of the system is being put to sleep (ie, while | ||
1551 | debugging driver suspend/resume hooks). This may | ||
1552 | not work reliably with all consoles, but is known | ||
1553 | to work with serial and VGA consoles. | ||
1554 | |||
1446 | noaliencache [MM, NUMA, SLAB] Disables the allocation of alien | 1555 | noaliencache [MM, NUMA, SLAB] Disables the allocation of alien |
1447 | caches in the slab allocator. Saves per-node memory, | 1556 | caches in the slab allocator. Saves per-node memory, |
1448 | but will impact performance. | 1557 | but will impact performance. |
@@ -1457,17 +1566,19 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1457 | 1566 | ||
1458 | nocache [ARM] | 1567 | nocache [ARM] |
1459 | 1568 | ||
1569 | noclflush [BUGS=X86] Don't use the CLFLUSH instruction | ||
1570 | |||
1460 | nodelayacct [KNL] Disable per-task delay accounting | 1571 | nodelayacct [KNL] Disable per-task delay accounting |
1461 | 1572 | ||
1462 | nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. | 1573 | nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. |
1463 | 1574 | ||
1464 | nodsp [SH] Disable hardware DSP at boot time. | 1575 | nodsp [SH] Disable hardware DSP at boot time. |
1465 | 1576 | ||
1466 | noefi [X86-32,X86-64] Disable EFI runtime services support. | 1577 | noefi [X86] Disable EFI runtime services support. |
1467 | 1578 | ||
1468 | noexec [IA-64] | 1579 | noexec [IA-64] |
1469 | 1580 | ||
1470 | noexec [X86-32,X86-64] | 1581 | noexec [X86] |
1471 | On X86-32 available only on PAE configured kernels. | 1582 | On X86-32 available only on PAE configured kernels. |
1472 | noexec=on: enable non-executable mappings (default) | 1583 | noexec=on: enable non-executable mappings (default) |
1473 | noexec=off: disable non-executable mappings | 1584 | noexec=off: disable non-executable mappings |
@@ -1485,9 +1596,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1485 | register save and restore. The kernel will only save | 1596 | register save and restore. The kernel will only save |
1486 | legacy floating-point registers on task switch. | 1597 | legacy floating-point registers on task switch. |
1487 | 1598 | ||
1488 | noclflush [BUGS=X86] Don't use the CLFLUSH instruction | 1599 | noxsave [BUGS=X86] Disables x86 extended register state save |
1600 | and restore using xsave. The kernel will fallback to | ||
1601 | enabling legacy floating-point and sse state. | ||
1489 | 1602 | ||
1490 | nohlt [BUGS=ARM,SH] | 1603 | nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or |
1604 | wfi(ARM) instruction doesn't work correctly and not to | ||
1605 | use it. This is also useful when using JTAG debugger. | ||
1491 | 1606 | ||
1492 | no-hlt [BUGS=X86-32] Tells the kernel that the hlt | 1607 | no-hlt [BUGS=X86-32] Tells the kernel that the hlt |
1493 | instruction doesn't work correctly and not to | 1608 | instruction doesn't work correctly and not to |
@@ -1508,10 +1623,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1508 | Valid arguments: on, off | 1623 | Valid arguments: on, off |
1509 | Default: on | 1624 | Default: on |
1510 | 1625 | ||
1626 | noiotrap [SH] Disables trapped I/O port accesses. | ||
1627 | |||
1511 | noirqdebug [X86-32] Disables the code which attempts to detect and | 1628 | noirqdebug [X86-32] Disables the code which attempts to detect and |
1512 | disable unhandled interrupt sources. | 1629 | disable unhandled interrupt sources. |
1513 | 1630 | ||
1514 | no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for | 1631 | no_timer_check [X86,APIC] Disables the code which tests for |
1515 | broken timer IRQ sources. | 1632 | broken timer IRQ sources. |
1516 | 1633 | ||
1517 | noisapnp [ISAPNP] Disables ISA PnP code. | 1634 | noisapnp [ISAPNP] Disables ISA PnP code. |
@@ -1519,6 +1636,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1519 | noinitrd [RAM] Tells the kernel not to load any configured | 1636 | noinitrd [RAM] Tells the kernel not to load any configured |
1520 | initial RAM disk. | 1637 | initial RAM disk. |
1521 | 1638 | ||
1639 | nointremap [X86-64, Intel-IOMMU] Do not enable interrupt | ||
1640 | remapping. | ||
1641 | |||
1522 | nointroute [IA-64] | 1642 | nointroute [IA-64] |
1523 | 1643 | ||
1524 | nojitter [IA64] Disables jitter checking for ITC timers. | 1644 | nojitter [IA64] Disables jitter checking for ITC timers. |
@@ -1527,12 +1647,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1527 | 1647 | ||
1528 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. | 1648 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. |
1529 | 1649 | ||
1530 | nox2apic [X86-64,APIC] Do not enable x2APIC mode. | ||
1531 | |||
1532 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of | ||
1533 | default x2apic cluster mode on platforms | ||
1534 | supporting x2apic. | ||
1535 | |||
1536 | noltlbs [PPC] Do not use large page/tlb entries for kernel | 1650 | noltlbs [PPC] Do not use large page/tlb entries for kernel |
1537 | lowmem mapping on PPC40x. | 1651 | lowmem mapping on PPC40x. |
1538 | 1652 | ||
@@ -1543,6 +1657,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1543 | nomfgpt [X86-32] Disable Multi-Function General Purpose | 1657 | nomfgpt [X86-32] Disable Multi-Function General Purpose |
1544 | Timer usage (for AMD Geode machines). | 1658 | Timer usage (for AMD Geode machines). |
1545 | 1659 | ||
1660 | norandmaps Don't use address space randomization. Equivalent to | ||
1661 | echo 0 > /proc/sys/kernel/randomize_va_space | ||
1662 | |||
1546 | noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops | 1663 | noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops |
1547 | 1664 | ||
1548 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions | 1665 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions |
@@ -1567,7 +1684,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1567 | nosoftlockup [KNL] Disable the soft-lockup detector. | 1684 | nosoftlockup [KNL] Disable the soft-lockup detector. |
1568 | 1685 | ||
1569 | noswapaccount [KNL] Disable accounting of swap in memory resource | 1686 | noswapaccount [KNL] Disable accounting of swap in memory resource |
1570 | controller. (See Documentation/controllers/memory.txt) | 1687 | controller. (See Documentation/cgroups/memory.txt) |
1571 | 1688 | ||
1572 | nosync [HW,M68K] Disables sync negotiation for all devices. | 1689 | nosync [HW,M68K] Disables sync negotiation for all devices. |
1573 | 1690 | ||
@@ -1577,17 +1694,19 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1577 | 1694 | ||
1578 | nowb [ARM] | 1695 | nowb [ARM] |
1579 | 1696 | ||
1697 | nox2apic [X86-64,APIC] Do not enable x2APIC mode. | ||
1698 | |||
1580 | nptcg= [IA64] Override max number of concurrent global TLB | 1699 | nptcg= [IA64] Override max number of concurrent global TLB |
1581 | purges which is reported from either PAL_VM_SUMMARY or | 1700 | purges which is reported from either PAL_VM_SUMMARY or |
1582 | SAL PALO. | 1701 | SAL PALO. |
1583 | 1702 | ||
1703 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. | ||
1704 | |||
1584 | numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. | 1705 | numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. |
1585 | one of ['zone', 'node', 'default'] can be specified | 1706 | one of ['zone', 'node', 'default'] can be specified |
1586 | This can be set from sysctl after boot. | 1707 | This can be set from sysctl after boot. |
1587 | See Documentation/sysctl/vm.txt for details. | 1708 | See Documentation/sysctl/vm.txt for details. |
1588 | 1709 | ||
1589 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. | ||
1590 | |||
1591 | ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. | 1710 | ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. |
1592 | See Documentation/debugging-via-ohci1394.txt for more | 1711 | See Documentation/debugging-via-ohci1394.txt for more |
1593 | info. | 1712 | info. |
@@ -1605,6 +1724,14 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1605 | oprofile.timer= [HW] | 1724 | oprofile.timer= [HW] |
1606 | Use timer interrupt instead of performance counters | 1725 | Use timer interrupt instead of performance counters |
1607 | 1726 | ||
1727 | oprofile.cpu_type= Force an oprofile cpu type | ||
1728 | This might be useful if you have an older oprofile | ||
1729 | userland or if you want common events. | ||
1730 | Format: { archperfmon } | ||
1731 | archperfmon: [X86] Force use of architectural | ||
1732 | perfmon on Intel CPUs instead of the | ||
1733 | CPU specific event set. | ||
1734 | |||
1608 | osst= [HW,SCSI] SCSI Tape Driver | 1735 | osst= [HW,SCSI] SCSI Tape Driver |
1609 | Format: <buffer_size>,<write_threshold> | 1736 | Format: <buffer_size>,<write_threshold> |
1610 | See also Documentation/scsi/st.txt. | 1737 | See also Documentation/scsi/st.txt. |
@@ -1659,6 +1786,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1659 | See also Documentation/blockdev/paride.txt. | 1786 | See also Documentation/blockdev/paride.txt. |
1660 | 1787 | ||
1661 | pci=option[,option...] [PCI] various PCI subsystem options: | 1788 | pci=option[,option...] [PCI] various PCI subsystem options: |
1789 | earlydump [X86] dump PCI config space before the kernel | ||
1790 | changes anything | ||
1662 | off [X86] don't probe for the PCI bus | 1791 | off [X86] don't probe for the PCI bus |
1663 | bios [X86-32] force use of PCI BIOS, don't access | 1792 | bios [X86-32] force use of PCI BIOS, don't access |
1664 | the hardware directly. Use this if your machine | 1793 | the hardware directly. Use this if your machine |
@@ -1676,8 +1805,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1676 | disable the use of PCIE advanced error reporting. | 1805 | disable the use of PCIE advanced error reporting. |
1677 | nodomains [PCI] Disable support for multiple PCI | 1806 | nodomains [PCI] Disable support for multiple PCI |
1678 | root domains (aka PCI segments, in ACPI-speak). | 1807 | root domains (aka PCI segments, in ACPI-speak). |
1679 | nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI | 1808 | nommconf [X86] Disable use of MMCONFIG for PCI |
1680 | Configuration | 1809 | Configuration |
1810 | check_enable_amd_mmconf [X86] check for and enable | ||
1811 | properly configured MMIO access to PCI | ||
1812 | config space on AMD family 10h CPU | ||
1681 | nomsi [MSI] If the PCI_MSI kernel config parameter is | 1813 | nomsi [MSI] If the PCI_MSI kernel config parameter is |
1682 | enabled, this kernel boot option can be used to | 1814 | enabled, this kernel boot option can be used to |
1683 | disable the use of MSI interrupts system-wide. | 1815 | disable the use of MSI interrupts system-wide. |
@@ -1730,7 +1862,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1730 | IRQ routing is enabled. | 1862 | IRQ routing is enabled. |
1731 | noacpi [X86] Do not use ACPI for IRQ routing | 1863 | noacpi [X86] Do not use ACPI for IRQ routing |
1732 | or for PCI scanning. | 1864 | or for PCI scanning. |
1733 | use_crs [X86] Use _CRS for PCI resource | 1865 | nocrs [X86] Don't use _CRS for PCI resource |
1734 | allocation. | 1866 | allocation. |
1735 | routeirq Do IRQ routing for all PCI devices. | 1867 | routeirq Do IRQ routing for all PCI devices. |
1736 | This is normally done in pci_enable_device(), | 1868 | This is normally done in pci_enable_device(), |
@@ -1758,6 +1890,21 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1758 | cbmemsize=nn[KMG] The fixed amount of bus space which is | 1890 | cbmemsize=nn[KMG] The fixed amount of bus space which is |
1759 | reserved for the CardBus bridge's memory | 1891 | reserved for the CardBus bridge's memory |
1760 | window. The default value is 64 megabytes. | 1892 | window. The default value is 64 megabytes. |
1893 | resource_alignment= | ||
1894 | Format: | ||
1895 | [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] | ||
1896 | Specifies alignment and device to reassign | ||
1897 | aligned memory resources. | ||
1898 | If <order of align> is not specified, | ||
1899 | PAGE_SIZE is used as alignment. | ||
1900 | PCI-PCI bridge can be specified, if resource | ||
1901 | windows need to be expanded. | ||
1902 | ecrc= Enable/disable PCIe ECRC (transaction layer | ||
1903 | end-to-end CRC checking). | ||
1904 | bios: Use BIOS/firmware settings. This is the | ||
1905 | the default. | ||
1906 | off: Turn ECRC off | ||
1907 | on: Turn ECRC on. | ||
1761 | 1908 | ||
1762 | pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power | 1909 | pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power |
1763 | Management. | 1910 | Management. |
@@ -1816,10 +1963,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1816 | autoconfiguration. | 1963 | autoconfiguration. |
1817 | Ranges are in pairs (memory base and size). | 1964 | Ranges are in pairs (memory base and size). |
1818 | 1965 | ||
1819 | dynamic_printk Enables pr_debug()/dev_dbg() calls if | 1966 | ports= [IP_VS_FTP] IPVS ftp helper module |
1820 | CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. | 1967 | Default is 21. |
1821 | These can also be switched on/off via | 1968 | Up to 8 (IP_VS_APP_MAX_PORTS) ports |
1822 | <debugfs>/dynamic_printk/modules | 1969 | may be specified. |
1970 | Format: <port>,<port>.... | ||
1823 | 1971 | ||
1824 | print-fatal-signals= | 1972 | print-fatal-signals= |
1825 | [KNL] debug: print fatal signals | 1973 | [KNL] debug: print fatal signals |
@@ -1830,6 +1978,14 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1830 | printk.time= Show timing data prefixed to each printk message line | 1978 | printk.time= Show timing data prefixed to each printk message line |
1831 | Format: <bool> (1/Y/y=enable, 0/N/n=disable) | 1979 | Format: <bool> (1/Y/y=enable, 0/N/n=disable) |
1832 | 1980 | ||
1981 | processor.max_cstate= [HW,ACPI] | ||
1982 | Limit processor to maximum C-state | ||
1983 | max_cstate=9 overrides any DMI blacklist limit. | ||
1984 | |||
1985 | processor.nocst [HW,ACPI] | ||
1986 | Ignore the _CST method to determine C-states, | ||
1987 | instead using the legacy FADT method | ||
1988 | |||
1833 | profile= [KNL] Enable kernel profiling via /proc/profile | 1989 | profile= [KNL] Enable kernel profiling via /proc/profile |
1834 | Format: [schedule,]<number> | 1990 | Format: [schedule,]<number> |
1835 | Param: "schedule" - profile schedule points. | 1991 | Param: "schedule" - profile schedule points. |
@@ -1839,14 +1995,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1839 | Requires CONFIG_SCHEDSTATS | 1995 | Requires CONFIG_SCHEDSTATS |
1840 | Param: "kvm" - profile VM exits. | 1996 | Param: "kvm" - profile VM exits. |
1841 | 1997 | ||
1842 | processor.max_cstate= [HW,ACPI] | ||
1843 | Limit processor to maximum C-state | ||
1844 | max_cstate=9 overrides any DMI blacklist limit. | ||
1845 | |||
1846 | processor.nocst [HW,ACPI] | ||
1847 | Ignore the _CST method to determine C-states, | ||
1848 | instead using the legacy FADT method | ||
1849 | |||
1850 | prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk | 1998 | prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk |
1851 | before loading. | 1999 | before loading. |
1852 | See Documentation/blockdev/ramdisk.txt. | 2000 | See Documentation/blockdev/ramdisk.txt. |
@@ -1911,7 +2059,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1911 | 2059 | ||
1912 | relax_domain_level= | 2060 | relax_domain_level= |
1913 | [KNL, SMP] Set scheduler's default relax_domain_level. | 2061 | [KNL, SMP] Set scheduler's default relax_domain_level. |
1914 | See Documentation/cpusets.txt. | 2062 | See Documentation/cgroups/cpusets.txt. |
1915 | 2063 | ||
1916 | reserve= [KNL,BUGS] Force the kernel to ignore some iomem area | 2064 | reserve= [KNL,BUGS] Force the kernel to ignore some iomem area |
1917 | 2065 | ||
@@ -2000,7 +2148,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2000 | allowing boot to proceed. none ignores them, expecting | 2148 | allowing boot to proceed. none ignores them, expecting |
2001 | user space to do the scan. | 2149 | user space to do the scan. |
2002 | 2150 | ||
2003 | selinux [SELINUX] Disable or enable SELinux at boot time. | 2151 | security= [SECURITY] Choose a security module to enable at boot. |
2152 | If this boot parameter is not specified, only the first | ||
2153 | security module asking for security registration will be | ||
2154 | loaded. An invalid security module name will be treated | ||
2155 | as if no module has been chosen. | ||
2156 | |||
2157 | selinux= [SELINUX] Disable or enable SELinux at boot time. | ||
2004 | Format: { "0" | "1" } | 2158 | Format: { "0" | "1" } |
2005 | See security/selinux/Kconfig help text. | 2159 | See security/selinux/Kconfig help text. |
2006 | 0 -- disable. | 2160 | 0 -- disable. |
@@ -2009,15 +2163,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2009 | If enabled at boot time, /selinux/disable can be used | 2163 | If enabled at boot time, /selinux/disable can be used |
2010 | later to disable prior to initial policy load. | 2164 | later to disable prior to initial policy load. |
2011 | 2165 | ||
2012 | selinux_compat_net = | ||
2013 | [SELINUX] Set initial selinux_compat_net flag value. | ||
2014 | Format: { "0" | "1" } | ||
2015 | 0 -- use new secmark-based packet controls | ||
2016 | 1 -- use legacy packet controls | ||
2017 | Default value is 0 (preferred). | ||
2018 | Value can be changed at runtime via | ||
2019 | /selinux/compat_net. | ||
2020 | |||
2021 | serialnumber [BUGS=X86-32] | 2166 | serialnumber [BUGS=X86-32] |
2022 | 2167 | ||
2023 | shapers= [NET] | 2168 | shapers= [NET] |
@@ -2329,6 +2474,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2329 | 2474 | ||
2330 | tp720= [HW,PS2] | 2475 | tp720= [HW,PS2] |
2331 | 2476 | ||
2477 | trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size. | ||
2478 | |||
2332 | trix= [HW,OSS] MediaTrix AudioTrix Pro | 2479 | trix= [HW,OSS] MediaTrix AudioTrix Pro |
2333 | Format: | 2480 | Format: |
2334 | <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> | 2481 | <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> |
@@ -2364,7 +2511,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2364 | reported either. | 2511 | reported either. |
2365 | 2512 | ||
2366 | unknown_nmi_panic | 2513 | unknown_nmi_panic |
2367 | [X86-32,X86-64] | 2514 | [X86] |
2368 | Set unknown_nmi_panic=1 early on boot. | 2515 | Set unknown_nmi_panic=1 early on boot. |
2369 | 2516 | ||
2370 | usbcore.autosuspend= | 2517 | usbcore.autosuspend= |
@@ -2431,15 +2578,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2431 | medium is write-protected). | 2578 | medium is write-protected). |
2432 | Example: quirks=0419:aaf5:rl,0421:0433:rc | 2579 | Example: quirks=0419:aaf5:rl,0421:0433:rc |
2433 | 2580 | ||
2434 | add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in | 2581 | vdso= [X86,SH] |
2435 | kernel's map of available physical RAM. | ||
2436 | |||
2437 | vdso= [X86-32,SH,x86-64] | ||
2438 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) | 2582 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) |
2439 | vdso=1: enable VDSO (default) | 2583 | vdso=1: enable VDSO (default) |
2440 | vdso=0: disable VDSO mapping | 2584 | vdso=0: disable VDSO mapping |
2441 | 2585 | ||
2442 | vdso32= [X86-32,X86-64] | 2586 | vdso32= [X86] |
2443 | vdso32=2: enable compat VDSO (default with COMPAT_VDSO) | 2587 | vdso32=2: enable compat VDSO (default with COMPAT_VDSO) |
2444 | vdso32=1: enable 32-bit VDSO (default) | 2588 | vdso32=1: enable 32-bit VDSO (default) |
2445 | vdso32=0: disable 32-bit VDSO mapping | 2589 | vdso32=0: disable 32-bit VDSO mapping |
@@ -2472,6 +2616,31 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2472 | vmpoff= [KNL,S390] Perform z/VM CP command after power off. | 2616 | vmpoff= [KNL,S390] Perform z/VM CP command after power off. |
2473 | Format: <command> | 2617 | Format: <command> |
2474 | 2618 | ||
2619 | vt.default_blu= [VT] | ||
2620 | Format: <blue0>,<blue1>,<blue2>,...,<blue15> | ||
2621 | Change the default blue palette of the console. | ||
2622 | This is a 16-member array composed of values | ||
2623 | ranging from 0-255. | ||
2624 | |||
2625 | vt.default_grn= [VT] | ||
2626 | Format: <green0>,<green1>,<green2>,...,<green15> | ||
2627 | Change the default green palette of the console. | ||
2628 | This is a 16-member array composed of values | ||
2629 | ranging from 0-255. | ||
2630 | |||
2631 | vt.default_red= [VT] | ||
2632 | Format: <red0>,<red1>,<red2>,...,<red15> | ||
2633 | Change the default red palette of the console. | ||
2634 | This is a 16-member array composed of values | ||
2635 | ranging from 0-255. | ||
2636 | |||
2637 | vt.default_utf8= | ||
2638 | [VT] | ||
2639 | Format=<0|1> | ||
2640 | Set system-wide default UTF-8 mode for all tty's. | ||
2641 | Default is 1, i.e. UTF-8 mode is enabled for all | ||
2642 | newly opened terminals. | ||
2643 | |||
2475 | waveartist= [HW,OSS] | 2644 | waveartist= [HW,OSS] |
2476 | Format: <io>,<irq>,<dma>,<dma2> | 2645 | Format: <io>,<irq>,<dma>,<dma2> |
2477 | 2646 | ||
@@ -2484,6 +2653,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2484 | wdt= [WDT] Watchdog | 2653 | wdt= [WDT] Watchdog |
2485 | See Documentation/watchdog/wdt.txt. | 2654 | See Documentation/watchdog/wdt.txt. |
2486 | 2655 | ||
2656 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of | ||
2657 | default x2apic cluster mode on platforms | ||
2658 | supporting x2apic. | ||
2659 | |||
2487 | xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. | 2660 | xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. |
2488 | xd_geo= See header of drivers/block/xd.c. | 2661 | xd_geo= See header of drivers/block/xd.c. |
2489 | 2662 | ||
@@ -2491,9 +2664,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2491 | Format: | 2664 | Format: |
2492 | <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] | 2665 | <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] |
2493 | 2666 | ||
2494 | norandmaps Don't use address space randomization. Equivalent to | ||
2495 | echo 0 > /proc/sys/kernel/randomize_va_space | ||
2496 | |||
2497 | ______________________________________________________________________ | 2667 | ______________________________________________________________________ |
2498 | 2668 | ||
2499 | TODO: | 2669 | TODO: |
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt new file mode 100644 index 000000000000..363044609dad --- /dev/null +++ b/Documentation/kmemcheck.txt | |||
@@ -0,0 +1,773 @@ | |||
1 | GETTING STARTED WITH KMEMCHECK | ||
2 | ============================== | ||
3 | |||
4 | Vegard Nossum <vegardno@ifi.uio.no> | ||
5 | |||
6 | |||
7 | Contents | ||
8 | ======== | ||
9 | 0. Introduction | ||
10 | 1. Downloading | ||
11 | 2. Configuring and compiling | ||
12 | 3. How to use | ||
13 | 3.1. Booting | ||
14 | 3.2. Run-time enable/disable | ||
15 | 3.3. Debugging | ||
16 | 3.4. Annotating false positives | ||
17 | 4. Reporting errors | ||
18 | 5. Technical description | ||
19 | |||
20 | |||
21 | 0. Introduction | ||
22 | =============== | ||
23 | |||
24 | kmemcheck is a debugging feature for the Linux Kernel. More specifically, it | ||
25 | is a dynamic checker that detects and warns about some uses of uninitialized | ||
26 | memory. | ||
27 | |||
28 | Userspace programmers might be familiar with Valgrind's memcheck. The main | ||
29 | difference between memcheck and kmemcheck is that memcheck works for userspace | ||
30 | programs only, and kmemcheck works for the kernel only. The implementations | ||
31 | are of course vastly different. Because of this, kmemcheck is not as accurate | ||
32 | as memcheck, but it turns out to be good enough in practice to discover real | ||
33 | programmer errors that the compiler is not able to find through static | ||
34 | analysis. | ||
35 | |||
36 | Enabling kmemcheck on a kernel will probably slow it down to the extent that | ||
37 | the machine will not be usable for normal workloads such as e.g. an | ||
38 | interactive desktop. kmemcheck will also cause the kernel to use about twice | ||
39 | as much memory as normal. For this reason, kmemcheck is strictly a debugging | ||
40 | feature. | ||
41 | |||
42 | |||
43 | 1. Downloading | ||
44 | ============== | ||
45 | |||
46 | kmemcheck can only be downloaded using git. If you want to write patches | ||
47 | against the current code, you should use the kmemcheck development branch of | ||
48 | the tip tree. It is also possible to use the linux-next tree, which also | ||
49 | includes the latest version of kmemcheck. | ||
50 | |||
51 | Assuming that you've already cloned the linux-2.6.git repository, all you | ||
52 | have to do is add the -tip tree as a remote, like this: | ||
53 | |||
54 | $ git remote add tip git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git | ||
55 | |||
56 | To actually download the tree, fetch the remote: | ||
57 | |||
58 | $ git fetch tip | ||
59 | |||
60 | And to check out a new local branch with the kmemcheck code: | ||
61 | |||
62 | $ git checkout -b kmemcheck tip/kmemcheck | ||
63 | |||
64 | General instructions for the -tip tree can be found here: | ||
65 | http://people.redhat.com/mingo/tip.git/readme.txt | ||
66 | |||
67 | |||
68 | 2. Configuring and compiling | ||
69 | ============================ | ||
70 | |||
71 | kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of | ||
72 | configuration variables must have specific settings in order for the kmemcheck | ||
73 | menu to even appear in "menuconfig". These are: | ||
74 | |||
75 | o CONFIG_CC_OPTIMIZE_FOR_SIZE=n | ||
76 | |||
77 | This option is located under "General setup" / "Optimize for size". | ||
78 | |||
79 | Without this, gcc will use certain optimizations that usually lead to | ||
80 | false positive warnings from kmemcheck. An example of this is a 16-bit | ||
81 | field in a struct, where gcc may load 32 bits, then discard the upper | ||
82 | 16 bits. kmemcheck sees only the 32-bit load, and may trigger a | ||
83 | warning for the upper 16 bits (if they're uninitialized). | ||
84 | |||
85 | o CONFIG_SLAB=y or CONFIG_SLUB=y | ||
86 | |||
87 | This option is located under "General setup" / "Choose SLAB | ||
88 | allocator". | ||
89 | |||
90 | o CONFIG_FUNCTION_TRACER=n | ||
91 | |||
92 | This option is located under "Kernel hacking" / "Tracers" / "Kernel | ||
93 | Function Tracer" | ||
94 | |||
95 | When function tracing is compiled in, gcc emits a call to another | ||
96 | function at the beginning of every function. This means that when the | ||
97 | page fault handler is called, the ftrace framework will be called | ||
98 | before kmemcheck has had a chance to handle the fault. If ftrace then | ||
99 | modifies memory that was tracked by kmemcheck, the result is an | ||
100 | endless recursive page fault. | ||
101 | |||
102 | o CONFIG_DEBUG_PAGEALLOC=n | ||
103 | |||
104 | This option is located under "Kernel hacking" / "Debug page memory | ||
105 | allocations". | ||
106 | |||
107 | In addition, I highly recommend turning on CONFIG_DEBUG_INFO=y. This is also | ||
108 | located under "Kernel hacking". With this, you will be able to get line number | ||
109 | information from the kmemcheck warnings, which is extremely valuable in | ||
110 | debugging a problem. This option is not mandatory, however, because it slows | ||
111 | down the compilation process and produces a much bigger kernel image. | ||
112 | |||
113 | Now the kmemcheck menu should be visible (under "Kernel hacking" / "kmemcheck: | ||
114 | trap use of uninitialized memory"). Here follows a description of the | ||
115 | kmemcheck configuration variables: | ||
116 | |||
117 | o CONFIG_KMEMCHECK | ||
118 | |||
119 | This must be enabled in order to use kmemcheck at all... | ||
120 | |||
121 | o CONFIG_KMEMCHECK_[DISABLED | ENABLED | ONESHOT]_BY_DEFAULT | ||
122 | |||
123 | This option controls the status of kmemcheck at boot-time. "Enabled" | ||
124 | will enable kmemcheck right from the start, "disabled" will boot the | ||
125 | kernel as normal (but with the kmemcheck code compiled in, so it can | ||
126 | be enabled at run-time after the kernel has booted), and "one-shot" is | ||
127 | a special mode which will turn kmemcheck off automatically after | ||
128 | detecting the first use of uninitialized memory. | ||
129 | |||
130 | If you are using kmemcheck to actively debug a problem, then you | ||
131 | probably want to choose "enabled" here. | ||
132 | |||
133 | The one-shot mode is mostly useful in automated test setups because it | ||
134 | can prevent floods of warnings and increase the chances of the machine | ||
135 | surviving in case something is really wrong. In other cases, the one- | ||
136 | shot mode could actually be counter-productive because it would turn | ||
137 | itself off at the very first error -- in the case of a false positive | ||
138 | too -- and this would come in the way of debugging the specific | ||
139 | problem you were interested in. | ||
140 | |||
141 | If you would like to use your kernel as normal, but with a chance to | ||
142 | enable kmemcheck in case of some problem, it might be a good idea to | ||
143 | choose "disabled" here. When kmemcheck is disabled, most of the run- | ||
144 | time overhead is not incurred, and the kernel will be almost as fast | ||
145 | as normal. | ||
146 | |||
147 | o CONFIG_KMEMCHECK_QUEUE_SIZE | ||
148 | |||
149 | Select the maximum number of error reports to store in an internal | ||
150 | (fixed-size) buffer. Since errors can occur virtually anywhere and in | ||
151 | any context, we need a temporary storage area which is guaranteed not | ||
152 | to generate any other page faults when accessed. The queue will be | ||
153 | emptied as soon as a tasklet may be scheduled. If the queue is full, | ||
154 | new error reports will be lost. | ||
155 | |||
156 | The default value of 64 is probably fine. If some code produces more | ||
157 | than 64 errors within an irqs-off section, then the code is likely to | ||
158 | produce many, many more, too, and these additional reports seldom give | ||
159 | any more information (the first report is usually the most valuable | ||
160 | anyway). | ||
161 | |||
162 | This number might have to be adjusted if you are not using serial | ||
163 | console or similar to capture the kernel log. If you are using the | ||
164 | "dmesg" command to save the log, then getting a lot of kmemcheck | ||
165 | warnings might overflow the kernel log itself, and the earlier reports | ||
166 | will get lost in that way instead. Try setting this to 10 or so on | ||
167 | such a setup. | ||
168 | |||
169 | o CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT | ||
170 | |||
171 | Select the number of shadow bytes to save along with each entry of the | ||
172 | error-report queue. These bytes indicate what parts of an allocation | ||
173 | are initialized, uninitialized, etc. and will be displayed when an | ||
174 | error is detected to help the debugging of a particular problem. | ||
175 | |||
176 | The number entered here is actually the logarithm of the number of | ||
177 | bytes that will be saved. So if you pick for example 5 here, kmemcheck | ||
178 | will save 2^5 = 32 bytes. | ||
179 | |||
180 | The default value should be fine for debugging most problems. It also | ||
181 | fits nicely within 80 columns. | ||
182 | |||
183 | o CONFIG_KMEMCHECK_PARTIAL_OK | ||
184 | |||
185 | This option (when enabled) works around certain GCC optimizations that | ||
186 | produce 32-bit reads from 16-bit variables where the upper 16 bits are | ||
187 | thrown away afterwards. | ||
188 | |||
189 | The default value (enabled) is recommended. This may of course hide | ||
190 | some real errors, but disabling it would probably produce a lot of | ||
191 | false positives. | ||
192 | |||
193 | o CONFIG_KMEMCHECK_BITOPS_OK | ||
194 | |||
195 | This option silences warnings that would be generated for bit-field | ||
196 | accesses where not all the bits are initialized at the same time. This | ||
197 | may also hide some real bugs. | ||
198 | |||
199 | This option is probably obsolete, or it should be replaced with | ||
200 | the kmemcheck-/bitfield-annotations for the code in question. The | ||
201 | default value is therefore fine. | ||
202 | |||
203 | Now compile the kernel as usual. | ||
204 | |||
205 | |||
206 | 3. How to use | ||
207 | ============= | ||
208 | |||
209 | 3.1. Booting | ||
210 | ============ | ||
211 | |||
212 | First some information about the command-line options. There is only one | ||
213 | option specific to kmemcheck, and this is called "kmemcheck". It can be used | ||
214 | to override the default mode as chosen by the CONFIG_KMEMCHECK_*_BY_DEFAULT | ||
215 | option. Its possible settings are: | ||
216 | |||
217 | o kmemcheck=0 (disabled) | ||
218 | o kmemcheck=1 (enabled) | ||
219 | o kmemcheck=2 (one-shot mode) | ||
220 | |||
221 | If SLUB debugging has been enabled in the kernel, it may take precedence over | ||
222 | kmemcheck in such a way that the slab caches which are under SLUB debugging | ||
223 | will not be tracked by kmemcheck. In order to ensure that this doesn't happen | ||
224 | (even though it shouldn't by default), use SLUB's boot option "slub_debug", | ||
225 | like this: slub_debug=- | ||
226 | |||
227 | In fact, this option may also be used for fine-grained control over SLUB vs. | ||
228 | kmemcheck. For example, if the command line includes "kmemcheck=1 | ||
229 | slub_debug=,dentry", then SLUB debugging will be used only for the "dentry" | ||
230 | slab cache, and with kmemcheck tracking all the other caches. This is advanced | ||
231 | usage, however, and is not generally recommended. | ||
232 | |||
233 | |||
234 | 3.2. Run-time enable/disable | ||
235 | ============================ | ||
236 | |||
237 | When the kernel has booted, it is possible to enable or disable kmemcheck at | ||
238 | run-time. WARNING: This feature is still experimental and may cause false | ||
239 | positive warnings to appear. Therefore, try not to use this. If you find that | ||
240 | it doesn't work properly (e.g. you see an unreasonable amount of warnings), I | ||
241 | will be happy to take bug reports. | ||
242 | |||
243 | Use the file /proc/sys/kernel/kmemcheck for this purpose, e.g.: | ||
244 | |||
245 | $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck | ||
246 | |||
247 | The numbers are the same as for the kmemcheck= command-line option. | ||
248 | |||
249 | |||
250 | 3.3. Debugging | ||
251 | ============== | ||
252 | |||
253 | A typical report will look something like this: | ||
254 | |||
255 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
256 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
257 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
258 | ^ | ||
259 | |||
260 | Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A | ||
261 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
262 | RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 | ||
263 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
264 | RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 | ||
265 | RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 | ||
266 | R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e | ||
267 | R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 | ||
268 | FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 | ||
269 | CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 | ||
270 | CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 | ||
271 | DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | ||
272 | DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 | ||
273 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
274 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
275 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
276 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
277 | [<ffffffffffffffff>] 0xffffffffffffffff | ||
278 | |||
279 | The single most valuable information in this report is the RIP (or EIP on 32- | ||
280 | bit) value. This will help us pinpoint exactly which instruction that caused | ||
281 | the warning. | ||
282 | |||
283 | If your kernel was compiled with CONFIG_DEBUG_INFO=y, then all we have to do | ||
284 | is give this address to the addr2line program, like this: | ||
285 | |||
286 | $ addr2line -e vmlinux -i ffffffff8104ede8 | ||
287 | arch/x86/include/asm/string_64.h:12 | ||
288 | include/asm-generic/siginfo.h:287 | ||
289 | kernel/signal.c:380 | ||
290 | kernel/signal.c:410 | ||
291 | |||
292 | The "-e vmlinux" tells addr2line which file to look in. IMPORTANT: This must | ||
293 | be the vmlinux of the kernel that produced the warning in the first place! If | ||
294 | not, the line number information will almost certainly be wrong. | ||
295 | |||
296 | The "-i" tells addr2line to also print the line numbers of inlined functions. | ||
297 | In this case, the flag was very important, because otherwise, it would only | ||
298 | have printed the first line, which is just a call to memcpy(), which could be | ||
299 | called from a thousand places in the kernel, and is therefore not very useful. | ||
300 | These inlined functions would not show up in the stack trace above, simply | ||
301 | because the kernel doesn't load the extra debugging information. This | ||
302 | technique can of course be used with ordinary kernel oopses as well. | ||
303 | |||
304 | In this case, it's the caller of memcpy() that is interesting, and it can be | ||
305 | found in include/asm-generic/siginfo.h, line 287: | ||
306 | |||
307 | 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) | ||
308 | 282 { | ||
309 | 283 if (from->si_code < 0) | ||
310 | 284 memcpy(to, from, sizeof(*to)); | ||
311 | 285 else | ||
312 | 286 /* _sigchld is currently the largest know union member */ | ||
313 | 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); | ||
314 | 288 } | ||
315 | |||
316 | Since this was a read (kmemcheck usually warns about reads only, though it can | ||
317 | warn about writes to unallocated or freed memory as well), it was probably the | ||
318 | "from" argument which contained some uninitialized bytes. Following the chain | ||
319 | of calls, we move upwards to see where "from" was allocated or initialized, | ||
320 | kernel/signal.c, line 380: | ||
321 | |||
322 | 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | ||
323 | 360 { | ||
324 | ... | ||
325 | 367 list_for_each_entry(q, &list->list, list) { | ||
326 | 368 if (q->info.si_signo == sig) { | ||
327 | 369 if (first) | ||
328 | 370 goto still_pending; | ||
329 | 371 first = q; | ||
330 | ... | ||
331 | 377 if (first) { | ||
332 | 378 still_pending: | ||
333 | 379 list_del_init(&first->list); | ||
334 | 380 copy_siginfo(info, &first->info); | ||
335 | 381 __sigqueue_free(first); | ||
336 | ... | ||
337 | 392 } | ||
338 | 393 } | ||
339 | |||
340 | Here, it is &first->info that is being passed on to copy_siginfo(). The | ||
341 | variable "first" was found on a list -- passed in as the second argument to | ||
342 | collect_signal(). We continue our journey through the stack, to figure out | ||
343 | where the item on "list" was allocated or initialized. We move to line 410: | ||
344 | |||
345 | 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | ||
346 | 396 siginfo_t *info) | ||
347 | 397 { | ||
348 | ... | ||
349 | 410 collect_signal(sig, pending, info); | ||
350 | ... | ||
351 | 414 } | ||
352 | |||
353 | Now we need to follow the "pending" pointer, since that is being passed on to | ||
354 | collect_signal() as "list". At this point, we've run out of lines from the | ||
355 | "addr2line" output. Not to worry, we just paste the next addresses from the | ||
356 | kmemcheck stack dump, i.e.: | ||
357 | |||
358 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
359 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
360 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
361 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
362 | |||
363 | $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ | ||
364 | ffffffff8100b87d ffffffff8100c7b5 | ||
365 | kernel/signal.c:446 | ||
366 | kernel/signal.c:1806 | ||
367 | arch/x86/kernel/signal.c:805 | ||
368 | arch/x86/kernel/signal.c:871 | ||
369 | arch/x86/kernel/entry_64.S:694 | ||
370 | |||
371 | Remember that since these addresses were found on the stack and not as the | ||
372 | RIP value, they actually point to the _next_ instruction (they are return | ||
373 | addresses). This becomes obvious when we look at the code for line 446: | ||
374 | |||
375 | 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | ||
376 | 423 { | ||
377 | ... | ||
378 | 431 signr = __dequeue_signal(&tsk->signal->shared_pending, | ||
379 | 432 mask, info); | ||
380 | 433 /* | ||
381 | 434 * itimer signal ? | ||
382 | 435 * | ||
383 | 436 * itimers are process shared and we restart periodic | ||
384 | 437 * itimers in the signal delivery path to prevent DoS | ||
385 | 438 * attacks in the high resolution timer case. This is | ||
386 | 439 * compliant with the old way of self restarting | ||
387 | 440 * itimers, as the SIGALRM is a legacy signal and only | ||
388 | 441 * queued once. Changing the restart behaviour to | ||
389 | 442 * restart the timer in the signal dequeue path is | ||
390 | 443 * reducing the timer noise on heavy loaded !highres | ||
391 | 444 * systems too. | ||
392 | 445 */ | ||
393 | 446 if (unlikely(signr == SIGALRM)) { | ||
394 | ... | ||
395 | 489 } | ||
396 | |||
397 | So instead of looking at 446, we should be looking at 431, which is the line | ||
398 | that executes just before 446. Here we see that what we are looking for is | ||
399 | &tsk->signal->shared_pending. | ||
400 | |||
401 | Our next task is now to figure out which function that puts items on this | ||
402 | "shared_pending" list. A crude, but efficient tool, is git grep: | ||
403 | |||
404 | $ git grep -n 'shared_pending' kernel/ | ||
405 | ... | ||
406 | kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; | ||
407 | kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; | ||
408 | ... | ||
409 | |||
410 | There were more results, but none of them were related to list operations, | ||
411 | and these were the only assignments. We inspect the line numbers more closely | ||
412 | and find that this is indeed where items are being added to the list: | ||
413 | |||
414 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
415 | 817 int group) | ||
416 | 818 { | ||
417 | ... | ||
418 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
419 | ... | ||
420 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
421 | 852 (is_si_special(info) || | ||
422 | 853 info->si_code >= 0))); | ||
423 | 854 if (q) { | ||
424 | 855 list_add_tail(&q->list, &pending->list); | ||
425 | ... | ||
426 | 890 } | ||
427 | |||
428 | and: | ||
429 | |||
430 | 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | ||
431 | 1310 { | ||
432 | .... | ||
433 | 1339 pending = group ? &t->signal->shared_pending : &t->pending; | ||
434 | 1340 list_add_tail(&q->list, &pending->list); | ||
435 | .... | ||
436 | 1347 } | ||
437 | |||
438 | In the first case, the list element we are looking for, "q", is being returned | ||
439 | from the function __sigqueue_alloc(), which looks like an allocation function. | ||
440 | Let's take a look at it: | ||
441 | |||
442 | 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, | ||
443 | 188 int override_rlimit) | ||
444 | 189 { | ||
445 | 190 struct sigqueue *q = NULL; | ||
446 | 191 struct user_struct *user; | ||
447 | 192 | ||
448 | 193 /* | ||
449 | 194 * We won't get problems with the target's UID changing under us | ||
450 | 195 * because changing it requires RCU be used, and if t != current, the | ||
451 | 196 * caller must be holding the RCU readlock (by way of a spinlock) and | ||
452 | 197 * we use RCU protection here | ||
453 | 198 */ | ||
454 | 199 user = get_uid(__task_cred(t)->user); | ||
455 | 200 atomic_inc(&user->sigpending); | ||
456 | 201 if (override_rlimit || | ||
457 | 202 atomic_read(&user->sigpending) <= | ||
458 | 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) | ||
459 | 204 q = kmem_cache_alloc(sigqueue_cachep, flags); | ||
460 | 205 if (unlikely(q == NULL)) { | ||
461 | 206 atomic_dec(&user->sigpending); | ||
462 | 207 free_uid(user); | ||
463 | 208 } else { | ||
464 | 209 INIT_LIST_HEAD(&q->list); | ||
465 | 210 q->flags = 0; | ||
466 | 211 q->user = user; | ||
467 | 212 } | ||
468 | 213 | ||
469 | 214 return q; | ||
470 | 215 } | ||
471 | |||
472 | We see that this function initializes q->list, q->flags, and q->user. It seems | ||
473 | that now is the time to look at the definition of "struct sigqueue", e.g.: | ||
474 | |||
475 | 14 struct sigqueue { | ||
476 | 15 struct list_head list; | ||
477 | 16 int flags; | ||
478 | 17 siginfo_t info; | ||
479 | 18 struct user_struct *user; | ||
480 | 19 }; | ||
481 | |||
482 | And, you might remember, it was a memcpy() on &first->info that caused the | ||
483 | warning, so this makes perfect sense. It also seems reasonable to assume that | ||
484 | it is the caller of __sigqueue_alloc() that has the responsibility of filling | ||
485 | out (initializing) this member. | ||
486 | |||
487 | But just which fields of the struct were uninitialized? Let's look at | ||
488 | kmemcheck's report again: | ||
489 | |||
490 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
491 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
492 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
493 | ^ | ||
494 | |||
495 | These first two lines are the memory dump of the memory object itself, and the | ||
496 | shadow bytemap, respectively. The memory object itself is in this case | ||
497 | &first->info. Just beware that the start of this dump is NOT the start of the | ||
498 | object itself! The position of the caret (^) corresponds with the address of | ||
499 | the read (ffff88003e4a2024). | ||
500 | |||
501 | The shadow bytemap dump legend is as follows: | ||
502 | |||
503 | i - initialized | ||
504 | u - uninitialized | ||
505 | a - unallocated (memory has been allocated by the slab layer, but has not | ||
506 | yet been handed off to anybody) | ||
507 | f - freed (memory has been allocated by the slab layer, but has been freed | ||
508 | by the previous owner) | ||
509 | |||
510 | In order to figure out where (relative to the start of the object) the | ||
511 | uninitialized memory was located, we have to look at the disassembly. For | ||
512 | that, we'll need the RIP address again: | ||
513 | |||
514 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
515 | |||
516 | $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: | ||
517 | ffffffff8104edc8: mov %r8,0x8(%r8) | ||
518 | ffffffff8104edcc: test %r10d,%r10d | ||
519 | ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> | ||
520 | ffffffff8104edd5: mov %rax,%rdx | ||
521 | ffffffff8104edd8: mov $0xc,%ecx | ||
522 | ffffffff8104eddd: mov %r13,%rdi | ||
523 | ffffffff8104ede0: mov $0x30,%eax | ||
524 | ffffffff8104ede5: mov %rdx,%rsi | ||
525 | ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) | ||
526 | ffffffff8104edea: test $0x2,%al | ||
527 | ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> | ||
528 | ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) | ||
529 | ffffffff8104edf0: test $0x1,%al | ||
530 | ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> | ||
531 | ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) | ||
532 | ffffffff8104edf5: mov %r8,%rdi | ||
533 | ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> | ||
534 | |||
535 | As expected, it's the "rep movsl" instruction from the memcpy() that causes | ||
536 | the warning. We know about REP MOVSL that it uses the register RCX to count | ||
537 | the number of remaining iterations. By taking a look at the register dump | ||
538 | again (from the kmemcheck report), we can figure out how many bytes were left | ||
539 | to copy: | ||
540 | |||
541 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
542 | |||
543 | By looking at the disassembly, we also see that %ecx is being loaded with the | ||
544 | value $0xc just before (ffffffff8104edd8), so we are very lucky. Keep in mind | ||
545 | that this is the number of iterations, not bytes. And since this is a "long" | ||
546 | operation, we need to multiply by 4 to get the number of bytes. So this means | ||
547 | that the uninitialized value was encountered at 4 * (0xc - 0x9) = 12 bytes | ||
548 | from the start of the object. | ||
549 | |||
550 | We can now try to figure out which field of the "struct siginfo" that was not | ||
551 | initialized. This is the beginning of the struct: | ||
552 | |||
553 | 40 typedef struct siginfo { | ||
554 | 41 int si_signo; | ||
555 | 42 int si_errno; | ||
556 | 43 int si_code; | ||
557 | 44 | ||
558 | 45 union { | ||
559 | .. | ||
560 | 92 } _sifields; | ||
561 | 93 } siginfo_t; | ||
562 | |||
563 | On 64-bit, the int is 4 bytes long, so it must the the union member that has | ||
564 | not been initialized. We can verify this using gdb: | ||
565 | |||
566 | $ gdb vmlinux | ||
567 | ... | ||
568 | (gdb) p &((struct siginfo *) 0)->_sifields | ||
569 | $1 = (union {...} *) 0x10 | ||
570 | |||
571 | Actually, it seems that the union member is located at offset 0x10 -- which | ||
572 | means that gcc has inserted 4 bytes of padding between the members si_code | ||
573 | and _sifields. We can now get a fuller picture of the memory dump: | ||
574 | |||
575 | _----------------------------=> si_code | ||
576 | / _--------------------=> (padding) | ||
577 | | / _------------=> _sifields(._kill._pid) | ||
578 | | | / _----=> _sifields(._kill._uid) | ||
579 | | | | / | ||
580 | -------|-------|-------|-------| | ||
581 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
582 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
583 | |||
584 | This allows us to realize another important fact: si_code contains the value | ||
585 | 0x80. Remember that x86 is little endian, so the first 4 bytes "80000000" are | ||
586 | really the number 0x00000080. With a bit of research, we find that this is | ||
587 | actually the constant SI_KERNEL defined in include/asm-generic/siginfo.h: | ||
588 | |||
589 | 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ | ||
590 | |||
591 | This macro is used in exactly one place in the x86 kernel: In send_signal() | ||
592 | in kernel/signal.c: | ||
593 | |||
594 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
595 | 817 int group) | ||
596 | 818 { | ||
597 | ... | ||
598 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
599 | ... | ||
600 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
601 | 852 (is_si_special(info) || | ||
602 | 853 info->si_code >= 0))); | ||
603 | 854 if (q) { | ||
604 | 855 list_add_tail(&q->list, &pending->list); | ||
605 | 856 switch ((unsigned long) info) { | ||
606 | ... | ||
607 | 865 case (unsigned long) SEND_SIG_PRIV: | ||
608 | 866 q->info.si_signo = sig; | ||
609 | 867 q->info.si_errno = 0; | ||
610 | 868 q->info.si_code = SI_KERNEL; | ||
611 | 869 q->info.si_pid = 0; | ||
612 | 870 q->info.si_uid = 0; | ||
613 | 871 break; | ||
614 | ... | ||
615 | 890 } | ||
616 | |||
617 | Not only does this match with the .si_code member, it also matches the place | ||
618 | we found earlier when looking for where siginfo_t objects are enqueued on the | ||
619 | "shared_pending" list. | ||
620 | |||
621 | So to sum up: It seems that it is the padding introduced by the compiler | ||
622 | between two struct fields that is uninitialized, and this gets reported when | ||
623 | we do a memcpy() on the struct. This means that we have identified a false | ||
624 | positive warning. | ||
625 | |||
626 | Normally, kmemcheck will not report uninitialized accesses in memcpy() calls | ||
627 | when both the source and destination addresses are tracked. (Instead, we copy | ||
628 | the shadow bytemap as well). In this case, the destination address clearly | ||
629 | was not tracked. We can dig a little deeper into the stack trace from above: | ||
630 | |||
631 | arch/x86/kernel/signal.c:805 | ||
632 | arch/x86/kernel/signal.c:871 | ||
633 | arch/x86/kernel/entry_64.S:694 | ||
634 | |||
635 | And we clearly see that the destination siginfo object is located on the | ||
636 | stack: | ||
637 | |||
638 | 782 static void do_signal(struct pt_regs *regs) | ||
639 | 783 { | ||
640 | 784 struct k_sigaction ka; | ||
641 | 785 siginfo_t info; | ||
642 | ... | ||
643 | 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
644 | ... | ||
645 | 854 } | ||
646 | |||
647 | And this &info is what eventually gets passed to copy_siginfo() as the | ||
648 | destination argument. | ||
649 | |||
650 | Now, even though we didn't find an actual error here, the example is still a | ||
651 | good one, because it shows how one would go about to find out what the report | ||
652 | was all about. | ||
653 | |||
654 | |||
655 | 3.4. Annotating false positives | ||
656 | =============================== | ||
657 | |||
658 | There are a few different ways to make annotations in the source code that | ||
659 | will keep kmemcheck from checking and reporting certain allocations. Here | ||
660 | they are: | ||
661 | |||
662 | o __GFP_NOTRACK_FALSE_POSITIVE | ||
663 | |||
664 | This flag can be passed to kmalloc() or kmem_cache_alloc() (therefore | ||
665 | also to other functions that end up calling one of these) to indicate | ||
666 | that the allocation should not be tracked because it would lead to | ||
667 | a false positive report. This is a "big hammer" way of silencing | ||
668 | kmemcheck; after all, even if the false positive pertains to | ||
669 | particular field in a struct, for example, we will now lose the | ||
670 | ability to find (real) errors in other parts of the same struct. | ||
671 | |||
672 | Example: | ||
673 | |||
674 | /* No warnings will ever trigger on accessing any part of x */ | ||
675 | x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); | ||
676 | |||
677 | o kmemcheck_bitfield_begin(name)/kmemcheck_bitfield_end(name) and | ||
678 | kmemcheck_annotate_bitfield(ptr, name) | ||
679 | |||
680 | The first two of these three macros can be used inside struct | ||
681 | definitions to signal, respectively, the beginning and end of a | ||
682 | bitfield. Additionally, this will assign the bitfield a name, which | ||
683 | is given as an argument to the macros. | ||
684 | |||
685 | Having used these markers, one can later use | ||
686 | kmemcheck_annotate_bitfield() at the point of allocation, to indicate | ||
687 | which parts of the allocation is part of a bitfield. | ||
688 | |||
689 | Example: | ||
690 | |||
691 | struct foo { | ||
692 | int x; | ||
693 | |||
694 | kmemcheck_bitfield_begin(flags); | ||
695 | int flag_a:1; | ||
696 | int flag_b:1; | ||
697 | kmemcheck_bitfield_end(flags); | ||
698 | |||
699 | int y; | ||
700 | }; | ||
701 | |||
702 | struct foo *x = kmalloc(sizeof *x); | ||
703 | |||
704 | /* No warnings will trigger on accessing the bitfield of x */ | ||
705 | kmemcheck_annotate_bitfield(x, flags); | ||
706 | |||
707 | Note that kmemcheck_annotate_bitfield() can be used even before the | ||
708 | return value of kmalloc() is checked -- in other words, passing NULL | ||
709 | as the first argument is legal (and will do nothing). | ||
710 | |||
711 | |||
712 | 4. Reporting errors | ||
713 | =================== | ||
714 | |||
715 | As we have seen, kmemcheck will produce false positive reports. Therefore, it | ||
716 | is not very wise to blindly post kmemcheck warnings to mailing lists and | ||
717 | maintainers. Instead, I encourage maintainers and developers to find errors | ||
718 | in their own code. If you get a warning, you can try to work around it, try | ||
719 | to figure out if it's a real error or not, or simply ignore it. Most | ||
720 | developers know their own code and will quickly and efficiently determine the | ||
721 | root cause of a kmemcheck report. This is therefore also the most efficient | ||
722 | way to work with kmemcheck. | ||
723 | |||
724 | That said, we (the kmemcheck maintainers) will always be on the lookout for | ||
725 | false positives that we can annotate and silence. So whatever you find, | ||
726 | please drop us a note privately! Kernel configs and steps to reproduce (if | ||
727 | available) are of course a great help too. | ||
728 | |||
729 | Happy hacking! | ||
730 | |||
731 | |||
732 | 5. Technical description | ||
733 | ======================== | ||
734 | |||
735 | kmemcheck works by marking memory pages non-present. This means that whenever | ||
736 | somebody attempts to access the page, a page fault is generated. The page | ||
737 | fault handler notices that the page was in fact only hidden, and so it calls | ||
738 | on the kmemcheck code to make further investigations. | ||
739 | |||
740 | When the investigations are completed, kmemcheck "shows" the page by marking | ||
741 | it present (as it would be under normal circumstances). This way, the | ||
742 | interrupted code can continue as usual. | ||
743 | |||
744 | But after the instruction has been executed, we should hide the page again, so | ||
745 | that we can catch the next access too! Now kmemcheck makes use of a debugging | ||
746 | feature of the processor, namely single-stepping. When the processor has | ||
747 | finished the one instruction that generated the memory access, a debug | ||
748 | exception is raised. From here, we simply hide the page again and continue | ||
749 | execution, this time with the single-stepping feature turned off. | ||
750 | |||
751 | kmemcheck requires some assistance from the memory allocator in order to work. | ||
752 | The memory allocator needs to | ||
753 | |||
754 | 1. Tell kmemcheck about newly allocated pages and pages that are about to | ||
755 | be freed. This allows kmemcheck to set up and tear down the shadow memory | ||
756 | for the pages in question. The shadow memory stores the status of each | ||
757 | byte in the allocation proper, e.g. whether it is initialized or | ||
758 | uninitialized. | ||
759 | |||
760 | 2. Tell kmemcheck which parts of memory should be marked uninitialized. | ||
761 | There are actually a few more states, such as "not yet allocated" and | ||
762 | "recently freed". | ||
763 | |||
764 | If a slab cache is set up using the SLAB_NOTRACK flag, it will never return | ||
765 | memory that can take page faults because of kmemcheck. | ||
766 | |||
767 | If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still | ||
768 | request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. | ||
769 | This does not prevent the page faults from occurring, however, but marks the | ||
770 | object in question as being initialized so that no warnings will ever be | ||
771 | produced for this object. | ||
772 | |||
773 | Currently, the SLAB and SLUB allocators are supported by kmemcheck. | ||
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt new file mode 100644 index 000000000000..0112da3b9ab8 --- /dev/null +++ b/Documentation/kmemleak.txt | |||
@@ -0,0 +1,142 @@ | |||
1 | Kernel Memory Leak Detector | ||
2 | =========================== | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | Kmemleak provides a way of detecting possible kernel memory leaks in a | ||
8 | way similar to a tracing garbage collector | ||
9 | (http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors), | ||
10 | with the difference that the orphan objects are not freed but only | ||
11 | reported via /sys/kernel/debug/kmemleak. A similar method is used by the | ||
12 | Valgrind tool (memcheck --leak-check) to detect the memory leaks in | ||
13 | user-space applications. | ||
14 | |||
15 | Usage | ||
16 | ----- | ||
17 | |||
18 | CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel | ||
19 | thread scans the memory every 10 minutes (by default) and prints any new | ||
20 | unreferenced objects found. To trigger an intermediate scan and display | ||
21 | all the possible memory leaks: | ||
22 | |||
23 | # mount -t debugfs nodev /sys/kernel/debug/ | ||
24 | # cat /sys/kernel/debug/kmemleak | ||
25 | |||
26 | Note that the orphan objects are listed in the order they were allocated | ||
27 | and one object at the beginning of the list may cause other subsequent | ||
28 | objects to be reported as orphan. | ||
29 | |||
30 | Memory scanning parameters can be modified at run-time by writing to the | ||
31 | /sys/kernel/debug/kmemleak file. The following parameters are supported: | ||
32 | |||
33 | off - disable kmemleak (irreversible) | ||
34 | stack=on - enable the task stacks scanning | ||
35 | stack=off - disable the tasks stacks scanning | ||
36 | scan=on - start the automatic memory scanning thread | ||
37 | scan=off - stop the automatic memory scanning thread | ||
38 | scan=<secs> - set the automatic memory scanning period in seconds (0 | ||
39 | to disable it) | ||
40 | |||
41 | Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on | ||
42 | the kernel command line. | ||
43 | |||
44 | Basic Algorithm | ||
45 | --------------- | ||
46 | |||
47 | The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and | ||
48 | friends are traced and the pointers, together with additional | ||
49 | information like size and stack trace, are stored in a prio search tree. | ||
50 | The corresponding freeing function calls are tracked and the pointers | ||
51 | removed from the kmemleak data structures. | ||
52 | |||
53 | An allocated block of memory is considered orphan if no pointer to its | ||
54 | start address or to any location inside the block can be found by | ||
55 | scanning the memory (including saved registers). This means that there | ||
56 | might be no way for the kernel to pass the address of the allocated | ||
57 | block to a freeing function and therefore the block is considered a | ||
58 | memory leak. | ||
59 | |||
60 | The scanning algorithm steps: | ||
61 | |||
62 | 1. mark all objects as white (remaining white objects will later be | ||
63 | considered orphan) | ||
64 | 2. scan the memory starting with the data section and stacks, checking | ||
65 | the values against the addresses stored in the prio search tree. If | ||
66 | a pointer to a white object is found, the object is added to the | ||
67 | gray list | ||
68 | 3. scan the gray objects for matching addresses (some white objects | ||
69 | can become gray and added at the end of the gray list) until the | ||
70 | gray set is finished | ||
71 | 4. the remaining white objects are considered orphan and reported via | ||
72 | /sys/kernel/debug/kmemleak | ||
73 | |||
74 | Some allocated memory blocks have pointers stored in the kernel's | ||
75 | internal data structures and they cannot be detected as orphans. To | ||
76 | avoid this, kmemleak can also store the number of values pointing to an | ||
77 | address inside the block address range that need to be found so that the | ||
78 | block is not considered a leak. One example is __vmalloc(). | ||
79 | |||
80 | Kmemleak API | ||
81 | ------------ | ||
82 | |||
83 | See the include/linux/kmemleak.h header for the functions prototype. | ||
84 | |||
85 | kmemleak_init - initialize kmemleak | ||
86 | kmemleak_alloc - notify of a memory block allocation | ||
87 | kmemleak_free - notify of a memory block freeing | ||
88 | kmemleak_not_leak - mark an object as not a leak | ||
89 | kmemleak_ignore - do not scan or report an object as leak | ||
90 | kmemleak_scan_area - add scan areas inside a memory block | ||
91 | kmemleak_no_scan - do not scan a memory block | ||
92 | kmemleak_erase - erase an old value in a pointer variable | ||
93 | kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness | ||
94 | kmemleak_free_recursive - as kmemleak_free but checks the recursiveness | ||
95 | |||
96 | Dealing with false positives/negatives | ||
97 | -------------------------------------- | ||
98 | |||
99 | The false negatives are real memory leaks (orphan objects) but not | ||
100 | reported by kmemleak because values found during the memory scanning | ||
101 | point to such objects. To reduce the number of false negatives, kmemleak | ||
102 | provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and | ||
103 | kmemleak_erase functions (see above). The task stacks also increase the | ||
104 | amount of false negatives and their scanning is not enabled by default. | ||
105 | |||
106 | The false positives are objects wrongly reported as being memory leaks | ||
107 | (orphan). For objects known not to be leaks, kmemleak provides the | ||
108 | kmemleak_not_leak function. The kmemleak_ignore could also be used if | ||
109 | the memory block is known not to contain other pointers and it will no | ||
110 | longer be scanned. | ||
111 | |||
112 | Some of the reported leaks are only transient, especially on SMP | ||
113 | systems, because of pointers temporarily stored in CPU registers or | ||
114 | stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing | ||
115 | the minimum age of an object to be reported as a memory leak. | ||
116 | |||
117 | Limitations and Drawbacks | ||
118 | ------------------------- | ||
119 | |||
120 | The main drawback is the reduced performance of memory allocation and | ||
121 | freeing. To avoid other penalties, the memory scanning is only performed | ||
122 | when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is | ||
123 | intended for debugging purposes where the performance might not be the | ||
124 | most important requirement. | ||
125 | |||
126 | To keep the algorithm simple, kmemleak scans for values pointing to any | ||
127 | address inside a block's address range. This may lead to an increased | ||
128 | number of false negatives. However, it is likely that a real memory leak | ||
129 | will eventually become visible. | ||
130 | |||
131 | Another source of false negatives is the data stored in non-pointer | ||
132 | values. In a future version, kmemleak could only scan the pointer | ||
133 | members in the allocated structures. This feature would solve many of | ||
134 | the false negative cases described above. | ||
135 | |||
136 | The tool can report false positives. These are cases where an allocated | ||
137 | block doesn't need to be freed (some cases in the init_call functions), | ||
138 | the pointer is calculated by other methods than the usual container_of | ||
139 | macro or the pointer is stored in a location not scanned by kmemleak. | ||
140 | |||
141 | Page allocations and ioremap are not tracked. Only the ARM and x86 | ||
142 | architectures are currently supported. | ||
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index b2e374586bd8..c79ab996dada 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt | |||
@@ -132,7 +132,7 @@ kobject_name(): | |||
132 | const char *kobject_name(const struct kobject * kobj); | 132 | const char *kobject_name(const struct kobject * kobj); |
133 | 133 | ||
134 | There is a helper function to both initialize and add the kobject to the | 134 | There is a helper function to both initialize and add the kobject to the |
135 | kernel at the same time, called supprisingly enough kobject_init_and_add(): | 135 | kernel at the same time, called surprisingly enough kobject_init_and_add(): |
136 | 136 | ||
137 | int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, | 137 | int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, |
138 | struct kobject *parent, const char *fmt, ...); | 138 | struct kobject *parent, const char *fmt, ...); |
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 48b3de90eb1e..053037a1fe6d 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -212,7 +212,9 @@ hit, Kprobes calls kp->pre_handler. After the probed instruction | |||
212 | is single-stepped, Kprobe calls kp->post_handler. If a fault | 212 | is single-stepped, Kprobe calls kp->post_handler. If a fault |
213 | occurs during execution of kp->pre_handler or kp->post_handler, | 213 | occurs during execution of kp->pre_handler or kp->post_handler, |
214 | or during single-stepping of the probed instruction, Kprobes calls | 214 | or during single-stepping of the probed instruction, Kprobes calls |
215 | kp->fault_handler. Any or all handlers can be NULL. | 215 | kp->fault_handler. Any or all handlers can be NULL. If kp->flags |
216 | is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled, | ||
217 | so, it's handlers aren't hit until calling enable_kprobe(kp). | ||
216 | 218 | ||
217 | NOTE: | 219 | NOTE: |
218 | 1. With the introduction of the "symbol_name" field to struct kprobe, | 220 | 1. With the introduction of the "symbol_name" field to struct kprobe, |
@@ -363,6 +365,26 @@ probes) in the specified array, they clear the addr field of those | |||
363 | incorrect probes. However, other probes in the array are | 365 | incorrect probes. However, other probes in the array are |
364 | unregistered correctly. | 366 | unregistered correctly. |
365 | 367 | ||
368 | 4.7 disable_*probe | ||
369 | |||
370 | #include <linux/kprobes.h> | ||
371 | int disable_kprobe(struct kprobe *kp); | ||
372 | int disable_kretprobe(struct kretprobe *rp); | ||
373 | int disable_jprobe(struct jprobe *jp); | ||
374 | |||
375 | Temporarily disables the specified *probe. You can enable it again by using | ||
376 | enable_*probe(). You must specify the probe which has been registered. | ||
377 | |||
378 | 4.8 enable_*probe | ||
379 | |||
380 | #include <linux/kprobes.h> | ||
381 | int enable_kprobe(struct kprobe *kp); | ||
382 | int enable_kretprobe(struct kretprobe *rp); | ||
383 | int enable_jprobe(struct jprobe *jp); | ||
384 | |||
385 | Enables *probe which has been disabled by disable_*probe(). You must specify | ||
386 | the probe which has been registered. | ||
387 | |||
366 | 5. Kprobes Features and Limitations | 388 | 5. Kprobes Features and Limitations |
367 | 389 | ||
368 | Kprobes allows multiple probes at the same address. Currently, | 390 | Kprobes allows multiple probes at the same address. Currently, |
@@ -485,9 +507,9 @@ http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) | |||
485 | Appendix A: The kprobes debugfs interface | 507 | Appendix A: The kprobes debugfs interface |
486 | 508 | ||
487 | With recent kernels (> 2.6.20) the list of registered kprobes is visible | 509 | With recent kernels (> 2.6.20) the list of registered kprobes is visible |
488 | under the /debug/kprobes/ directory (assuming debugfs is mounted at /debug). | 510 | under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug). |
489 | 511 | ||
490 | /debug/kprobes/list: Lists all registered probes on the system | 512 | /sys/kernel/debug/kprobes/list: Lists all registered probes on the system |
491 | 513 | ||
492 | c015d71a k vfs_read+0x0 | 514 | c015d71a k vfs_read+0x0 |
493 | c011a316 j do_fork+0x0 | 515 | c011a316 j do_fork+0x0 |
@@ -500,10 +522,14 @@ the probe. If the probed function belongs to a module, the module name | |||
500 | is also specified. Following columns show probe status. If the probe is on | 522 | is also specified. Following columns show probe status. If the probe is on |
501 | a virtual address that is no longer valid (module init sections, module | 523 | a virtual address that is no longer valid (module init sections, module |
502 | virtual addresses that correspond to modules that've been unloaded), | 524 | virtual addresses that correspond to modules that've been unloaded), |
503 | such probes are marked with [GONE]. | 525 | such probes are marked with [GONE]. If the probe is temporarily disabled, |
526 | such probes are marked with [DISABLED]. | ||
504 | 527 | ||
505 | /debug/kprobes/enabled: Turn kprobes ON/OFF | 528 | /sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly. |
506 | 529 | ||
507 | Provides a knob to globally turn registered kprobes ON or OFF. By default, | 530 | Provides a knob to globally and forcibly turn registered kprobes ON or OFF. |
508 | all kprobes are enabled. By echoing "0" to this file, all registered probes | 531 | By default, all kprobes are enabled. By echoing "0" to this file, all |
509 | will be disarmed, till such time a "1" is echoed to this file. | 532 | registered probes will be disarmed, till such time a "1" is echoed to this |
533 | file. Note that this knob just disarms and arms all kprobes and doesn't | ||
534 | change each probe's disabling state. This means that disabled kprobes (marked | ||
535 | [DISABLED]) will be not enabled if you turn ON all kprobes by this knob. | ||
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt index 2b3a6b5260bf..0768fcc3ba3e 100644 --- a/Documentation/laptops/acer-wmi.txt +++ b/Documentation/laptops/acer-wmi.txt | |||
@@ -1,9 +1,9 @@ | |||
1 | Acer Laptop WMI Extras Driver | 1 | Acer Laptop WMI Extras Driver |
2 | http://code.google.com/p/aceracpi | 2 | http://code.google.com/p/aceracpi |
3 | Version 0.2 | 3 | Version 0.3 |
4 | 18th August 2008 | 4 | 4th April 2009 |
5 | 5 | ||
6 | Copyright 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk> | 6 | Copyright 2007-2009 Carlos Corbacho <carlos@strangeworlds.co.uk> |
7 | 7 | ||
8 | acer-wmi is a driver to allow you to control various parts of your Acer laptop | 8 | acer-wmi is a driver to allow you to control various parts of your Acer laptop |
9 | hardware under Linux which are exposed via ACPI-WMI. | 9 | hardware under Linux which are exposed via ACPI-WMI. |
@@ -36,7 +36,11 @@ not possible in kernel space from a 64 bit OS. | |||
36 | Supported Hardware | 36 | Supported Hardware |
37 | ****************** | 37 | ****************** |
38 | 38 | ||
39 | Please see the website for the current list of known working hardare: | 39 | NOTE: The Acer Aspire One is not supported hardware. It cannot work with |
40 | acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been | ||
41 | blacklisted until that happens. | ||
42 | |||
43 | Please see the website for the current list of known working hardware: | ||
40 | 44 | ||
41 | http://code.google.com/p/aceracpi/wiki/SupportedHardware | 45 | http://code.google.com/p/aceracpi/wiki/SupportedHardware |
42 | 46 | ||
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt index 8b2bc1572d98..23ce7d350d1a 100644 --- a/Documentation/laptops/sony-laptop.txt +++ b/Documentation/laptops/sony-laptop.txt | |||
@@ -22,7 +22,7 @@ If your laptop model supports it, you will find sysfs files in the | |||
22 | /sys/class/backlight/sony/ | 22 | /sys/class/backlight/sony/ |
23 | directory. You will be able to query and set the current screen | 23 | directory. You will be able to query and set the current screen |
24 | brightness: | 24 | brightness: |
25 | brightness get/set screen brightness (an iteger | 25 | brightness get/set screen brightness (an integer |
26 | between 0 and 7) | 26 | between 0 and 7) |
27 | actual_brightness reading from this file will query the HW | 27 | actual_brightness reading from this file will query the HW |
28 | to get real brightness value | 28 | to get real brightness value |
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 41bc99fa1884..78e354b42f67 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | ThinkPad ACPI Extras Driver | 1 | ThinkPad ACPI Extras Driver |
2 | 2 | ||
3 | Version 0.22 | 3 | Version 0.23 |
4 | November 23rd, 2008 | 4 | April 10th, 2009 |
5 | 5 | ||
6 | Borislav Deianov <borislav@users.sf.net> | 6 | Borislav Deianov <borislav@users.sf.net> |
7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> | 7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> |
@@ -20,7 +20,8 @@ moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel | |||
20 | kernel 2.6.29 and release 0.22. | 20 | kernel 2.6.29 and release 0.22. |
21 | 21 | ||
22 | The driver is named "thinkpad-acpi". In some places, like module | 22 | The driver is named "thinkpad-acpi". In some places, like module |
23 | names, "thinkpad_acpi" is used because of userspace issues. | 23 | names and log messages, "thinkpad_acpi" is used because of userspace |
24 | issues. | ||
24 | 25 | ||
25 | "tpacpi" is used as a shorthand where "thinkpad-acpi" would be too | 26 | "tpacpi" is used as a shorthand where "thinkpad-acpi" would be too |
26 | long due to length limitations on some Linux kernel versions. | 27 | long due to length limitations on some Linux kernel versions. |
@@ -37,7 +38,7 @@ detailed description): | |||
37 | - ThinkLight on and off | 38 | - ThinkLight on and off |
38 | - limited docking and undocking | 39 | - limited docking and undocking |
39 | - UltraBay eject | 40 | - UltraBay eject |
40 | - CMOS control | 41 | - CMOS/UCMS control |
41 | - LED control | 42 | - LED control |
42 | - ACPI sounds | 43 | - ACPI sounds |
43 | - temperature sensors | 44 | - temperature sensors |
@@ -46,6 +47,7 @@ detailed description): | |||
46 | - Volume control | 47 | - Volume control |
47 | - Fan control and monitoring: fan speed, fan enable/disable | 48 | - Fan control and monitoring: fan speed, fan enable/disable |
48 | - WAN enable and disable | 49 | - WAN enable and disable |
50 | - UWB enable and disable | ||
49 | 51 | ||
50 | A compatibility table by model and feature is maintained on the web | 52 | A compatibility table by model and feature is maintained on the web |
51 | site, http://ibm-acpi.sf.net/. I appreciate any success or failure | 53 | site, http://ibm-acpi.sf.net/. I appreciate any success or failure |
@@ -53,7 +55,7 @@ reports, especially if they add to or correct the compatibility table. | |||
53 | Please include the following information in your report: | 55 | Please include the following information in your report: |
54 | 56 | ||
55 | - ThinkPad model name | 57 | - ThinkPad model name |
56 | - a copy of your DSDT, from /proc/acpi/dsdt | 58 | - a copy of your ACPI tables, using the "acpidump" utility |
57 | - a copy of the output of dmidecode, with serial numbers | 59 | - a copy of the output of dmidecode, with serial numbers |
58 | and UUIDs masked off | 60 | and UUIDs masked off |
59 | - which driver features work and which don't | 61 | - which driver features work and which don't |
@@ -66,17 +68,18 @@ Installation | |||
66 | ------------ | 68 | ------------ |
67 | 69 | ||
68 | If you are compiling this driver as included in the Linux kernel | 70 | If you are compiling this driver as included in the Linux kernel |
69 | sources, simply enable the CONFIG_THINKPAD_ACPI option, and optionally | 71 | sources, look for the CONFIG_THINKPAD_ACPI Kconfig option. |
70 | enable the CONFIG_THINKPAD_ACPI_BAY option if you want the | 72 | It is located on the menu path: "Device Drivers" -> "X86 Platform |
71 | thinkpad-specific bay functionality. | 73 | Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras". |
74 | |||
72 | 75 | ||
73 | Features | 76 | Features |
74 | -------- | 77 | -------- |
75 | 78 | ||
76 | The driver exports two different interfaces to userspace, which can be | 79 | The driver exports two different interfaces to userspace, which can be |
77 | used to access the features it provides. One is a legacy procfs-based | 80 | used to access the features it provides. One is a legacy procfs-based |
78 | interface, which will be removed at some time in the distant future. | 81 | interface, which will be removed at some time in the future. The other |
79 | The other is a new sysfs-based interface which is not complete yet. | 82 | is a new sysfs-based interface which is not complete yet. |
80 | 83 | ||
81 | The procfs interface creates the /proc/acpi/ibm directory. There is a | 84 | The procfs interface creates the /proc/acpi/ibm directory. There is a |
82 | file under that directory for each feature it supports. The procfs | 85 | file under that directory for each feature it supports. The procfs |
@@ -111,15 +114,17 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver | |||
111 | as a driver attribute (see below). | 114 | as a driver attribute (see below). |
112 | 115 | ||
113 | Sysfs driver attributes are on the driver's sysfs attribute space, | 116 | Sysfs driver attributes are on the driver's sysfs attribute space, |
114 | for 2.6.23 this is /sys/bus/platform/drivers/thinkpad_acpi/ and | 117 | for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and |
115 | /sys/bus/platform/drivers/thinkpad_hwmon/ | 118 | /sys/bus/platform/drivers/thinkpad_hwmon/ |
116 | 119 | ||
117 | Sysfs device attributes are on the thinkpad_acpi device sysfs attribute | 120 | Sysfs device attributes are on the thinkpad_acpi device sysfs attribute |
118 | space, for 2.6.23 this is /sys/devices/platform/thinkpad_acpi/. | 121 | space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/. |
119 | 122 | ||
120 | Sysfs device attributes for the sensors and fan are on the | 123 | Sysfs device attributes for the sensors and fan are on the |
121 | thinkpad_hwmon device's sysfs attribute space, but you should locate it | 124 | thinkpad_hwmon device's sysfs attribute space, but you should locate it |
122 | looking for a hwmon device with the name attribute of "thinkpad". | 125 | looking for a hwmon device with the name attribute of "thinkpad", or |
126 | better yet, through libsensors. | ||
127 | |||
123 | 128 | ||
124 | Driver version | 129 | Driver version |
125 | -------------- | 130 | -------------- |
@@ -129,6 +134,7 @@ sysfs driver attribute: version | |||
129 | 134 | ||
130 | The driver name and version. No commands can be written to this file. | 135 | The driver name and version. No commands can be written to this file. |
131 | 136 | ||
137 | |||
132 | Sysfs interface version | 138 | Sysfs interface version |
133 | ----------------------- | 139 | ----------------------- |
134 | 140 | ||
@@ -160,6 +166,7 @@ expect that an attribute might not be there, and deal with it properly | |||
160 | (an attribute not being there *is* a valid way to make it clear that a | 166 | (an attribute not being there *is* a valid way to make it clear that a |
161 | feature is not available in sysfs). | 167 | feature is not available in sysfs). |
162 | 168 | ||
169 | |||
163 | Hot keys | 170 | Hot keys |
164 | -------- | 171 | -------- |
165 | 172 | ||
@@ -172,17 +179,14 @@ system. Enabling the hotkey functionality of thinkpad-acpi signals the | |||
172 | firmware that such a driver is present, and modifies how the ThinkPad | 179 | firmware that such a driver is present, and modifies how the ThinkPad |
173 | firmware will behave in many situations. | 180 | firmware will behave in many situations. |
174 | 181 | ||
175 | The driver enables the hot key feature automatically when loaded. The | 182 | The driver enables the HKEY ("hot key") event reporting automatically |
176 | feature can later be disabled and enabled back at runtime. The driver | 183 | when loaded, and disables it when it is removed. |
177 | will also restore the hot key feature to its previous state and mask | ||
178 | when it is unloaded. | ||
179 | 184 | ||
180 | When the hotkey feature is enabled and the hot key mask is set (see | 185 | The driver will report HKEY events in the following format: |
181 | below), the driver will report HKEY events in the following format: | ||
182 | 186 | ||
183 | ibm/hotkey HKEY 00000080 0000xxxx | 187 | ibm/hotkey HKEY 00000080 0000xxxx |
184 | 188 | ||
185 | Some of these events refer to hot key presses, but not all. | 189 | Some of these events refer to hot key presses, but not all of them. |
186 | 190 | ||
187 | The driver will generate events over the input layer for hot keys and | 191 | The driver will generate events over the input layer for hot keys and |
188 | radio switches, and over the ACPI netlink layer for other events. The | 192 | radio switches, and over the ACPI netlink layer for other events. The |
@@ -214,13 +218,17 @@ procfs notes: | |||
214 | 218 | ||
215 | The following commands can be written to the /proc/acpi/ibm/hotkey file: | 219 | The following commands can be written to the /proc/acpi/ibm/hotkey file: |
216 | 220 | ||
217 | echo enable > /proc/acpi/ibm/hotkey -- enable the hot keys feature | ||
218 | echo disable > /proc/acpi/ibm/hotkey -- disable the hot keys feature | ||
219 | echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys | 221 | echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys |
220 | echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys | 222 | echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys |
221 | ... any other 8-hex-digit mask ... | 223 | ... any other 8-hex-digit mask ... |
222 | echo reset > /proc/acpi/ibm/hotkey -- restore the original mask | 224 | echo reset > /proc/acpi/ibm/hotkey -- restore the original mask |
223 | 225 | ||
226 | The following commands have been deprecated and will cause the kernel | ||
227 | to log a warning: | ||
228 | |||
229 | echo enable > /proc/acpi/ibm/hotkey -- does nothing | ||
230 | echo disable > /proc/acpi/ibm/hotkey -- returns an error | ||
231 | |||
224 | The procfs interface does not support NVRAM polling control. So as to | 232 | The procfs interface does not support NVRAM polling control. So as to |
225 | maintain maximum bug-to-bug compatibility, it does not report any masks, | 233 | maintain maximum bug-to-bug compatibility, it does not report any masks, |
226 | nor does it allow one to manipulate the hot key mask when the firmware | 234 | nor does it allow one to manipulate the hot key mask when the firmware |
@@ -229,12 +237,9 @@ does not support masks at all, even if NVRAM polling is in use. | |||
229 | sysfs notes: | 237 | sysfs notes: |
230 | 238 | ||
231 | hotkey_bios_enabled: | 239 | hotkey_bios_enabled: |
232 | Returns the status of the hot keys feature when | 240 | DEPRECATED, WILL BE REMOVED SOON. |
233 | thinkpad-acpi was loaded. Upon module unload, the hot | ||
234 | key feature status will be restored to this value. | ||
235 | 241 | ||
236 | 0: hot keys were disabled | 242 | Returns 0. |
237 | 1: hot keys were enabled (unusual) | ||
238 | 243 | ||
239 | hotkey_bios_mask: | 244 | hotkey_bios_mask: |
240 | Returns the hot keys mask when thinkpad-acpi was loaded. | 245 | Returns the hot keys mask when thinkpad-acpi was loaded. |
@@ -242,13 +247,10 @@ sysfs notes: | |||
242 | to this value. | 247 | to this value. |
243 | 248 | ||
244 | hotkey_enable: | 249 | hotkey_enable: |
245 | Enables/disables the hot keys feature in the ACPI | 250 | DEPRECATED, WILL BE REMOVED SOON. |
246 | firmware, and reports current status of the hot keys | ||
247 | feature. Has no effect on the NVRAM hot key polling | ||
248 | functionality. | ||
249 | 251 | ||
250 | 0: disables the hot keys feature / feature disabled | 252 | 0: returns -EPERM |
251 | 1: enables the hot keys feature / feature enabled | 253 | 1: does nothing |
252 | 254 | ||
253 | hotkey_mask: | 255 | hotkey_mask: |
254 | bit mask to enable driver-handling (and depending on | 256 | bit mask to enable driver-handling (and depending on |
@@ -504,7 +506,7 @@ generate input device EV_KEY events. | |||
504 | In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW | 506 | In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW |
505 | events for switches: | 507 | events for switches: |
506 | 508 | ||
507 | SW_RFKILL_ALL T60 and later hardare rfkill rocker switch | 509 | SW_RFKILL_ALL T60 and later hardware rfkill rocker switch |
508 | SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A | 510 | SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A |
509 | 511 | ||
510 | Non hot-key ACPI HKEY event map: | 512 | Non hot-key ACPI HKEY event map: |
@@ -618,6 +620,7 @@ For Lenovo models *with* ACPI backlight control: | |||
618 | and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process | 620 | and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process |
619 | these keys on userspace somehow (e.g. by calling xbacklight). | 621 | these keys on userspace somehow (e.g. by calling xbacklight). |
620 | 622 | ||
623 | |||
621 | Bluetooth | 624 | Bluetooth |
622 | --------- | 625 | --------- |
623 | 626 | ||
@@ -628,6 +631,9 @@ sysfs rfkill class: switch "tpacpi_bluetooth_sw" | |||
628 | This feature shows the presence and current state of a ThinkPad | 631 | This feature shows the presence and current state of a ThinkPad |
629 | Bluetooth device in the internal ThinkPad CDC slot. | 632 | Bluetooth device in the internal ThinkPad CDC slot. |
630 | 633 | ||
634 | If the ThinkPad supports it, the Bluetooth state is stored in NVRAM, | ||
635 | so it is kept across reboots and power-off. | ||
636 | |||
631 | Procfs notes: | 637 | Procfs notes: |
632 | 638 | ||
633 | If Bluetooth is installed, the following commands can be used: | 639 | If Bluetooth is installed, the following commands can be used: |
@@ -652,6 +658,7 @@ Sysfs notes: | |||
652 | rfkill controller switch "tpacpi_bluetooth_sw": refer to | 658 | rfkill controller switch "tpacpi_bluetooth_sw": refer to |
653 | Documentation/rfkill.txt for details. | 659 | Documentation/rfkill.txt for details. |
654 | 660 | ||
661 | |||
655 | Video output control -- /proc/acpi/ibm/video | 662 | Video output control -- /proc/acpi/ibm/video |
656 | -------------------------------------------- | 663 | -------------------------------------------- |
657 | 664 | ||
@@ -693,11 +700,8 @@ Fn-F7 from working. This also disables the video output switching | |||
693 | features of this driver, as it uses the same ACPI methods as | 700 | features of this driver, as it uses the same ACPI methods as |
694 | Fn-F7. Video switching on the console should still work. | 701 | Fn-F7. Video switching on the console should still work. |
695 | 702 | ||
696 | UPDATE: There's now a patch for the X.org Radeon driver which | 703 | UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000 |
697 | addresses this issue. Some people are reporting success with the patch | ||
698 | while others are still having problems. For more information: | ||
699 | 704 | ||
700 | https://bugs.freedesktop.org/show_bug.cgi?id=2000 | ||
701 | 705 | ||
702 | ThinkLight control | 706 | ThinkLight control |
703 | ------------------ | 707 | ------------------ |
@@ -720,10 +724,11 @@ The ThinkLight sysfs interface is documented by the LED class | |||
720 | documentation, in Documentation/leds-class.txt. The ThinkLight LED name | 724 | documentation, in Documentation/leds-class.txt. The ThinkLight LED name |
721 | is "tpacpi::thinklight". | 725 | is "tpacpi::thinklight". |
722 | 726 | ||
723 | Due to limitations in the sysfs LED class, if the status of the thinklight | 727 | Due to limitations in the sysfs LED class, if the status of the ThinkLight |
724 | cannot be read or if it is unknown, thinkpad-acpi will report it as "off". | 728 | cannot be read or if it is unknown, thinkpad-acpi will report it as "off". |
725 | It is impossible to know if the status returned through sysfs is valid. | 729 | It is impossible to know if the status returned through sysfs is valid. |
726 | 730 | ||
731 | |||
727 | Docking / undocking -- /proc/acpi/ibm/dock | 732 | Docking / undocking -- /proc/acpi/ibm/dock |
728 | ------------------------------------------ | 733 | ------------------------------------------ |
729 | 734 | ||
@@ -784,6 +789,7 @@ the only docking stations currently supported are the X-series | |||
784 | UltraBase docks and "dumb" port replicators like the Mini Dock (the | 789 | UltraBase docks and "dumb" port replicators like the Mini Dock (the |
785 | latter don't need any ACPI support, actually). | 790 | latter don't need any ACPI support, actually). |
786 | 791 | ||
792 | |||
787 | UltraBay eject -- /proc/acpi/ibm/bay | 793 | UltraBay eject -- /proc/acpi/ibm/bay |
788 | ------------------------------------ | 794 | ------------------------------------ |
789 | 795 | ||
@@ -847,8 +853,9 @@ supported. Use "eject2" instead of "eject" for the second bay. | |||
847 | Note: the UltraBay eject support on the 600e/x, A22p and A3x is | 853 | Note: the UltraBay eject support on the 600e/x, A22p and A3x is |
848 | EXPERIMENTAL and may not work as expected. USE WITH CAUTION! | 854 | EXPERIMENTAL and may not work as expected. USE WITH CAUTION! |
849 | 855 | ||
850 | CMOS control | 856 | |
851 | ------------ | 857 | CMOS/UCMS control |
858 | ----------------- | ||
852 | 859 | ||
853 | procfs: /proc/acpi/ibm/cmos | 860 | procfs: /proc/acpi/ibm/cmos |
854 | sysfs device attribute: cmos_command | 861 | sysfs device attribute: cmos_command |
@@ -882,6 +889,7 @@ The cmos command interface is prone to firmware split-brain problems, as | |||
882 | in newer ThinkPads it is just a compatibility layer. Do not use it, it is | 889 | in newer ThinkPads it is just a compatibility layer. Do not use it, it is |
883 | exported just as a debug tool. | 890 | exported just as a debug tool. |
884 | 891 | ||
892 | |||
885 | LED control | 893 | LED control |
886 | ----------- | 894 | ----------- |
887 | 895 | ||
@@ -893,6 +901,17 @@ some older ThinkPad models, it is possible to query the status of the | |||
893 | LED indicators as well. Newer ThinkPads cannot query the real status | 901 | LED indicators as well. Newer ThinkPads cannot query the real status |
894 | of the LED indicators. | 902 | of the LED indicators. |
895 | 903 | ||
904 | Because misuse of the LEDs could induce an unaware user to perform | ||
905 | dangerous actions (like undocking or ejecting a bay device while the | ||
906 | buses are still active), or mask an important alarm (such as a nearly | ||
907 | empty battery, or a broken battery), access to most LEDs is | ||
908 | restricted. | ||
909 | |||
910 | Unrestricted access to all LEDs requires that thinkpad-acpi be | ||
911 | compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled. | ||
912 | Distributions must never enable this option. Individual users that | ||
913 | are aware of the consequences are welcome to enabling it. | ||
914 | |||
896 | procfs notes: | 915 | procfs notes: |
897 | 916 | ||
898 | The available commands are: | 917 | The available commands are: |
@@ -939,6 +958,7 @@ ThinkPad indicator LED should blink in hardware accelerated mode, use the | |||
939 | "timer" trigger, and leave the delay_on and delay_off parameters set to | 958 | "timer" trigger, and leave the delay_on and delay_off parameters set to |
940 | zero (to request hardware acceleration autodetection). | 959 | zero (to request hardware acceleration autodetection). |
941 | 960 | ||
961 | |||
942 | ACPI sounds -- /proc/acpi/ibm/beep | 962 | ACPI sounds -- /proc/acpi/ibm/beep |
943 | ---------------------------------- | 963 | ---------------------------------- |
944 | 964 | ||
@@ -968,6 +988,7 @@ X40: | |||
968 | 16 - one medium-pitched beep repeating constantly, stop with 17 | 988 | 16 - one medium-pitched beep repeating constantly, stop with 17 |
969 | 17 - stop 16 | 989 | 17 - stop 16 |
970 | 990 | ||
991 | |||
971 | Temperature sensors | 992 | Temperature sensors |
972 | ------------------- | 993 | ------------------- |
973 | 994 | ||
@@ -1115,6 +1136,7 @@ registers contain the current battery capacity, etc. If you experiment | |||
1115 | with this, do send me your results (including some complete dumps with | 1136 | with this, do send me your results (including some complete dumps with |
1116 | a description of the conditions when they were taken.) | 1137 | a description of the conditions when they were taken.) |
1117 | 1138 | ||
1139 | |||
1118 | LCD brightness control | 1140 | LCD brightness control |
1119 | ---------------------- | 1141 | ---------------------- |
1120 | 1142 | ||
@@ -1124,10 +1146,9 @@ sysfs backlight device "thinkpad_screen" | |||
1124 | This feature allows software control of the LCD brightness on ThinkPad | 1146 | This feature allows software control of the LCD brightness on ThinkPad |
1125 | models which don't have a hardware brightness slider. | 1147 | models which don't have a hardware brightness slider. |
1126 | 1148 | ||
1127 | It has some limitations: the LCD backlight cannot be actually turned on or | 1149 | It has some limitations: the LCD backlight cannot be actually turned |
1128 | off by this interface, and in many ThinkPad models, the "dim while on | 1150 | on or off by this interface, it just controls the backlight brightness |
1129 | battery" functionality will be enabled by the BIOS when this interface is | 1151 | level. |
1130 | used, and cannot be controlled. | ||
1131 | 1152 | ||
1132 | On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control | 1153 | On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control |
1133 | has eight brightness levels, ranging from 0 to 7. Some of the levels | 1154 | has eight brightness levels, ranging from 0 to 7. Some of the levels |
@@ -1136,10 +1157,15 @@ display backlight brightness control methods have 16 levels, ranging | |||
1136 | from 0 to 15. | 1157 | from 0 to 15. |
1137 | 1158 | ||
1138 | There are two interfaces to the firmware for direct brightness control, | 1159 | There are two interfaces to the firmware for direct brightness control, |
1139 | EC and CMOS. To select which one should be used, use the | 1160 | EC and UCMS (or CMOS). To select which one should be used, use the |
1140 | brightness_mode module parameter: brightness_mode=1 selects EC mode, | 1161 | brightness_mode module parameter: brightness_mode=1 selects EC mode, |
1141 | brightness_mode=2 selects CMOS mode, brightness_mode=3 selects both EC | 1162 | brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC |
1142 | and CMOS. The driver tries to auto-detect which interface to use. | 1163 | mode with NVRAM backing (so that brightness changes are remembered |
1164 | across shutdown/reboot). | ||
1165 | |||
1166 | The driver tries to select which interface to use from a table of | ||
1167 | defaults for each ThinkPad model. If it makes a wrong choice, please | ||
1168 | report this as a bug, so that we can fix it. | ||
1143 | 1169 | ||
1144 | When display backlight brightness controls are available through the | 1170 | When display backlight brightness controls are available through the |
1145 | standard ACPI interface, it is best to use it instead of this direct | 1171 | standard ACPI interface, it is best to use it instead of this direct |
@@ -1201,6 +1227,7 @@ WARNING: | |||
1201 | and maybe reduce the life of the backlight lamps by needlessly kicking | 1227 | and maybe reduce the life of the backlight lamps by needlessly kicking |
1202 | its level up and down at every change. | 1228 | its level up and down at every change. |
1203 | 1229 | ||
1230 | |||
1204 | Volume control -- /proc/acpi/ibm/volume | 1231 | Volume control -- /proc/acpi/ibm/volume |
1205 | --------------------------------------- | 1232 | --------------------------------------- |
1206 | 1233 | ||
@@ -1217,6 +1244,11 @@ distinct. The unmute the volume after the mute command, use either the | |||
1217 | up or down command (the level command will not unmute the volume). | 1244 | up or down command (the level command will not unmute the volume). |
1218 | The current volume level and mute state is shown in the file. | 1245 | The current volume level and mute state is shown in the file. |
1219 | 1246 | ||
1247 | The ALSA mixer interface to this feature is still missing, but patches | ||
1248 | to add it exist. That problem should be addressed in the not so | ||
1249 | distant future. | ||
1250 | |||
1251 | |||
1220 | Fan control and monitoring: fan speed, fan enable/disable | 1252 | Fan control and monitoring: fan speed, fan enable/disable |
1221 | --------------------------------------------------------- | 1253 | --------------------------------------------------------- |
1222 | 1254 | ||
@@ -1383,8 +1415,11 @@ procfs: /proc/acpi/ibm/wan | |||
1383 | sysfs device attribute: wwan_enable (deprecated) | 1415 | sysfs device attribute: wwan_enable (deprecated) |
1384 | sysfs rfkill class: switch "tpacpi_wwan_sw" | 1416 | sysfs rfkill class: switch "tpacpi_wwan_sw" |
1385 | 1417 | ||
1386 | This feature shows the presence and current state of a W-WAN (Sierra | 1418 | This feature shows the presence and current state of the built-in |
1387 | Wireless EV-DO) device. | 1419 | Wireless WAN device. |
1420 | |||
1421 | If the ThinkPad supports it, the WWAN state is stored in NVRAM, | ||
1422 | so it is kept across reboots and power-off. | ||
1388 | 1423 | ||
1389 | It was tested on a Lenovo ThinkPad X60. It should probably work on other | 1424 | It was tested on a Lenovo ThinkPad X60. It should probably work on other |
1390 | ThinkPad models which come with this module installed. | 1425 | ThinkPad models which come with this module installed. |
@@ -1413,6 +1448,7 @@ Sysfs notes: | |||
1413 | rfkill controller switch "tpacpi_wwan_sw": refer to | 1448 | rfkill controller switch "tpacpi_wwan_sw": refer to |
1414 | Documentation/rfkill.txt for details. | 1449 | Documentation/rfkill.txt for details. |
1415 | 1450 | ||
1451 | |||
1416 | EXPERIMENTAL: UWB | 1452 | EXPERIMENTAL: UWB |
1417 | ----------------- | 1453 | ----------------- |
1418 | 1454 | ||
@@ -1431,6 +1467,7 @@ Sysfs notes: | |||
1431 | rfkill controller switch "tpacpi_uwb_sw": refer to | 1467 | rfkill controller switch "tpacpi_uwb_sw": refer to |
1432 | Documentation/rfkill.txt for details. | 1468 | Documentation/rfkill.txt for details. |
1433 | 1469 | ||
1470 | |||
1434 | Multiple Commands, Module Parameters | 1471 | Multiple Commands, Module Parameters |
1435 | ------------------------------------ | 1472 | ------------------------------------ |
1436 | 1473 | ||
@@ -1445,6 +1482,7 @@ for example: | |||
1445 | 1482 | ||
1446 | modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable | 1483 | modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable |
1447 | 1484 | ||
1485 | |||
1448 | Enabling debugging output | 1486 | Enabling debugging output |
1449 | ------------------------- | 1487 | ------------------------- |
1450 | 1488 | ||
@@ -1457,8 +1495,15 @@ will enable all debugging output classes. It takes a bitmask, so | |||
1457 | to enable more than one output class, just add their values. | 1495 | to enable more than one output class, just add their values. |
1458 | 1496 | ||
1459 | Debug bitmask Description | 1497 | Debug bitmask Description |
1498 | 0x8000 Disclose PID of userspace programs | ||
1499 | accessing some functions of the driver | ||
1460 | 0x0001 Initialization and probing | 1500 | 0x0001 Initialization and probing |
1461 | 0x0002 Removal | 1501 | 0x0002 Removal |
1502 | 0x0004 RF Transmitter control (RFKILL) | ||
1503 | (bluetooth, WWAN, UWB...) | ||
1504 | 0x0008 HKEY event interface, hotkeys | ||
1505 | 0x0010 Fan control | ||
1506 | 0x0020 Backlight brightness | ||
1462 | 1507 | ||
1463 | There is also a kernel build option to enable more debugging | 1508 | There is also a kernel build option to enable more debugging |
1464 | information, which may be necessary to debug driver problems. | 1509 | information, which may be necessary to debug driver problems. |
@@ -1467,6 +1512,7 @@ The level of debugging information output by the driver can be changed | |||
1467 | at runtime through sysfs, using the driver attribute debug_level. The | 1512 | at runtime through sysfs, using the driver attribute debug_level. The |
1468 | attribute takes the same bitmask as the debug module parameter above. | 1513 | attribute takes the same bitmask as the debug module parameter above. |
1469 | 1514 | ||
1515 | |||
1470 | Force loading of module | 1516 | Force loading of module |
1471 | ----------------------- | 1517 | ----------------------- |
1472 | 1518 | ||
@@ -1505,3 +1551,7 @@ Sysfs interface changelog: | |||
1505 | 1551 | ||
1506 | 0x020200: Add poll()/select() support to the following attributes: | 1552 | 0x020200: Add poll()/select() support to the following attributes: |
1507 | hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason | 1553 | hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason |
1554 | |||
1555 | 0x020300: hotkey enable/disable support removed, attributes | ||
1556 | hotkey_bios_enabled and hotkey_enable deprecated and | ||
1557 | marked for removal. | ||
diff --git a/Documentation/lguest/.gitignore b/Documentation/lguest/.gitignore new file mode 100644 index 000000000000..115587fd5f65 --- /dev/null +++ b/Documentation/lguest/.gitignore | |||
@@ -0,0 +1 @@ | |||
lguest | |||
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile index 1f4f9e888bd1..28c8cdfcafd8 100644 --- a/Documentation/lguest/Makefile +++ b/Documentation/lguest/Makefile | |||
@@ -1,6 +1,5 @@ | |||
1 | # This creates the demonstration utility "lguest" which runs a Linux guest. | 1 | # This creates the demonstration utility "lguest" which runs a Linux guest. |
2 | CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE | 2 | CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE |
3 | LDLIBS:=-lz | ||
4 | 3 | ||
5 | all: lguest | 4 | all: lguest |
6 | 5 | ||
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index f2dbbf3bdeab..9ebcd6ef361b 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <sys/types.h> | 16 | #include <sys/types.h> |
17 | #include <sys/stat.h> | 17 | #include <sys/stat.h> |
18 | #include <sys/wait.h> | 18 | #include <sys/wait.h> |
19 | #include <sys/eventfd.h> | ||
19 | #include <fcntl.h> | 20 | #include <fcntl.h> |
20 | #include <stdbool.h> | 21 | #include <stdbool.h> |
21 | #include <errno.h> | 22 | #include <errno.h> |
@@ -59,7 +60,6 @@ typedef uint8_t u8; | |||
59 | /*:*/ | 60 | /*:*/ |
60 | 61 | ||
61 | #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ | 62 | #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ |
62 | #define NET_PEERNUM 1 | ||
63 | #define BRIDGE_PFX "bridge:" | 63 | #define BRIDGE_PFX "bridge:" |
64 | #ifndef SIOCBRADDIF | 64 | #ifndef SIOCBRADDIF |
65 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ | 65 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ |
@@ -76,19 +76,12 @@ static bool verbose; | |||
76 | do { if (verbose) printf(args); } while(0) | 76 | do { if (verbose) printf(args); } while(0) |
77 | /*:*/ | 77 | /*:*/ |
78 | 78 | ||
79 | /* File descriptors for the Waker. */ | ||
80 | struct { | ||
81 | int pipe[2]; | ||
82 | int lguest_fd; | ||
83 | } waker_fds; | ||
84 | |||
85 | /* The pointer to the start of guest memory. */ | 79 | /* The pointer to the start of guest memory. */ |
86 | static void *guest_base; | 80 | static void *guest_base; |
87 | /* The maximum guest physical address allowed, and maximum possible. */ | 81 | /* The maximum guest physical address allowed, and maximum possible. */ |
88 | static unsigned long guest_limit, guest_max; | 82 | static unsigned long guest_limit, guest_max; |
89 | /* The pipe for signal hander to write to. */ | 83 | /* The /dev/lguest file descriptor. */ |
90 | static int timeoutpipe[2]; | 84 | static int lguest_fd; |
91 | static unsigned int timeout_usec = 500; | ||
92 | 85 | ||
93 | /* a per-cpu variable indicating whose vcpu is currently running */ | 86 | /* a per-cpu variable indicating whose vcpu is currently running */ |
94 | static unsigned int __thread cpu_id; | 87 | static unsigned int __thread cpu_id; |
@@ -96,11 +89,6 @@ static unsigned int __thread cpu_id; | |||
96 | /* This is our list of devices. */ | 89 | /* This is our list of devices. */ |
97 | struct device_list | 90 | struct device_list |
98 | { | 91 | { |
99 | /* Summary information about the devices in our list: ready to pass to | ||
100 | * select() to ask which need servicing.*/ | ||
101 | fd_set infds; | ||
102 | int max_infd; | ||
103 | |||
104 | /* Counter to assign interrupt numbers. */ | 92 | /* Counter to assign interrupt numbers. */ |
105 | unsigned int next_irq; | 93 | unsigned int next_irq; |
106 | 94 | ||
@@ -126,22 +114,21 @@ struct device | |||
126 | /* The linked-list pointer. */ | 114 | /* The linked-list pointer. */ |
127 | struct device *next; | 115 | struct device *next; |
128 | 116 | ||
129 | /* The this device's descriptor, as mapped into the Guest. */ | 117 | /* The device's descriptor, as mapped into the Guest. */ |
130 | struct lguest_device_desc *desc; | 118 | struct lguest_device_desc *desc; |
131 | 119 | ||
120 | /* We can't trust desc values once Guest has booted: we use these. */ | ||
121 | unsigned int feature_len; | ||
122 | unsigned int num_vq; | ||
123 | |||
132 | /* The name of this device, for --verbose. */ | 124 | /* The name of this device, for --verbose. */ |
133 | const char *name; | 125 | const char *name; |
134 | 126 | ||
135 | /* If handle_input is set, it wants to be called when this file | ||
136 | * descriptor is ready. */ | ||
137 | int fd; | ||
138 | bool (*handle_input)(int fd, struct device *me); | ||
139 | |||
140 | /* Any queues attached to this device */ | 127 | /* Any queues attached to this device */ |
141 | struct virtqueue *vq; | 128 | struct virtqueue *vq; |
142 | 129 | ||
143 | /* Handle status being finalized (ie. feature bits stable). */ | 130 | /* Is it operational */ |
144 | void (*ready)(struct device *me); | 131 | bool running; |
145 | 132 | ||
146 | /* Device-specific data. */ | 133 | /* Device-specific data. */ |
147 | void *priv; | 134 | void *priv; |
@@ -164,22 +151,28 @@ struct virtqueue | |||
164 | /* Last available index we saw. */ | 151 | /* Last available index we saw. */ |
165 | u16 last_avail_idx; | 152 | u16 last_avail_idx; |
166 | 153 | ||
167 | /* The routine to call when the Guest pings us, or timeout. */ | 154 | /* How many are used since we sent last irq? */ |
168 | void (*handle_output)(int fd, struct virtqueue *me, bool timeout); | 155 | unsigned int pending_used; |
169 | 156 | ||
170 | /* Outstanding buffers */ | 157 | /* Eventfd where Guest notifications arrive. */ |
171 | unsigned int inflight; | 158 | int eventfd; |
172 | 159 | ||
173 | /* Is this blocked awaiting a timer? */ | 160 | /* Function for the thread which is servicing this virtqueue. */ |
174 | bool blocked; | 161 | void (*service)(struct virtqueue *vq); |
162 | pid_t thread; | ||
175 | }; | 163 | }; |
176 | 164 | ||
177 | /* Remember the arguments to the program so we can "reboot" */ | 165 | /* Remember the arguments to the program so we can "reboot" */ |
178 | static char **main_args; | 166 | static char **main_args; |
179 | 167 | ||
180 | /* Since guest is UP and we don't run at the same time, we don't need barriers. | 168 | /* The original tty settings to restore on exit. */ |
181 | * But I include them in the code in case others copy it. */ | 169 | static struct termios orig_term; |
182 | #define wmb() | 170 | |
171 | /* We have to be careful with barriers: our devices are all run in separate | ||
172 | * threads and so we need to make sure that changes visible to the Guest happen | ||
173 | * in precise order. */ | ||
174 | #define wmb() __asm__ __volatile__("" : : : "memory") | ||
175 | #define mb() __asm__ __volatile__("" : : : "memory") | ||
183 | 176 | ||
184 | /* Convert an iovec element to the given type. | 177 | /* Convert an iovec element to the given type. |
185 | * | 178 | * |
@@ -245,7 +238,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) | |||
245 | static u8 *get_feature_bits(struct device *dev) | 238 | static u8 *get_feature_bits(struct device *dev) |
246 | { | 239 | { |
247 | return (u8 *)(dev->desc + 1) | 240 | return (u8 *)(dev->desc + 1) |
248 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig); | 241 | + dev->num_vq * sizeof(struct lguest_vqconfig); |
249 | } | 242 | } |
250 | 243 | ||
251 | /*L:100 The Launcher code itself takes us out into userspace, that scary place | 244 | /*L:100 The Launcher code itself takes us out into userspace, that scary place |
@@ -505,99 +498,19 @@ static void concat(char *dst, char *args[]) | |||
505 | * saw the arguments it expects when we looked at initialize() in lguest_user.c: | 498 | * saw the arguments it expects when we looked at initialize() in lguest_user.c: |
506 | * the base of Guest "physical" memory, the top physical page to allow and the | 499 | * the base of Guest "physical" memory, the top physical page to allow and the |
507 | * entry point for the Guest. */ | 500 | * entry point for the Guest. */ |
508 | static int tell_kernel(unsigned long start) | 501 | static void tell_kernel(unsigned long start) |
509 | { | 502 | { |
510 | unsigned long args[] = { LHREQ_INITIALIZE, | 503 | unsigned long args[] = { LHREQ_INITIALIZE, |
511 | (unsigned long)guest_base, | 504 | (unsigned long)guest_base, |
512 | guest_limit / getpagesize(), start }; | 505 | guest_limit / getpagesize(), start }; |
513 | int fd; | ||
514 | |||
515 | verbose("Guest: %p - %p (%#lx)\n", | 506 | verbose("Guest: %p - %p (%#lx)\n", |
516 | guest_base, guest_base + guest_limit, guest_limit); | 507 | guest_base, guest_base + guest_limit, guest_limit); |
517 | fd = open_or_die("/dev/lguest", O_RDWR); | 508 | lguest_fd = open_or_die("/dev/lguest", O_RDWR); |
518 | if (write(fd, args, sizeof(args)) < 0) | 509 | if (write(lguest_fd, args, sizeof(args)) < 0) |
519 | err(1, "Writing to /dev/lguest"); | 510 | err(1, "Writing to /dev/lguest"); |
520 | |||
521 | /* We return the /dev/lguest file descriptor to control this Guest */ | ||
522 | return fd; | ||
523 | } | 511 | } |
524 | /*:*/ | 512 | /*:*/ |
525 | 513 | ||
526 | static void add_device_fd(int fd) | ||
527 | { | ||
528 | FD_SET(fd, &devices.infds); | ||
529 | if (fd > devices.max_infd) | ||
530 | devices.max_infd = fd; | ||
531 | } | ||
532 | |||
533 | /*L:200 | ||
534 | * The Waker. | ||
535 | * | ||
536 | * With console, block and network devices, we can have lots of input which we | ||
537 | * need to process. We could try to tell the kernel what file descriptors to | ||
538 | * watch, but handing a file descriptor mask through to the kernel is fairly | ||
539 | * icky. | ||
540 | * | ||
541 | * Instead, we clone off a thread which watches the file descriptors and writes | ||
542 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host | ||
543 | * stop running the Guest. This causes the Launcher to return from the | ||
544 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | ||
545 | * the LHREQ_BREAK and wake us up again. | ||
546 | * | ||
547 | * This, of course, is merely a different *kind* of icky. | ||
548 | * | ||
549 | * Given my well-known antipathy to threads, I'd prefer to use processes. But | ||
550 | * it's easier to share Guest memory with threads, and trivial to share the | ||
551 | * devices.infds as the Launcher changes it. | ||
552 | */ | ||
553 | static int waker(void *unused) | ||
554 | { | ||
555 | /* Close the write end of the pipe: only the Launcher has it open. */ | ||
556 | close(waker_fds.pipe[1]); | ||
557 | |||
558 | for (;;) { | ||
559 | fd_set rfds = devices.infds; | ||
560 | unsigned long args[] = { LHREQ_BREAK, 1 }; | ||
561 | unsigned int maxfd = devices.max_infd; | ||
562 | |||
563 | /* We also listen to the pipe from the Launcher. */ | ||
564 | FD_SET(waker_fds.pipe[0], &rfds); | ||
565 | if (waker_fds.pipe[0] > maxfd) | ||
566 | maxfd = waker_fds.pipe[0]; | ||
567 | |||
568 | /* Wait until input is ready from one of the devices. */ | ||
569 | select(maxfd+1, &rfds, NULL, NULL, NULL); | ||
570 | |||
571 | /* Message from Launcher? */ | ||
572 | if (FD_ISSET(waker_fds.pipe[0], &rfds)) { | ||
573 | char c; | ||
574 | /* If this fails, then assume Launcher has exited. | ||
575 | * Don't do anything on exit: we're just a thread! */ | ||
576 | if (read(waker_fds.pipe[0], &c, 1) != 1) | ||
577 | _exit(0); | ||
578 | continue; | ||
579 | } | ||
580 | |||
581 | /* Send LHREQ_BREAK command to snap the Launcher out of it. */ | ||
582 | pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id); | ||
583 | } | ||
584 | return 0; | ||
585 | } | ||
586 | |||
587 | /* This routine just sets up a pipe to the Waker process. */ | ||
588 | static void setup_waker(int lguest_fd) | ||
589 | { | ||
590 | /* This pipe is closed when Launcher dies, telling Waker. */ | ||
591 | if (pipe(waker_fds.pipe) != 0) | ||
592 | err(1, "Creating pipe for Waker"); | ||
593 | |||
594 | /* Waker also needs to know the lguest fd */ | ||
595 | waker_fds.lguest_fd = lguest_fd; | ||
596 | |||
597 | if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1) | ||
598 | err(1, "Creating Waker"); | ||
599 | } | ||
600 | |||
601 | /* | 514 | /* |
602 | * Device Handling. | 515 | * Device Handling. |
603 | * | 516 | * |
@@ -623,49 +536,90 @@ static void *_check_pointer(unsigned long addr, unsigned int size, | |||
623 | /* Each buffer in the virtqueues is actually a chain of descriptors. This | 536 | /* Each buffer in the virtqueues is actually a chain of descriptors. This |
624 | * function returns the next descriptor in the chain, or vq->vring.num if we're | 537 | * function returns the next descriptor in the chain, or vq->vring.num if we're |
625 | * at the end. */ | 538 | * at the end. */ |
626 | static unsigned next_desc(struct virtqueue *vq, unsigned int i) | 539 | static unsigned next_desc(struct vring_desc *desc, |
540 | unsigned int i, unsigned int max) | ||
627 | { | 541 | { |
628 | unsigned int next; | 542 | unsigned int next; |
629 | 543 | ||
630 | /* If this descriptor says it doesn't chain, we're done. */ | 544 | /* If this descriptor says it doesn't chain, we're done. */ |
631 | if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT)) | 545 | if (!(desc[i].flags & VRING_DESC_F_NEXT)) |
632 | return vq->vring.num; | 546 | return max; |
633 | 547 | ||
634 | /* Check they're not leading us off end of descriptors. */ | 548 | /* Check they're not leading us off end of descriptors. */ |
635 | next = vq->vring.desc[i].next; | 549 | next = desc[i].next; |
636 | /* Make sure compiler knows to grab that: we don't want it changing! */ | 550 | /* Make sure compiler knows to grab that: we don't want it changing! */ |
637 | wmb(); | 551 | wmb(); |
638 | 552 | ||
639 | if (next >= vq->vring.num) | 553 | if (next >= max) |
640 | errx(1, "Desc next is %u", next); | 554 | errx(1, "Desc next is %u", next); |
641 | 555 | ||
642 | return next; | 556 | return next; |
643 | } | 557 | } |
644 | 558 | ||
559 | /* This actually sends the interrupt for this virtqueue */ | ||
560 | static void trigger_irq(struct virtqueue *vq) | ||
561 | { | ||
562 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | ||
563 | |||
564 | /* Don't inform them if nothing used. */ | ||
565 | if (!vq->pending_used) | ||
566 | return; | ||
567 | vq->pending_used = 0; | ||
568 | |||
569 | /* If they don't want an interrupt, don't send one, unless empty. */ | ||
570 | if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) | ||
571 | && lg_last_avail(vq) != vq->vring.avail->idx) | ||
572 | return; | ||
573 | |||
574 | /* Send the Guest an interrupt tell them we used something up. */ | ||
575 | if (write(lguest_fd, buf, sizeof(buf)) != 0) | ||
576 | err(1, "Triggering irq %i", vq->config.irq); | ||
577 | } | ||
578 | |||
645 | /* This looks in the virtqueue and for the first available buffer, and converts | 579 | /* This looks in the virtqueue and for the first available buffer, and converts |
646 | * it to an iovec for convenient access. Since descriptors consist of some | 580 | * it to an iovec for convenient access. Since descriptors consist of some |
647 | * number of output then some number of input descriptors, it's actually two | 581 | * number of output then some number of input descriptors, it's actually two |
648 | * iovecs, but we pack them into one and note how many of each there were. | 582 | * iovecs, but we pack them into one and note how many of each there were. |
649 | * | 583 | * |
650 | * This function returns the descriptor number found, or vq->vring.num (which | 584 | * This function returns the descriptor number found. */ |
651 | * is never a valid descriptor number) if none was found. */ | 585 | static unsigned wait_for_vq_desc(struct virtqueue *vq, |
652 | static unsigned get_vq_desc(struct virtqueue *vq, | 586 | struct iovec iov[], |
653 | struct iovec iov[], | 587 | unsigned int *out_num, unsigned int *in_num) |
654 | unsigned int *out_num, unsigned int *in_num) | ||
655 | { | 588 | { |
656 | unsigned int i, head; | 589 | unsigned int i, head, max; |
657 | u16 last_avail; | 590 | struct vring_desc *desc; |
591 | u16 last_avail = lg_last_avail(vq); | ||
592 | |||
593 | while (last_avail == vq->vring.avail->idx) { | ||
594 | u64 event; | ||
595 | |||
596 | /* OK, tell Guest about progress up to now. */ | ||
597 | trigger_irq(vq); | ||
598 | |||
599 | /* OK, now we need to know about added descriptors. */ | ||
600 | vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
601 | |||
602 | /* They could have slipped one in as we were doing that: make | ||
603 | * sure it's written, then check again. */ | ||
604 | mb(); | ||
605 | if (last_avail != vq->vring.avail->idx) { | ||
606 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
607 | break; | ||
608 | } | ||
609 | |||
610 | /* Nothing new? Wait for eventfd to tell us they refilled. */ | ||
611 | if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event)) | ||
612 | errx(1, "Event read failed?"); | ||
613 | |||
614 | /* We don't need to be notified again. */ | ||
615 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
616 | } | ||
658 | 617 | ||
659 | /* Check it isn't doing very strange things with descriptor numbers. */ | 618 | /* Check it isn't doing very strange things with descriptor numbers. */ |
660 | last_avail = lg_last_avail(vq); | ||
661 | if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) | 619 | if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) |
662 | errx(1, "Guest moved used index from %u to %u", | 620 | errx(1, "Guest moved used index from %u to %u", |
663 | last_avail, vq->vring.avail->idx); | 621 | last_avail, vq->vring.avail->idx); |
664 | 622 | ||
665 | /* If there's nothing new since last we looked, return invalid. */ | ||
666 | if (vq->vring.avail->idx == last_avail) | ||
667 | return vq->vring.num; | ||
668 | |||
669 | /* Grab the next descriptor number they're advertising, and increment | 623 | /* Grab the next descriptor number they're advertising, and increment |
670 | * the index we've seen. */ | 624 | * the index we've seen. */ |
671 | head = vq->vring.avail->ring[last_avail % vq->vring.num]; | 625 | head = vq->vring.avail->ring[last_avail % vq->vring.num]; |
@@ -678,15 +632,28 @@ static unsigned get_vq_desc(struct virtqueue *vq, | |||
678 | /* When we start there are none of either input nor output. */ | 632 | /* When we start there are none of either input nor output. */ |
679 | *out_num = *in_num = 0; | 633 | *out_num = *in_num = 0; |
680 | 634 | ||
635 | max = vq->vring.num; | ||
636 | desc = vq->vring.desc; | ||
681 | i = head; | 637 | i = head; |
638 | |||
639 | /* If this is an indirect entry, then this buffer contains a descriptor | ||
640 | * table which we handle as if it's any normal descriptor chain. */ | ||
641 | if (desc[i].flags & VRING_DESC_F_INDIRECT) { | ||
642 | if (desc[i].len % sizeof(struct vring_desc)) | ||
643 | errx(1, "Invalid size for indirect buffer table"); | ||
644 | |||
645 | max = desc[i].len / sizeof(struct vring_desc); | ||
646 | desc = check_pointer(desc[i].addr, desc[i].len); | ||
647 | i = 0; | ||
648 | } | ||
649 | |||
682 | do { | 650 | do { |
683 | /* Grab the first descriptor, and check it's OK. */ | 651 | /* Grab the first descriptor, and check it's OK. */ |
684 | iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len; | 652 | iov[*out_num + *in_num].iov_len = desc[i].len; |
685 | iov[*out_num + *in_num].iov_base | 653 | iov[*out_num + *in_num].iov_base |
686 | = check_pointer(vq->vring.desc[i].addr, | 654 | = check_pointer(desc[i].addr, desc[i].len); |
687 | vq->vring.desc[i].len); | ||
688 | /* If this is an input descriptor, increment that count. */ | 655 | /* If this is an input descriptor, increment that count. */ |
689 | if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE) | 656 | if (desc[i].flags & VRING_DESC_F_WRITE) |
690 | (*in_num)++; | 657 | (*in_num)++; |
691 | else { | 658 | else { |
692 | /* If it's an output descriptor, they're all supposed | 659 | /* If it's an output descriptor, they're all supposed |
@@ -697,11 +664,10 @@ static unsigned get_vq_desc(struct virtqueue *vq, | |||
697 | } | 664 | } |
698 | 665 | ||
699 | /* If we've got too many, that implies a descriptor loop. */ | 666 | /* If we've got too many, that implies a descriptor loop. */ |
700 | if (*out_num + *in_num > vq->vring.num) | 667 | if (*out_num + *in_num > max) |
701 | errx(1, "Looped descriptor"); | 668 | errx(1, "Looped descriptor"); |
702 | } while ((i = next_desc(vq, i)) != vq->vring.num); | 669 | } while ((i = next_desc(desc, i, max)) != max); |
703 | 670 | ||
704 | vq->inflight++; | ||
705 | return head; | 671 | return head; |
706 | } | 672 | } |
707 | 673 | ||
@@ -719,44 +685,20 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len) | |||
719 | /* Make sure buffer is written before we update index. */ | 685 | /* Make sure buffer is written before we update index. */ |
720 | wmb(); | 686 | wmb(); |
721 | vq->vring.used->idx++; | 687 | vq->vring.used->idx++; |
722 | vq->inflight--; | 688 | vq->pending_used++; |
723 | } | ||
724 | |||
725 | /* This actually sends the interrupt for this virtqueue */ | ||
726 | static void trigger_irq(int fd, struct virtqueue *vq) | ||
727 | { | ||
728 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | ||
729 | |||
730 | /* If they don't want an interrupt, don't send one, unless empty. */ | ||
731 | if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) | ||
732 | && vq->inflight) | ||
733 | return; | ||
734 | |||
735 | /* Send the Guest an interrupt tell them we used something up. */ | ||
736 | if (write(fd, buf, sizeof(buf)) != 0) | ||
737 | err(1, "Triggering irq %i", vq->config.irq); | ||
738 | } | 689 | } |
739 | 690 | ||
740 | /* And here's the combo meal deal. Supersize me! */ | 691 | /* And here's the combo meal deal. Supersize me! */ |
741 | static void add_used_and_trigger(int fd, struct virtqueue *vq, | 692 | static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) |
742 | unsigned int head, int len) | ||
743 | { | 693 | { |
744 | add_used(vq, head, len); | 694 | add_used(vq, head, len); |
745 | trigger_irq(fd, vq); | 695 | trigger_irq(vq); |
746 | } | 696 | } |
747 | 697 | ||
748 | /* | 698 | /* |
749 | * The Console | 699 | * The Console |
750 | * | 700 | * |
751 | * Here is the input terminal setting we save, and the routine to restore them | 701 | * We associate some data with the console for our exit hack. */ |
752 | * on exit so the user gets their terminal back. */ | ||
753 | static struct termios orig_term; | ||
754 | static void restore_term(void) | ||
755 | { | ||
756 | tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); | ||
757 | } | ||
758 | |||
759 | /* We associate some data with the console for our exit hack. */ | ||
760 | struct console_abort | 702 | struct console_abort |
761 | { | 703 | { |
762 | /* How many times have they hit ^C? */ | 704 | /* How many times have they hit ^C? */ |
@@ -766,276 +708,275 @@ struct console_abort | |||
766 | }; | 708 | }; |
767 | 709 | ||
768 | /* This is the routine which handles console input (ie. stdin). */ | 710 | /* This is the routine which handles console input (ie. stdin). */ |
769 | static bool handle_console_input(int fd, struct device *dev) | 711 | static void console_input(struct virtqueue *vq) |
770 | { | 712 | { |
771 | int len; | 713 | int len; |
772 | unsigned int head, in_num, out_num; | 714 | unsigned int head, in_num, out_num; |
773 | struct iovec iov[dev->vq->vring.num]; | 715 | struct console_abort *abort = vq->dev->priv; |
774 | struct console_abort *abort = dev->priv; | 716 | struct iovec iov[vq->vring.num]; |
775 | |||
776 | /* First we need a console buffer from the Guests's input virtqueue. */ | ||
777 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | ||
778 | |||
779 | /* If they're not ready for input, stop listening to this file | ||
780 | * descriptor. We'll start again once they add an input buffer. */ | ||
781 | if (head == dev->vq->vring.num) | ||
782 | return false; | ||
783 | 717 | ||
718 | /* Make sure there's a descriptor waiting. */ | ||
719 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | ||
784 | if (out_num) | 720 | if (out_num) |
785 | errx(1, "Output buffers in console in queue?"); | 721 | errx(1, "Output buffers in console in queue?"); |
786 | 722 | ||
787 | /* This is why we convert to iovecs: the readv() call uses them, and so | 723 | /* Read it in. */ |
788 | * it reads straight into the Guest's buffer. */ | 724 | len = readv(STDIN_FILENO, iov, in_num); |
789 | len = readv(dev->fd, iov, in_num); | ||
790 | if (len <= 0) { | 725 | if (len <= 0) { |
791 | /* This implies that the console is closed, is /dev/null, or | 726 | /* Ran out of input? */ |
792 | * something went terribly wrong. */ | ||
793 | warnx("Failed to get console input, ignoring console."); | 727 | warnx("Failed to get console input, ignoring console."); |
794 | /* Put the input terminal back. */ | 728 | /* For simplicity, dying threads kill the whole Launcher. So |
795 | restore_term(); | 729 | * just nap here. */ |
796 | /* Remove callback from input vq, so it doesn't restart us. */ | 730 | for (;;) |
797 | dev->vq->handle_output = NULL; | 731 | pause(); |
798 | /* Stop listening to this fd: don't call us again. */ | ||
799 | return false; | ||
800 | } | 732 | } |
801 | 733 | ||
802 | /* Tell the Guest about the new input. */ | 734 | add_used_and_trigger(vq, head, len); |
803 | add_used_and_trigger(fd, dev->vq, head, len); | ||
804 | 735 | ||
805 | /* Three ^C within one second? Exit. | 736 | /* Three ^C within one second? Exit. |
806 | * | 737 | * |
807 | * This is such a hack, but works surprisingly well. Each ^C has to be | 738 | * This is such a hack, but works surprisingly well. Each ^C has to |
808 | * in a buffer by itself, so they can't be too fast. But we check that | 739 | * be in a buffer by itself, so they can't be too fast. But we check |
809 | * we get three within about a second, so they can't be too slow. */ | 740 | * that we get three within about a second, so they can't be too |
810 | if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { | 741 | * slow. */ |
811 | if (!abort->count++) | 742 | if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { |
812 | gettimeofday(&abort->start, NULL); | ||
813 | else if (abort->count == 3) { | ||
814 | struct timeval now; | ||
815 | gettimeofday(&now, NULL); | ||
816 | if (now.tv_sec <= abort->start.tv_sec+1) { | ||
817 | unsigned long args[] = { LHREQ_BREAK, 0 }; | ||
818 | /* Close the fd so Waker will know it has to | ||
819 | * exit. */ | ||
820 | close(waker_fds.pipe[1]); | ||
821 | /* Just in case Waker is blocked in BREAK, send | ||
822 | * unbreak now. */ | ||
823 | write(fd, args, sizeof(args)); | ||
824 | exit(2); | ||
825 | } | ||
826 | abort->count = 0; | ||
827 | } | ||
828 | } else | ||
829 | /* Any other key resets the abort counter. */ | ||
830 | abort->count = 0; | 743 | abort->count = 0; |
744 | return; | ||
745 | } | ||
831 | 746 | ||
832 | /* Everything went OK! */ | 747 | abort->count++; |
833 | return true; | 748 | if (abort->count == 1) |
749 | gettimeofday(&abort->start, NULL); | ||
750 | else if (abort->count == 3) { | ||
751 | struct timeval now; | ||
752 | gettimeofday(&now, NULL); | ||
753 | /* Kill all Launcher processes with SIGINT, like normal ^C */ | ||
754 | if (now.tv_sec <= abort->start.tv_sec+1) | ||
755 | kill(0, SIGINT); | ||
756 | abort->count = 0; | ||
757 | } | ||
834 | } | 758 | } |
835 | 759 | ||
836 | /* Handling output for console is simple: we just get all the output buffers | 760 | /* This is the routine which handles console output (ie. stdout). */ |
837 | * and write them to stdout. */ | 761 | static void console_output(struct virtqueue *vq) |
838 | static void handle_console_output(int fd, struct virtqueue *vq, bool timeout) | ||
839 | { | 762 | { |
840 | unsigned int head, out, in; | 763 | unsigned int head, out, in; |
841 | int len; | ||
842 | struct iovec iov[vq->vring.num]; | 764 | struct iovec iov[vq->vring.num]; |
843 | 765 | ||
844 | /* Keep getting output buffers from the Guest until we run out. */ | 766 | head = wait_for_vq_desc(vq, iov, &out, &in); |
845 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { | 767 | if (in) |
846 | if (in) | 768 | errx(1, "Input buffers in console output queue?"); |
847 | errx(1, "Input buffers in output queue?"); | 769 | while (!iov_empty(iov, out)) { |
848 | len = writev(STDOUT_FILENO, iov, out); | 770 | int len = writev(STDOUT_FILENO, iov, out); |
849 | add_used_and_trigger(fd, vq, head, len); | 771 | if (len <= 0) |
772 | err(1, "Write to stdout gave %i", len); | ||
773 | iov_consume(iov, out, len); | ||
850 | } | 774 | } |
851 | } | 775 | add_used(vq, head, 0); |
852 | |||
853 | /* This is called when we no longer want to hear about Guest changes to a | ||
854 | * virtqueue. This is more efficient in high-traffic cases, but it means we | ||
855 | * have to set a timer to check if any more changes have occurred. */ | ||
856 | static void block_vq(struct virtqueue *vq) | ||
857 | { | ||
858 | struct itimerval itm; | ||
859 | |||
860 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
861 | vq->blocked = true; | ||
862 | |||
863 | itm.it_interval.tv_sec = 0; | ||
864 | itm.it_interval.tv_usec = 0; | ||
865 | itm.it_value.tv_sec = 0; | ||
866 | itm.it_value.tv_usec = timeout_usec; | ||
867 | |||
868 | setitimer(ITIMER_REAL, &itm, NULL); | ||
869 | } | 776 | } |
870 | 777 | ||
871 | /* | 778 | /* |
872 | * The Network | 779 | * The Network |
873 | * | 780 | * |
874 | * Handling output for network is also simple: we get all the output buffers | 781 | * Handling output for network is also simple: we get all the output buffers |
875 | * and write them (ignoring the first element) to this device's file descriptor | 782 | * and write them to /dev/net/tun. |
876 | * (/dev/net/tun). | ||
877 | */ | 783 | */ |
878 | static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) | 784 | struct net_info { |
785 | int tunfd; | ||
786 | }; | ||
787 | |||
788 | static void net_output(struct virtqueue *vq) | ||
879 | { | 789 | { |
880 | unsigned int head, out, in, num = 0; | 790 | struct net_info *net_info = vq->dev->priv; |
881 | int len; | 791 | unsigned int head, out, in; |
882 | struct iovec iov[vq->vring.num]; | 792 | struct iovec iov[vq->vring.num]; |
883 | static int last_timeout_num; | ||
884 | |||
885 | /* Keep getting output buffers from the Guest until we run out. */ | ||
886 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { | ||
887 | if (in) | ||
888 | errx(1, "Input buffers in output queue?"); | ||
889 | len = writev(vq->dev->fd, iov, out); | ||
890 | if (len < 0) | ||
891 | err(1, "Writing network packet to tun"); | ||
892 | add_used_and_trigger(fd, vq, head, len); | ||
893 | num++; | ||
894 | } | ||
895 | 793 | ||
896 | /* Block further kicks and set up a timer if we saw anything. */ | 794 | head = wait_for_vq_desc(vq, iov, &out, &in); |
897 | if (!timeout && num) | 795 | if (in) |
898 | block_vq(vq); | 796 | errx(1, "Input buffers in net output queue?"); |
899 | 797 | if (writev(net_info->tunfd, iov, out) < 0) | |
900 | /* We never quite know how long should we wait before we check the | 798 | errx(1, "Write to tun failed?"); |
901 | * queue again for more packets. We start at 500 microseconds, and if | 799 | add_used(vq, head, 0); |
902 | * we get fewer packets than last time, we assume we made the timeout | ||
903 | * too small and increase it by 10 microseconds. Otherwise, we drop it | ||
904 | * by one microsecond every time. It seems to work well enough. */ | ||
905 | if (timeout) { | ||
906 | if (num < last_timeout_num) | ||
907 | timeout_usec += 10; | ||
908 | else if (timeout_usec > 1) | ||
909 | timeout_usec--; | ||
910 | last_timeout_num = num; | ||
911 | } | ||
912 | } | 800 | } |
913 | 801 | ||
914 | /* This is where we handle a packet coming in from the tun device to our | 802 | /* Will reading from this file descriptor block? */ |
803 | static bool will_block(int fd) | ||
804 | { | ||
805 | fd_set fdset; | ||
806 | struct timeval zero = { 0, 0 }; | ||
807 | FD_ZERO(&fdset); | ||
808 | FD_SET(fd, &fdset); | ||
809 | return select(fd+1, &fdset, NULL, NULL, &zero) != 1; | ||
810 | } | ||
811 | |||
812 | /* This is where we handle packets coming in from the tun device to our | ||
915 | * Guest. */ | 813 | * Guest. */ |
916 | static bool handle_tun_input(int fd, struct device *dev) | 814 | static void net_input(struct virtqueue *vq) |
917 | { | 815 | { |
918 | unsigned int head, in_num, out_num; | ||
919 | int len; | 816 | int len; |
920 | struct iovec iov[dev->vq->vring.num]; | 817 | unsigned int head, out, in; |
921 | 818 | struct iovec iov[vq->vring.num]; | |
922 | /* First we need a network buffer from the Guests's recv virtqueue. */ | 819 | struct net_info *net_info = vq->dev->priv; |
923 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | ||
924 | if (head == dev->vq->vring.num) { | ||
925 | /* Now, it's expected that if we try to send a packet too | ||
926 | * early, the Guest won't be ready yet. Wait until the device | ||
927 | * status says it's ready. */ | ||
928 | /* FIXME: Actually want DRIVER_ACTIVE here. */ | ||
929 | |||
930 | /* Now tell it we want to know if new things appear. */ | ||
931 | dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
932 | wmb(); | ||
933 | |||
934 | /* We'll turn this back on if input buffers are registered. */ | ||
935 | return false; | ||
936 | } else if (out_num) | ||
937 | errx(1, "Output buffers in network recv queue?"); | ||
938 | |||
939 | /* Read the packet from the device directly into the Guest's buffer. */ | ||
940 | len = readv(dev->fd, iov, in_num); | ||
941 | if (len <= 0) | ||
942 | err(1, "reading network"); | ||
943 | 820 | ||
944 | /* Tell the Guest about the new packet. */ | 821 | head = wait_for_vq_desc(vq, iov, &out, &in); |
945 | add_used_and_trigger(fd, dev->vq, head, len); | 822 | if (out) |
823 | errx(1, "Output buffers in net input queue?"); | ||
946 | 824 | ||
947 | verbose("tun input packet len %i [%02x %02x] (%s)\n", len, | 825 | /* Deliver interrupt now, since we're about to sleep. */ |
948 | ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], | 826 | if (vq->pending_used && will_block(net_info->tunfd)) |
949 | head != dev->vq->vring.num ? "sent" : "discarded"); | 827 | trigger_irq(vq); |
950 | 828 | ||
951 | /* All good. */ | 829 | len = readv(net_info->tunfd, iov, in); |
952 | return true; | 830 | if (len <= 0) |
831 | err(1, "Failed to read from tun."); | ||
832 | add_used(vq, head, len); | ||
953 | } | 833 | } |
954 | 834 | ||
955 | /*L:215 This is the callback attached to the network and console input | 835 | /* This is the helper to create threads. */ |
956 | * virtqueues: it ensures we try again, in case we stopped console or net | 836 | static int do_thread(void *_vq) |
957 | * delivery because Guest didn't have any buffers. */ | ||
958 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) | ||
959 | { | 837 | { |
960 | add_device_fd(vq->dev->fd); | 838 | struct virtqueue *vq = _vq; |
961 | /* Snap the Waker out of its select loop. */ | 839 | |
962 | write(waker_fds.pipe[1], "", 1); | 840 | for (;;) |
841 | vq->service(vq); | ||
842 | return 0; | ||
963 | } | 843 | } |
964 | 844 | ||
965 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) | 845 | /* When a child dies, we kill our entire process group with SIGTERM. This |
846 | * also has the side effect that the shell restores the console for us! */ | ||
847 | static void kill_launcher(int signal) | ||
966 | { | 848 | { |
967 | /* We don't need to know again when Guest refills receive buffer. */ | 849 | kill(0, SIGTERM); |
968 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
969 | enable_fd(fd, vq, timeout); | ||
970 | } | 850 | } |
971 | 851 | ||
972 | /* When the Guest tells us they updated the status field, we handle it. */ | 852 | static void reset_device(struct device *dev) |
973 | static void update_device_status(struct device *dev) | ||
974 | { | 853 | { |
975 | struct virtqueue *vq; | 854 | struct virtqueue *vq; |
976 | 855 | ||
977 | /* This is a reset. */ | 856 | verbose("Resetting device %s\n", dev->name); |
978 | if (dev->desc->status == 0) { | 857 | |
979 | verbose("Resetting device %s\n", dev->name); | 858 | /* Clear any features they've acked. */ |
859 | memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); | ||
980 | 860 | ||
981 | /* Clear any features they've acked. */ | 861 | /* We're going to be explicitly killing threads, so ignore them. */ |
982 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, | 862 | signal(SIGCHLD, SIG_IGN); |
983 | dev->desc->feature_len); | ||
984 | 863 | ||
985 | /* Zero out the virtqueues. */ | 864 | /* Zero out the virtqueues, get rid of their threads */ |
986 | for (vq = dev->vq; vq; vq = vq->next) { | 865 | for (vq = dev->vq; vq; vq = vq->next) { |
987 | memset(vq->vring.desc, 0, | 866 | if (vq->thread != (pid_t)-1) { |
988 | vring_size(vq->config.num, LGUEST_VRING_ALIGN)); | 867 | kill(vq->thread, SIGTERM); |
989 | lg_last_avail(vq) = 0; | 868 | waitpid(vq->thread, NULL, 0); |
869 | vq->thread = (pid_t)-1; | ||
990 | } | 870 | } |
991 | } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | 871 | memset(vq->vring.desc, 0, |
872 | vring_size(vq->config.num, LGUEST_VRING_ALIGN)); | ||
873 | lg_last_avail(vq) = 0; | ||
874 | } | ||
875 | dev->running = false; | ||
876 | |||
877 | /* Now we care if threads die. */ | ||
878 | signal(SIGCHLD, (void *)kill_launcher); | ||
879 | } | ||
880 | |||
881 | static void create_thread(struct virtqueue *vq) | ||
882 | { | ||
883 | /* Create stack for thread and run it. Since stack grows | ||
884 | * upwards, we point the stack pointer to the end of this | ||
885 | * region. */ | ||
886 | char *stack = malloc(32768); | ||
887 | unsigned long args[] = { LHREQ_EVENTFD, | ||
888 | vq->config.pfn*getpagesize(), 0 }; | ||
889 | |||
890 | /* Create a zero-initialized eventfd. */ | ||
891 | vq->eventfd = eventfd(0, 0); | ||
892 | if (vq->eventfd < 0) | ||
893 | err(1, "Creating eventfd"); | ||
894 | args[2] = vq->eventfd; | ||
895 | |||
896 | /* Attach an eventfd to this virtqueue: it will go off | ||
897 | * when the Guest does an LHCALL_NOTIFY for this vq. */ | ||
898 | if (write(lguest_fd, &args, sizeof(args)) != 0) | ||
899 | err(1, "Attaching eventfd"); | ||
900 | |||
901 | /* CLONE_VM: because it has to access the Guest memory, and | ||
902 | * SIGCHLD so we get a signal if it dies. */ | ||
903 | vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); | ||
904 | if (vq->thread == (pid_t)-1) | ||
905 | err(1, "Creating clone"); | ||
906 | /* We close our local copy, now the child has it. */ | ||
907 | close(vq->eventfd); | ||
908 | } | ||
909 | |||
910 | static void start_device(struct device *dev) | ||
911 | { | ||
912 | unsigned int i; | ||
913 | struct virtqueue *vq; | ||
914 | |||
915 | verbose("Device %s OK: offered", dev->name); | ||
916 | for (i = 0; i < dev->feature_len; i++) | ||
917 | verbose(" %02x", get_feature_bits(dev)[i]); | ||
918 | verbose(", accepted"); | ||
919 | for (i = 0; i < dev->feature_len; i++) | ||
920 | verbose(" %02x", get_feature_bits(dev) | ||
921 | [dev->feature_len+i]); | ||
922 | |||
923 | for (vq = dev->vq; vq; vq = vq->next) { | ||
924 | if (vq->service) | ||
925 | create_thread(vq); | ||
926 | } | ||
927 | dev->running = true; | ||
928 | } | ||
929 | |||
930 | static void cleanup_devices(void) | ||
931 | { | ||
932 | struct device *dev; | ||
933 | |||
934 | for (dev = devices.dev; dev; dev = dev->next) | ||
935 | reset_device(dev); | ||
936 | |||
937 | /* If we saved off the original terminal settings, restore them now. */ | ||
938 | if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) | ||
939 | tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); | ||
940 | } | ||
941 | |||
942 | /* When the Guest tells us they updated the status field, we handle it. */ | ||
943 | static void update_device_status(struct device *dev) | ||
944 | { | ||
945 | /* A zero status is a reset, otherwise it's a set of flags. */ | ||
946 | if (dev->desc->status == 0) | ||
947 | reset_device(dev); | ||
948 | else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | ||
992 | warnx("Device %s configuration FAILED", dev->name); | 949 | warnx("Device %s configuration FAILED", dev->name); |
950 | if (dev->running) | ||
951 | reset_device(dev); | ||
993 | } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { | 952 | } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { |
994 | unsigned int i; | 953 | if (!dev->running) |
995 | 954 | start_device(dev); | |
996 | verbose("Device %s OK: offered", dev->name); | ||
997 | for (i = 0; i < dev->desc->feature_len; i++) | ||
998 | verbose(" %02x", get_feature_bits(dev)[i]); | ||
999 | verbose(", accepted"); | ||
1000 | for (i = 0; i < dev->desc->feature_len; i++) | ||
1001 | verbose(" %02x", get_feature_bits(dev) | ||
1002 | [dev->desc->feature_len+i]); | ||
1003 | |||
1004 | if (dev->ready) | ||
1005 | dev->ready(dev); | ||
1006 | } | 955 | } |
1007 | } | 956 | } |
1008 | 957 | ||
1009 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ | 958 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ |
1010 | static void handle_output(int fd, unsigned long addr) | 959 | static void handle_output(unsigned long addr) |
1011 | { | 960 | { |
1012 | struct device *i; | 961 | struct device *i; |
1013 | struct virtqueue *vq; | ||
1014 | 962 | ||
1015 | /* Check each device and virtqueue. */ | 963 | /* Check each device. */ |
1016 | for (i = devices.dev; i; i = i->next) { | 964 | for (i = devices.dev; i; i = i->next) { |
965 | struct virtqueue *vq; | ||
966 | |||
1017 | /* Notifications to device descriptors update device status. */ | 967 | /* Notifications to device descriptors update device status. */ |
1018 | if (from_guest_phys(addr) == i->desc) { | 968 | if (from_guest_phys(addr) == i->desc) { |
1019 | update_device_status(i); | 969 | update_device_status(i); |
1020 | return; | 970 | return; |
1021 | } | 971 | } |
1022 | 972 | ||
1023 | /* Notifications to virtqueues mean output has occurred. */ | 973 | /* Devices *can* be used before status is set to DRIVER_OK. */ |
1024 | for (vq = i->vq; vq; vq = vq->next) { | 974 | for (vq = i->vq; vq; vq = vq->next) { |
1025 | if (vq->config.pfn != addr/getpagesize()) | 975 | if (addr != vq->config.pfn*getpagesize()) |
1026 | continue; | 976 | continue; |
1027 | 977 | if (i->running) | |
1028 | /* Guest should acknowledge (and set features!) before | 978 | errx(1, "Notification on running %s", i->name); |
1029 | * using the device. */ | 979 | start_device(i); |
1030 | if (i->desc->status == 0) { | ||
1031 | warnx("%s gave early output", i->name); | ||
1032 | return; | ||
1033 | } | ||
1034 | |||
1035 | if (strcmp(vq->dev->name, "console") != 0) | ||
1036 | verbose("Output to %s\n", vq->dev->name); | ||
1037 | if (vq->handle_output) | ||
1038 | vq->handle_output(fd, vq, false); | ||
1039 | return; | 980 | return; |
1040 | } | 981 | } |
1041 | } | 982 | } |
@@ -1049,71 +990,6 @@ static void handle_output(int fd, unsigned long addr) | |||
1049 | strnlen(from_guest_phys(addr), guest_limit - addr)); | 990 | strnlen(from_guest_phys(addr), guest_limit - addr)); |
1050 | } | 991 | } |
1051 | 992 | ||
1052 | static void handle_timeout(int fd) | ||
1053 | { | ||
1054 | char buf[32]; | ||
1055 | struct device *i; | ||
1056 | struct virtqueue *vq; | ||
1057 | |||
1058 | /* Clear the pipe */ | ||
1059 | read(timeoutpipe[0], buf, sizeof(buf)); | ||
1060 | |||
1061 | /* Check each device and virtqueue: flush blocked ones. */ | ||
1062 | for (i = devices.dev; i; i = i->next) { | ||
1063 | for (vq = i->vq; vq; vq = vq->next) { | ||
1064 | if (!vq->blocked) | ||
1065 | continue; | ||
1066 | |||
1067 | vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
1068 | vq->blocked = false; | ||
1069 | if (vq->handle_output) | ||
1070 | vq->handle_output(fd, vq, true); | ||
1071 | } | ||
1072 | } | ||
1073 | } | ||
1074 | |||
1075 | /* This is called when the Waker wakes us up: check for incoming file | ||
1076 | * descriptors. */ | ||
1077 | static void handle_input(int fd) | ||
1078 | { | ||
1079 | /* select() wants a zeroed timeval to mean "don't wait". */ | ||
1080 | struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; | ||
1081 | |||
1082 | for (;;) { | ||
1083 | struct device *i; | ||
1084 | fd_set fds = devices.infds; | ||
1085 | int num; | ||
1086 | |||
1087 | num = select(devices.max_infd+1, &fds, NULL, NULL, &poll); | ||
1088 | /* Could get interrupted */ | ||
1089 | if (num < 0) | ||
1090 | continue; | ||
1091 | /* If nothing is ready, we're done. */ | ||
1092 | if (num == 0) | ||
1093 | break; | ||
1094 | |||
1095 | /* Otherwise, call the device(s) which have readable file | ||
1096 | * descriptors and a method of handling them. */ | ||
1097 | for (i = devices.dev; i; i = i->next) { | ||
1098 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | ||
1099 | if (i->handle_input(fd, i)) | ||
1100 | continue; | ||
1101 | |||
1102 | /* If handle_input() returns false, it means we | ||
1103 | * should no longer service it. Networking and | ||
1104 | * console do this when there's no input | ||
1105 | * buffers to deliver into. Console also uses | ||
1106 | * it when it discovers that stdin is closed. */ | ||
1107 | FD_CLR(i->fd, &devices.infds); | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | /* Is this the timeout fd? */ | ||
1112 | if (FD_ISSET(timeoutpipe[0], &fds)) | ||
1113 | handle_timeout(fd); | ||
1114 | } | ||
1115 | } | ||
1116 | |||
1117 | /*L:190 | 993 | /*L:190 |
1118 | * Device Setup | 994 | * Device Setup |
1119 | * | 995 | * |
@@ -1129,8 +1005,8 @@ static void handle_input(int fd) | |||
1129 | static u8 *device_config(const struct device *dev) | 1005 | static u8 *device_config(const struct device *dev) |
1130 | { | 1006 | { |
1131 | return (void *)(dev->desc + 1) | 1007 | return (void *)(dev->desc + 1) |
1132 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig) | 1008 | + dev->num_vq * sizeof(struct lguest_vqconfig) |
1133 | + dev->desc->feature_len * 2; | 1009 | + dev->feature_len * 2; |
1134 | } | 1010 | } |
1135 | 1011 | ||
1136 | /* This routine allocates a new "struct lguest_device_desc" from descriptor | 1012 | /* This routine allocates a new "struct lguest_device_desc" from descriptor |
@@ -1159,7 +1035,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) | |||
1159 | /* Each device descriptor is followed by the description of its virtqueues. We | 1035 | /* Each device descriptor is followed by the description of its virtqueues. We |
1160 | * specify how many descriptors the virtqueue is to have. */ | 1036 | * specify how many descriptors the virtqueue is to have. */ |
1161 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1037 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
1162 | void (*handle_output)(int, struct virtqueue *, bool)) | 1038 | void (*service)(struct virtqueue *)) |
1163 | { | 1039 | { |
1164 | unsigned int pages; | 1040 | unsigned int pages; |
1165 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1041 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
@@ -1174,8 +1050,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1174 | vq->next = NULL; | 1050 | vq->next = NULL; |
1175 | vq->last_avail_idx = 0; | 1051 | vq->last_avail_idx = 0; |
1176 | vq->dev = dev; | 1052 | vq->dev = dev; |
1177 | vq->inflight = 0; | 1053 | vq->service = service; |
1178 | vq->blocked = false; | 1054 | vq->thread = (pid_t)-1; |
1179 | 1055 | ||
1180 | /* Initialize the configuration. */ | 1056 | /* Initialize the configuration. */ |
1181 | vq->config.num = num_descs; | 1057 | vq->config.num = num_descs; |
@@ -1191,6 +1067,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1191 | * yet, otherwise we'd be overwriting them. */ | 1067 | * yet, otherwise we'd be overwriting them. */ |
1192 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); | 1068 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); |
1193 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); | 1069 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); |
1070 | dev->num_vq++; | ||
1194 | dev->desc->num_vq++; | 1071 | dev->desc->num_vq++; |
1195 | 1072 | ||
1196 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); | 1073 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); |
@@ -1199,15 +1076,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1199 | * second. */ | 1076 | * second. */ |
1200 | for (i = &dev->vq; *i; i = &(*i)->next); | 1077 | for (i = &dev->vq; *i; i = &(*i)->next); |
1201 | *i = vq; | 1078 | *i = vq; |
1202 | |||
1203 | /* Set the routine to call when the Guest does something to this | ||
1204 | * virtqueue. */ | ||
1205 | vq->handle_output = handle_output; | ||
1206 | |||
1207 | /* As an optimization, set the advisory "Don't Notify Me" flag if we | ||
1208 | * don't have a handler */ | ||
1209 | if (!handle_output) | ||
1210 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; | ||
1211 | } | 1079 | } |
1212 | 1080 | ||
1213 | /* The first half of the feature bitmask is for us to advertise features. The | 1081 | /* The first half of the feature bitmask is for us to advertise features. The |
@@ -1219,7 +1087,7 @@ static void add_feature(struct device *dev, unsigned bit) | |||
1219 | /* We can't extend the feature bits once we've added config bytes */ | 1087 | /* We can't extend the feature bits once we've added config bytes */ |
1220 | if (dev->desc->feature_len <= bit / CHAR_BIT) { | 1088 | if (dev->desc->feature_len <= bit / CHAR_BIT) { |
1221 | assert(dev->desc->config_len == 0); | 1089 | assert(dev->desc->config_len == 0); |
1222 | dev->desc->feature_len = (bit / CHAR_BIT) + 1; | 1090 | dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; |
1223 | } | 1091 | } |
1224 | 1092 | ||
1225 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); | 1093 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); |
@@ -1243,22 +1111,17 @@ static void set_config(struct device *dev, unsigned len, const void *conf) | |||
1243 | * calling new_dev_desc() to allocate the descriptor and device memory. | 1111 | * calling new_dev_desc() to allocate the descriptor and device memory. |
1244 | * | 1112 | * |
1245 | * See what I mean about userspace being boring? */ | 1113 | * See what I mean about userspace being boring? */ |
1246 | static struct device *new_device(const char *name, u16 type, int fd, | 1114 | static struct device *new_device(const char *name, u16 type) |
1247 | bool (*handle_input)(int, struct device *)) | ||
1248 | { | 1115 | { |
1249 | struct device *dev = malloc(sizeof(*dev)); | 1116 | struct device *dev = malloc(sizeof(*dev)); |
1250 | 1117 | ||
1251 | /* Now we populate the fields one at a time. */ | 1118 | /* Now we populate the fields one at a time. */ |
1252 | dev->fd = fd; | ||
1253 | /* If we have an input handler for this file descriptor, then we add it | ||
1254 | * to the device_list's fdset and maxfd. */ | ||
1255 | if (handle_input) | ||
1256 | add_device_fd(dev->fd); | ||
1257 | dev->desc = new_dev_desc(type); | 1119 | dev->desc = new_dev_desc(type); |
1258 | dev->handle_input = handle_input; | ||
1259 | dev->name = name; | 1120 | dev->name = name; |
1260 | dev->vq = NULL; | 1121 | dev->vq = NULL; |
1261 | dev->ready = NULL; | 1122 | dev->feature_len = 0; |
1123 | dev->num_vq = 0; | ||
1124 | dev->running = false; | ||
1262 | 1125 | ||
1263 | /* Append to device list. Prepending to a single-linked list is | 1126 | /* Append to device list. Prepending to a single-linked list is |
1264 | * easier, but the user expects the devices to be arranged on the bus | 1127 | * easier, but the user expects the devices to be arranged on the bus |
@@ -1286,13 +1149,10 @@ static void setup_console(void) | |||
1286 | * raw input stream to the Guest. */ | 1149 | * raw input stream to the Guest. */ |
1287 | term.c_lflag &= ~(ISIG|ICANON|ECHO); | 1150 | term.c_lflag &= ~(ISIG|ICANON|ECHO); |
1288 | tcsetattr(STDIN_FILENO, TCSANOW, &term); | 1151 | tcsetattr(STDIN_FILENO, TCSANOW, &term); |
1289 | /* If we exit gracefully, the original settings will be | ||
1290 | * restored so the user can see what they're typing. */ | ||
1291 | atexit(restore_term); | ||
1292 | } | 1152 | } |
1293 | 1153 | ||
1294 | dev = new_device("console", VIRTIO_ID_CONSOLE, | 1154 | dev = new_device("console", VIRTIO_ID_CONSOLE); |
1295 | STDIN_FILENO, handle_console_input); | 1155 | |
1296 | /* We store the console state in dev->priv, and initialize it. */ | 1156 | /* We store the console state in dev->priv, and initialize it. */ |
1297 | dev->priv = malloc(sizeof(struct console_abort)); | 1157 | dev->priv = malloc(sizeof(struct console_abort)); |
1298 | ((struct console_abort *)dev->priv)->count = 0; | 1158 | ((struct console_abort *)dev->priv)->count = 0; |
@@ -1301,31 +1161,13 @@ static void setup_console(void) | |||
1301 | * they put something the input queue, we make sure we're listening to | 1161 | * they put something the input queue, we make sure we're listening to |
1302 | * stdin. When they put something in the output queue, we write it to | 1162 | * stdin. When they put something in the output queue, we write it to |
1303 | * stdout. */ | 1163 | * stdout. */ |
1304 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); | 1164 | add_virtqueue(dev, VIRTQUEUE_NUM, console_input); |
1305 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); | 1165 | add_virtqueue(dev, VIRTQUEUE_NUM, console_output); |
1306 | 1166 | ||
1307 | verbose("device %u: console\n", devices.device_num++); | 1167 | verbose("device %u: console\n", ++devices.device_num); |
1308 | } | 1168 | } |
1309 | /*:*/ | 1169 | /*:*/ |
1310 | 1170 | ||
1311 | static void timeout_alarm(int sig) | ||
1312 | { | ||
1313 | write(timeoutpipe[1], "", 1); | ||
1314 | } | ||
1315 | |||
1316 | static void setup_timeout(void) | ||
1317 | { | ||
1318 | if (pipe(timeoutpipe) != 0) | ||
1319 | err(1, "Creating timeout pipe"); | ||
1320 | |||
1321 | if (fcntl(timeoutpipe[1], F_SETFL, | ||
1322 | fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0) | ||
1323 | err(1, "Making timeout pipe nonblocking"); | ||
1324 | |||
1325 | add_device_fd(timeoutpipe[0]); | ||
1326 | signal(SIGALRM, timeout_alarm); | ||
1327 | } | ||
1328 | |||
1329 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a | 1171 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a |
1330 | * --sharenet=<name> option which opens or creates a named pipe. This can be | 1172 | * --sharenet=<name> option which opens or creates a named pipe. This can be |
1331 | * used to send packets to another guest in a 1:1 manner. | 1173 | * used to send packets to another guest in a 1:1 manner. |
@@ -1447,21 +1289,23 @@ static int get_tun_device(char tapif[IFNAMSIZ]) | |||
1447 | static void setup_tun_net(char *arg) | 1289 | static void setup_tun_net(char *arg) |
1448 | { | 1290 | { |
1449 | struct device *dev; | 1291 | struct device *dev; |
1450 | int netfd, ipfd; | 1292 | struct net_info *net_info = malloc(sizeof(*net_info)); |
1293 | int ipfd; | ||
1451 | u32 ip = INADDR_ANY; | 1294 | u32 ip = INADDR_ANY; |
1452 | bool bridging = false; | 1295 | bool bridging = false; |
1453 | char tapif[IFNAMSIZ], *p; | 1296 | char tapif[IFNAMSIZ], *p; |
1454 | struct virtio_net_config conf; | 1297 | struct virtio_net_config conf; |
1455 | 1298 | ||
1456 | netfd = get_tun_device(tapif); | 1299 | net_info->tunfd = get_tun_device(tapif); |
1457 | 1300 | ||
1458 | /* First we create a new network device. */ | 1301 | /* First we create a new network device. */ |
1459 | dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); | 1302 | dev = new_device("net", VIRTIO_ID_NET); |
1303 | dev->priv = net_info; | ||
1460 | 1304 | ||
1461 | /* Network devices need a receive and a send queue, just like | 1305 | /* Network devices need a receive and a send queue, just like |
1462 | * console. */ | 1306 | * console. */ |
1463 | add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd); | 1307 | add_virtqueue(dev, VIRTQUEUE_NUM, net_input); |
1464 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); | 1308 | add_virtqueue(dev, VIRTQUEUE_NUM, net_output); |
1465 | 1309 | ||
1466 | /* We need a socket to perform the magic network ioctls to bring up the | 1310 | /* We need a socket to perform the magic network ioctls to bring up the |
1467 | * tap interface, connect to the bridge etc. Any socket will do! */ | 1311 | * tap interface, connect to the bridge etc. Any socket will do! */ |
@@ -1502,6 +1346,8 @@ static void setup_tun_net(char *arg) | |||
1502 | add_feature(dev, VIRTIO_NET_F_HOST_TSO4); | 1346 | add_feature(dev, VIRTIO_NET_F_HOST_TSO4); |
1503 | add_feature(dev, VIRTIO_NET_F_HOST_TSO6); | 1347 | add_feature(dev, VIRTIO_NET_F_HOST_TSO6); |
1504 | add_feature(dev, VIRTIO_NET_F_HOST_ECN); | 1348 | add_feature(dev, VIRTIO_NET_F_HOST_ECN); |
1349 | /* We handle indirect ring entries */ | ||
1350 | add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); | ||
1505 | set_config(dev, sizeof(conf), &conf); | 1351 | set_config(dev, sizeof(conf), &conf); |
1506 | 1352 | ||
1507 | /* We don't need the socket any more; setup is done. */ | 1353 | /* We don't need the socket any more; setup is done. */ |
@@ -1550,20 +1396,18 @@ struct vblk_info | |||
1550 | * Remember that the block device is handled by a separate I/O thread. We head | 1396 | * Remember that the block device is handled by a separate I/O thread. We head |
1551 | * straight into the core of that thread here: | 1397 | * straight into the core of that thread here: |
1552 | */ | 1398 | */ |
1553 | static bool service_io(struct device *dev) | 1399 | static void blk_request(struct virtqueue *vq) |
1554 | { | 1400 | { |
1555 | struct vblk_info *vblk = dev->priv; | 1401 | struct vblk_info *vblk = vq->dev->priv; |
1556 | unsigned int head, out_num, in_num, wlen; | 1402 | unsigned int head, out_num, in_num, wlen; |
1557 | int ret; | 1403 | int ret; |
1558 | u8 *in; | 1404 | u8 *in; |
1559 | struct virtio_blk_outhdr *out; | 1405 | struct virtio_blk_outhdr *out; |
1560 | struct iovec iov[dev->vq->vring.num]; | 1406 | struct iovec iov[vq->vring.num]; |
1561 | off64_t off; | 1407 | off64_t off; |
1562 | 1408 | ||
1563 | /* See if there's a request waiting. If not, nothing to do. */ | 1409 | /* Get the next request. */ |
1564 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | 1410 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); |
1565 | if (head == dev->vq->vring.num) | ||
1566 | return false; | ||
1567 | 1411 | ||
1568 | /* Every block request should contain at least one output buffer | 1412 | /* Every block request should contain at least one output buffer |
1569 | * (detailing the location on disk and the type of request) and one | 1413 | * (detailing the location on disk and the type of request) and one |
@@ -1630,83 +1474,28 @@ static bool service_io(struct device *dev) | |||
1630 | } | 1474 | } |
1631 | } | 1475 | } |
1632 | 1476 | ||
1633 | /* We can't trigger an IRQ, because we're not the Launcher. It does | 1477 | /* OK, so we noted that it was pretty poor to use an fdatasync as a |
1634 | * that when we tell it we're done. */ | 1478 | * barrier. But Christoph Hellwig points out that we need a sync |
1635 | add_used(dev->vq, head, wlen); | 1479 | * *afterwards* as well: "Barriers specify no reordering to the front |
1636 | return true; | 1480 | * or the back." And Jens Axboe confirmed it, so here we are: */ |
1637 | } | 1481 | if (out->type & VIRTIO_BLK_T_BARRIER) |
1638 | 1482 | fdatasync(vblk->fd); | |
1639 | /* This is the thread which actually services the I/O. */ | ||
1640 | static int io_thread(void *_dev) | ||
1641 | { | ||
1642 | struct device *dev = _dev; | ||
1643 | struct vblk_info *vblk = dev->priv; | ||
1644 | char c; | ||
1645 | |||
1646 | /* Close other side of workpipe so we get 0 read when main dies. */ | ||
1647 | close(vblk->workpipe[1]); | ||
1648 | /* Close the other side of the done_fd pipe. */ | ||
1649 | close(dev->fd); | ||
1650 | |||
1651 | /* When this read fails, it means Launcher died, so we follow. */ | ||
1652 | while (read(vblk->workpipe[0], &c, 1) == 1) { | ||
1653 | /* We acknowledge each request immediately to reduce latency, | ||
1654 | * rather than waiting until we've done them all. I haven't | ||
1655 | * measured to see if it makes any difference. | ||
1656 | * | ||
1657 | * That would be an interesting test, wouldn't it? You could | ||
1658 | * also try having more than one I/O thread. */ | ||
1659 | while (service_io(dev)) | ||
1660 | write(vblk->done_fd, &c, 1); | ||
1661 | } | ||
1662 | return 0; | ||
1663 | } | ||
1664 | |||
1665 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens | ||
1666 | * when that thread tells us it's completed some I/O. */ | ||
1667 | static bool handle_io_finish(int fd, struct device *dev) | ||
1668 | { | ||
1669 | char c; | ||
1670 | |||
1671 | /* If the I/O thread died, presumably it printed the error, so we | ||
1672 | * simply exit. */ | ||
1673 | if (read(dev->fd, &c, 1) != 1) | ||
1674 | exit(1); | ||
1675 | |||
1676 | /* It did some work, so trigger the irq. */ | ||
1677 | trigger_irq(fd, dev->vq); | ||
1678 | return true; | ||
1679 | } | ||
1680 | |||
1681 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ | ||
1682 | static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout) | ||
1683 | { | ||
1684 | struct vblk_info *vblk = vq->dev->priv; | ||
1685 | char c = 0; | ||
1686 | 1483 | ||
1687 | /* Wake up I/O thread and tell it to go to work! */ | 1484 | add_used(vq, head, wlen); |
1688 | if (write(vblk->workpipe[1], &c, 1) != 1) | ||
1689 | /* Presumably it indicated why it died. */ | ||
1690 | exit(1); | ||
1691 | } | 1485 | } |
1692 | 1486 | ||
1693 | /*L:198 This actually sets up a virtual block device. */ | 1487 | /*L:198 This actually sets up a virtual block device. */ |
1694 | static void setup_block_file(const char *filename) | 1488 | static void setup_block_file(const char *filename) |
1695 | { | 1489 | { |
1696 | int p[2]; | ||
1697 | struct device *dev; | 1490 | struct device *dev; |
1698 | struct vblk_info *vblk; | 1491 | struct vblk_info *vblk; |
1699 | void *stack; | ||
1700 | struct virtio_blk_config conf; | 1492 | struct virtio_blk_config conf; |
1701 | 1493 | ||
1702 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ | ||
1703 | pipe(p); | ||
1704 | |||
1705 | /* The device responds to return from I/O thread. */ | 1494 | /* The device responds to return from I/O thread. */ |
1706 | dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); | 1495 | dev = new_device("block", VIRTIO_ID_BLOCK); |
1707 | 1496 | ||
1708 | /* The device has one virtqueue, where the Guest places requests. */ | 1497 | /* The device has one virtqueue, where the Guest places requests. */ |
1709 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); | 1498 | add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); |
1710 | 1499 | ||
1711 | /* Allocate the room for our own bookkeeping */ | 1500 | /* Allocate the room for our own bookkeeping */ |
1712 | vblk = dev->priv = malloc(sizeof(*vblk)); | 1501 | vblk = dev->priv = malloc(sizeof(*vblk)); |
@@ -1728,49 +1517,29 @@ static void setup_block_file(const char *filename) | |||
1728 | 1517 | ||
1729 | set_config(dev, sizeof(conf), &conf); | 1518 | set_config(dev, sizeof(conf), &conf); |
1730 | 1519 | ||
1731 | /* The I/O thread writes to this end of the pipe when done. */ | ||
1732 | vblk->done_fd = p[1]; | ||
1733 | |||
1734 | /* This is the second pipe, which is how we tell the I/O thread about | ||
1735 | * more work. */ | ||
1736 | pipe(vblk->workpipe); | ||
1737 | |||
1738 | /* Create stack for thread and run it. Since stack grows upwards, we | ||
1739 | * point the stack pointer to the end of this region. */ | ||
1740 | stack = malloc(32768); | ||
1741 | /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from | ||
1742 | * becoming a zombie. */ | ||
1743 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) | ||
1744 | err(1, "Creating clone"); | ||
1745 | |||
1746 | /* We don't need to keep the I/O thread's end of the pipes open. */ | ||
1747 | close(vblk->done_fd); | ||
1748 | close(vblk->workpipe[0]); | ||
1749 | |||
1750 | verbose("device %u: virtblock %llu sectors\n", | 1520 | verbose("device %u: virtblock %llu sectors\n", |
1751 | devices.device_num, le64_to_cpu(conf.capacity)); | 1521 | ++devices.device_num, le64_to_cpu(conf.capacity)); |
1752 | } | 1522 | } |
1753 | 1523 | ||
1524 | struct rng_info { | ||
1525 | int rfd; | ||
1526 | }; | ||
1527 | |||
1754 | /* Our random number generator device reads from /dev/random into the Guest's | 1528 | /* Our random number generator device reads from /dev/random into the Guest's |
1755 | * input buffers. The usual case is that the Guest doesn't want random numbers | 1529 | * input buffers. The usual case is that the Guest doesn't want random numbers |
1756 | * and so has no buffers although /dev/random is still readable, whereas | 1530 | * and so has no buffers although /dev/random is still readable, whereas |
1757 | * console is the reverse. | 1531 | * console is the reverse. |
1758 | * | 1532 | * |
1759 | * The same logic applies, however. */ | 1533 | * The same logic applies, however. */ |
1760 | static bool handle_rng_input(int fd, struct device *dev) | 1534 | static void rng_input(struct virtqueue *vq) |
1761 | { | 1535 | { |
1762 | int len; | 1536 | int len; |
1763 | unsigned int head, in_num, out_num, totlen = 0; | 1537 | unsigned int head, in_num, out_num, totlen = 0; |
1764 | struct iovec iov[dev->vq->vring.num]; | 1538 | struct rng_info *rng_info = vq->dev->priv; |
1539 | struct iovec iov[vq->vring.num]; | ||
1765 | 1540 | ||
1766 | /* First we need a buffer from the Guests's virtqueue. */ | 1541 | /* First we need a buffer from the Guests's virtqueue. */ |
1767 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | 1542 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); |
1768 | |||
1769 | /* If they're not ready for input, stop listening to this file | ||
1770 | * descriptor. We'll start again once they add an input buffer. */ | ||
1771 | if (head == dev->vq->vring.num) | ||
1772 | return false; | ||
1773 | |||
1774 | if (out_num) | 1543 | if (out_num) |
1775 | errx(1, "Output buffers in rng?"); | 1544 | errx(1, "Output buffers in rng?"); |
1776 | 1545 | ||
@@ -1778,7 +1547,7 @@ static bool handle_rng_input(int fd, struct device *dev) | |||
1778 | * it reads straight into the Guest's buffer. We loop to make sure we | 1547 | * it reads straight into the Guest's buffer. We loop to make sure we |
1779 | * fill it. */ | 1548 | * fill it. */ |
1780 | while (!iov_empty(iov, in_num)) { | 1549 | while (!iov_empty(iov, in_num)) { |
1781 | len = readv(dev->fd, iov, in_num); | 1550 | len = readv(rng_info->rfd, iov, in_num); |
1782 | if (len <= 0) | 1551 | if (len <= 0) |
1783 | err(1, "Read from /dev/random gave %i", len); | 1552 | err(1, "Read from /dev/random gave %i", len); |
1784 | iov_consume(iov, in_num, len); | 1553 | iov_consume(iov, in_num, len); |
@@ -1786,25 +1555,23 @@ static bool handle_rng_input(int fd, struct device *dev) | |||
1786 | } | 1555 | } |
1787 | 1556 | ||
1788 | /* Tell the Guest about the new input. */ | 1557 | /* Tell the Guest about the new input. */ |
1789 | add_used_and_trigger(fd, dev->vq, head, totlen); | 1558 | add_used(vq, head, totlen); |
1790 | |||
1791 | /* Everything went OK! */ | ||
1792 | return true; | ||
1793 | } | 1559 | } |
1794 | 1560 | ||
1795 | /* And this creates a "hardware" random number device for the Guest. */ | 1561 | /* And this creates a "hardware" random number device for the Guest. */ |
1796 | static void setup_rng(void) | 1562 | static void setup_rng(void) |
1797 | { | 1563 | { |
1798 | struct device *dev; | 1564 | struct device *dev; |
1799 | int fd; | 1565 | struct rng_info *rng_info = malloc(sizeof(*rng_info)); |
1800 | 1566 | ||
1801 | fd = open_or_die("/dev/random", O_RDONLY); | 1567 | rng_info->rfd = open_or_die("/dev/random", O_RDONLY); |
1802 | 1568 | ||
1803 | /* The device responds to return from I/O thread. */ | 1569 | /* The device responds to return from I/O thread. */ |
1804 | dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input); | 1570 | dev = new_device("rng", VIRTIO_ID_RNG); |
1571 | dev->priv = rng_info; | ||
1805 | 1572 | ||
1806 | /* The device has one virtqueue, where the Guest places inbufs. */ | 1573 | /* The device has one virtqueue, where the Guest places inbufs. */ |
1807 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); | 1574 | add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); |
1808 | 1575 | ||
1809 | verbose("device %u: rng\n", devices.device_num++); | 1576 | verbose("device %u: rng\n", devices.device_num++); |
1810 | } | 1577 | } |
@@ -1820,17 +1587,18 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
1820 | for (i = 3; i < FD_SETSIZE; i++) | 1587 | for (i = 3; i < FD_SETSIZE; i++) |
1821 | close(i); | 1588 | close(i); |
1822 | 1589 | ||
1823 | /* The exec automatically gets rid of the I/O and Waker threads. */ | 1590 | /* Reset all the devices (kills all threads). */ |
1591 | cleanup_devices(); | ||
1592 | |||
1824 | execv(main_args[0], main_args); | 1593 | execv(main_args[0], main_args); |
1825 | err(1, "Could not exec %s", main_args[0]); | 1594 | err(1, "Could not exec %s", main_args[0]); |
1826 | } | 1595 | } |
1827 | 1596 | ||
1828 | /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves | 1597 | /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves |
1829 | * its input and output, and finally, lays it to rest. */ | 1598 | * its input and output, and finally, lays it to rest. */ |
1830 | static void __attribute__((noreturn)) run_guest(int lguest_fd) | 1599 | static void __attribute__((noreturn)) run_guest(void) |
1831 | { | 1600 | { |
1832 | for (;;) { | 1601 | for (;;) { |
1833 | unsigned long args[] = { LHREQ_BREAK, 0 }; | ||
1834 | unsigned long notify_addr; | 1602 | unsigned long notify_addr; |
1835 | int readval; | 1603 | int readval; |
1836 | 1604 | ||
@@ -1841,8 +1609,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1841 | /* One unsigned long means the Guest did HCALL_NOTIFY */ | 1609 | /* One unsigned long means the Guest did HCALL_NOTIFY */ |
1842 | if (readval == sizeof(notify_addr)) { | 1610 | if (readval == sizeof(notify_addr)) { |
1843 | verbose("Notify on address %#lx\n", notify_addr); | 1611 | verbose("Notify on address %#lx\n", notify_addr); |
1844 | handle_output(lguest_fd, notify_addr); | 1612 | handle_output(notify_addr); |
1845 | continue; | ||
1846 | /* ENOENT means the Guest died. Reading tells us why. */ | 1613 | /* ENOENT means the Guest died. Reading tells us why. */ |
1847 | } else if (errno == ENOENT) { | 1614 | } else if (errno == ENOENT) { |
1848 | char reason[1024] = { 0 }; | 1615 | char reason[1024] = { 0 }; |
@@ -1851,19 +1618,9 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1851 | /* ERESTART means that we need to reboot the guest */ | 1618 | /* ERESTART means that we need to reboot the guest */ |
1852 | } else if (errno == ERESTART) { | 1619 | } else if (errno == ERESTART) { |
1853 | restart_guest(); | 1620 | restart_guest(); |
1854 | /* EAGAIN means a signal (timeout). | 1621 | /* Anything else means a bug or incompatible change. */ |
1855 | * Anything else means a bug or incompatible change. */ | 1622 | } else |
1856 | } else if (errno != EAGAIN) | ||
1857 | err(1, "Running guest failed"); | 1623 | err(1, "Running guest failed"); |
1858 | |||
1859 | /* Only service input on thread for CPU 0. */ | ||
1860 | if (cpu_id != 0) | ||
1861 | continue; | ||
1862 | |||
1863 | /* Service input, then unset the BREAK to release the Waker. */ | ||
1864 | handle_input(lguest_fd); | ||
1865 | if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) | ||
1866 | err(1, "Resetting break"); | ||
1867 | } | 1624 | } |
1868 | } | 1625 | } |
1869 | /*L:240 | 1626 | /*L:240 |
@@ -1897,8 +1654,8 @@ int main(int argc, char *argv[]) | |||
1897 | /* Memory, top-level pagetable, code startpoint and size of the | 1654 | /* Memory, top-level pagetable, code startpoint and size of the |
1898 | * (optional) initrd. */ | 1655 | * (optional) initrd. */ |
1899 | unsigned long mem = 0, start, initrd_size = 0; | 1656 | unsigned long mem = 0, start, initrd_size = 0; |
1900 | /* Two temporaries and the /dev/lguest file descriptor. */ | 1657 | /* Two temporaries. */ |
1901 | int i, c, lguest_fd; | 1658 | int i, c; |
1902 | /* The boot information for the Guest. */ | 1659 | /* The boot information for the Guest. */ |
1903 | struct boot_params *boot; | 1660 | struct boot_params *boot; |
1904 | /* If they specify an initrd file to load. */ | 1661 | /* If they specify an initrd file to load. */ |
@@ -1906,18 +1663,10 @@ int main(int argc, char *argv[]) | |||
1906 | 1663 | ||
1907 | /* Save the args: we "reboot" by execing ourselves again. */ | 1664 | /* Save the args: we "reboot" by execing ourselves again. */ |
1908 | main_args = argv; | 1665 | main_args = argv; |
1909 | /* We don't "wait" for the children, so prevent them from becoming | ||
1910 | * zombies. */ | ||
1911 | signal(SIGCHLD, SIG_IGN); | ||
1912 | 1666 | ||
1913 | /* First we initialize the device list. Since console and network | 1667 | /* First we initialize the device list. We keep a pointer to the last |
1914 | * device receive input from a file descriptor, we keep an fdset | 1668 | * device, and the next interrupt number to use for devices (1: |
1915 | * (infds) and the maximum fd number (max_infd) with the head of the | 1669 | * remember that 0 is used by the timer). */ |
1916 | * list. We also keep a pointer to the last device. Finally, we keep | ||
1917 | * the next interrupt number to use for devices (1: remember that 0 is | ||
1918 | * used by the timer). */ | ||
1919 | FD_ZERO(&devices.infds); | ||
1920 | devices.max_infd = -1; | ||
1921 | devices.lastdev = NULL; | 1670 | devices.lastdev = NULL; |
1922 | devices.next_irq = 1; | 1671 | devices.next_irq = 1; |
1923 | 1672 | ||
@@ -1975,9 +1724,6 @@ int main(int argc, char *argv[]) | |||
1975 | /* We always have a console device */ | 1724 | /* We always have a console device */ |
1976 | setup_console(); | 1725 | setup_console(); |
1977 | 1726 | ||
1978 | /* We can timeout waiting for Guest network transmit. */ | ||
1979 | setup_timeout(); | ||
1980 | |||
1981 | /* Now we load the kernel */ | 1727 | /* Now we load the kernel */ |
1982 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); | 1728 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); |
1983 | 1729 | ||
@@ -2016,15 +1762,16 @@ int main(int argc, char *argv[]) | |||
2016 | 1762 | ||
2017 | /* We tell the kernel to initialize the Guest: this returns the open | 1763 | /* We tell the kernel to initialize the Guest: this returns the open |
2018 | * /dev/lguest file descriptor. */ | 1764 | * /dev/lguest file descriptor. */ |
2019 | lguest_fd = tell_kernel(start); | 1765 | tell_kernel(start); |
1766 | |||
1767 | /* Ensure that we terminate if a child dies. */ | ||
1768 | signal(SIGCHLD, kill_launcher); | ||
2020 | 1769 | ||
2021 | /* We clone off a thread, which wakes the Launcher whenever one of the | 1770 | /* If we exit via err(), this kills all the threads, restores tty. */ |
2022 | * input file descriptors needs attention. We call this the Waker, and | 1771 | atexit(cleanup_devices); |
2023 | * we'll cover it in a moment. */ | ||
2024 | setup_waker(lguest_fd); | ||
2025 | 1772 | ||
2026 | /* Finally, run the Guest. This doesn't return. */ | 1773 | /* Finally, run the Guest. This doesn't return. */ |
2027 | run_guest(lguest_fd); | 1774 | run_guest(); |
2028 | } | 1775 | } |
2029 | /*:*/ | 1776 | /*:*/ |
2030 | 1777 | ||
diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt index 29510dc51510..efb3a6a045a2 100644 --- a/Documentation/lguest/lguest.txt +++ b/Documentation/lguest/lguest.txt | |||
@@ -3,11 +3,11 @@ | |||
3 | /, /` - or, A Young Coder's Illustrated Hypervisor | 3 | /, /` - or, A Young Coder's Illustrated Hypervisor |
4 | \\"--\\ http://lguest.ozlabs.org | 4 | \\"--\\ http://lguest.ozlabs.org |
5 | 5 | ||
6 | Lguest is designed to be a minimal hypervisor for the Linux kernel, for | 6 | Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, |
7 | Linux developers and users to experiment with virtualization with the | 7 | for Linux developers and users to experiment with virtualization with the |
8 | minimum of complexity. Nonetheless, it should have sufficient | 8 | minimum of complexity. Nonetheless, it should have sufficient features to |
9 | features to make it useful for specific tasks, and, of course, you are | 9 | make it useful for specific tasks, and, of course, you are encouraged to fork |
10 | encouraged to fork and enhance it (see drivers/lguest/README). | 10 | and enhance it (see drivers/lguest/README). |
11 | 11 | ||
12 | Features: | 12 | Features: |
13 | 13 | ||
diff --git a/Documentation/local_ops.txt b/Documentation/local_ops.txt index 23045b8b50f0..300da4bdfdbd 100644 --- a/Documentation/local_ops.txt +++ b/Documentation/local_ops.txt | |||
@@ -34,7 +34,7 @@ out of order wrt other memory writes by the owner CPU. | |||
34 | 34 | ||
35 | It can be done by slightly modifying the standard atomic operations : only | 35 | It can be done by slightly modifying the standard atomic operations : only |
36 | their UP variant must be kept. It typically means removing LOCK prefix (on | 36 | their UP variant must be kept. It typically means removing LOCK prefix (on |
37 | i386 and x86_64) and any SMP sychronization barrier. If the architecture does | 37 | i386 and x86_64) and any SMP synchronization barrier. If the architecture does |
38 | not have a different behavior between SMP and UP, including asm-generic/local.h | 38 | not have a different behavior between SMP and UP, including asm-generic/local.h |
39 | in your architecture's local.h is sufficient. | 39 | in your architecture's local.h is sufficient. |
40 | 40 | ||
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt index 488773018152..e20d913d5914 100644 --- a/Documentation/lockdep-design.txt +++ b/Documentation/lockdep-design.txt | |||
@@ -27,33 +27,37 @@ lock-class. | |||
27 | State | 27 | State |
28 | ----- | 28 | ----- |
29 | 29 | ||
30 | The validator tracks lock-class usage history into 5 separate state bits: | 30 | The validator tracks lock-class usage history into 4n + 1 separate state bits: |
31 | 31 | ||
32 | - 'ever held in hardirq context' [ == hardirq-safe ] | 32 | - 'ever held in STATE context' |
33 | - 'ever held in softirq context' [ == softirq-safe ] | 33 | - 'ever head as readlock in STATE context' |
34 | - 'ever held with hardirqs enabled' [ == hardirq-unsafe ] | 34 | - 'ever head with STATE enabled' |
35 | - 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ] | 35 | - 'ever head as readlock with STATE enabled' |
36 | |||
37 | Where STATE can be either one of (kernel/lockdep_states.h) | ||
38 | - hardirq | ||
39 | - softirq | ||
40 | - reclaim_fs | ||
36 | 41 | ||
37 | - 'ever used' [ == !unused ] | 42 | - 'ever used' [ == !unused ] |
38 | 43 | ||
39 | When locking rules are violated, these 4 state bits are presented in the | 44 | When locking rules are violated, these state bits are presented in the |
40 | locking error messages, inside curlies. A contrived example: | 45 | locking error messages, inside curlies. A contrived example: |
41 | 46 | ||
42 | modprobe/2287 is trying to acquire lock: | 47 | modprobe/2287 is trying to acquire lock: |
43 | (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 | 48 | (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24 |
44 | 49 | ||
45 | but task is already holding lock: | 50 | but task is already holding lock: |
46 | (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 | 51 | (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24 |
47 | 52 | ||
48 | 53 | ||
49 | The bit position indicates hardirq, softirq, hardirq-read, | 54 | The bit position indicates STATE, STATE-read, for each of the states listed |
50 | softirq-read respectively, and the character displayed in each | 55 | above, and the character displayed in each indicates: |
51 | indicates: | ||
52 | 56 | ||
53 | '.' acquired while irqs disabled | 57 | '.' acquired while irqs disabled and not in irq context |
54 | '+' acquired in irq context | 58 | '-' acquired in irq context |
55 | '-' acquired with irqs enabled | 59 | '+' acquired with irqs enabled |
56 | '?' read acquired in irq context with irqs enabled. | 60 | '?' acquired in irq context with irqs enabled. |
57 | 61 | ||
58 | Unused mutexes cannot be part of the cause of an error. | 62 | Unused mutexes cannot be part of the cause of an error. |
59 | 63 | ||
diff --git a/Documentation/logo.gif b/Documentation/logo.gif new file mode 100644 index 000000000000..2eae75fecfb9 --- /dev/null +++ b/Documentation/logo.gif | |||
Binary files differ | |||
diff --git a/Documentation/logo.svg b/Documentation/logo.svg deleted file mode 100644 index cb9e4851d8c3..000000000000 --- a/Documentation/logo.svg +++ /dev/null | |||
@@ -1,2911 +0,0 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||
2 | <!-- Created with Inkscape (http://www.inkscape.org/) --> | ||
3 | <svg | ||
4 | xmlns:dc="http://purl.org/dc/elements/1.1/" | ||
5 | xmlns:cc="http://creativecommons.org/ns#" | ||
6 | xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" | ||
7 | xmlns:svg="http://www.w3.org/2000/svg" | ||
8 | xmlns="http://www.w3.org/2000/svg" | ||
9 | xmlns:xlink="http://www.w3.org/1999/xlink" | ||
10 | xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" | ||
11 | xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" | ||
12 | width="1771.6534" | ||
13 | height="1417.3228" | ||
14 | id="svg2" | ||
15 | sodipodi:version="0.32" | ||
16 | inkscape:version="0.46" | ||
17 | sodipodi:docname="tuz.svg" | ||
18 | inkscape:output_extension="org.inkscape.output.svg.inkscape" | ||
19 | version="1.0" | ||
20 | style="display:inline;enable-background:new" | ||
21 | inkscape:export-filename="/home/cheeseness/Documents/LCA09/mascot/tuz_final.png" | ||
22 | inkscape:export-xdpi="100.03588" | ||
23 | inkscape:export-ydpi="100.03588"> | ||
24 | <sodipodi:namedview | ||
25 | id="base" | ||
26 | pagecolor="#ffffff" | ||
27 | bordercolor="#666666" | ||
28 | borderopacity="1.0" | ||
29 | gridtolerance="10000" | ||
30 | guidetolerance="10" | ||
31 | objecttolerance="10" | ||
32 | inkscape:pageopacity="0.0" | ||
33 | inkscape:pageshadow="2" | ||
34 | inkscape:zoom="0.25" | ||
35 | inkscape:cx="-174.7931" | ||
36 | inkscape:cy="784.26325" | ||
37 | inkscape:document-units="px" | ||
38 | inkscape:current-layer="svg2" | ||
39 | showgrid="false" | ||
40 | inkscape:window-width="1280" | ||
41 | inkscape:window-height="823" | ||
42 | inkscape:window-x="-4" | ||
43 | inkscape:window-y="25" | ||
44 | showguides="true" | ||
45 | inkscape:guide-bbox="true" | ||
46 | units="mm" /> | ||
47 | <defs | ||
48 | id="defs4"> | ||
49 | <filter | ||
50 | inkscape:collect="always" | ||
51 | x="-0.084654994" | ||
52 | width="1.16931" | ||
53 | y="-0.36592469" | ||
54 | height="1.7318494" | ||
55 | id="filter11361"> | ||
56 | <feGaussianBlur | ||
57 | inkscape:collect="always" | ||
58 | stdDeviation="4.5740586" | ||
59 | id="feGaussianBlur11363" /> | ||
60 | </filter> | ||
61 | <inkscape:perspective | ||
62 | sodipodi:type="inkscape:persp3d" | ||
63 | inkscape:vp_x="0 : 564.0976 : 1" | ||
64 | inkscape:vp_y="0 : 1000 : 0" | ||
65 | inkscape:vp_z="1445.8591 : 564.0976 : 1" | ||
66 | inkscape:persp3d-origin="722.92957 : 376.06506 : 1" | ||
67 | id="perspective8145" /> | ||
68 | <linearGradient | ||
69 | id="linearGradient7622"> | ||
70 | <stop | ||
71 | style="stop-color:#ffffff;stop-opacity:1;" | ||
72 | offset="0" | ||
73 | id="stop7624" /> | ||
74 | <stop | ||
75 | style="stop-color:#ffffff;stop-opacity:0;" | ||
76 | offset="1" | ||
77 | id="stop7626" /> | ||
78 | </linearGradient> | ||
79 | <linearGradient | ||
80 | id="linearGradient4113"> | ||
81 | <stop | ||
82 | style="stop-color:#000000;stop-opacity:0;" | ||
83 | offset="0" | ||
84 | id="stop4115" /> | ||
85 | <stop | ||
86 | style="stop-color:#000000;stop-opacity:1;" | ||
87 | offset="1" | ||
88 | id="stop4117" /> | ||
89 | </linearGradient> | ||
90 | <linearGradient | ||
91 | inkscape:collect="always" | ||
92 | id="linearGradient3660"> | ||
93 | <stop | ||
94 | style="stop-color:#ffffff;stop-opacity:1;" | ||
95 | offset="0" | ||
96 | id="stop3662" /> | ||
97 | <stop | ||
98 | style="stop-color:#ffffff;stop-opacity:0;" | ||
99 | offset="1" | ||
100 | id="stop3664" /> | ||
101 | </linearGradient> | ||
102 | <linearGradient | ||
103 | id="linearGradient3627"> | ||
104 | <stop | ||
105 | style="stop-color:#ffffff;stop-opacity:1;" | ||
106 | offset="0" | ||
107 | id="stop3629" /> | ||
108 | <stop | ||
109 | style="stop-color:#000000;stop-opacity:1;" | ||
110 | offset="1" | ||
111 | id="stop3631" /> | ||
112 | </linearGradient> | ||
113 | <linearGradient | ||
114 | id="linearGradient2843"> | ||
115 | <stop | ||
116 | id="stop2845" | ||
117 | offset="0" | ||
118 | style="stop-color:#000000;stop-opacity:1;" /> | ||
119 | <stop | ||
120 | style="stop-color:#000000;stop-opacity:1;" | ||
121 | offset="0.02188784" | ||
122 | id="stop2847" /> | ||
123 | <stop | ||
124 | style="stop-color:#000000;stop-opacity:1;" | ||
125 | offset="0.75866222" | ||
126 | id="stop2849" /> | ||
127 | <stop | ||
128 | id="stop2851" | ||
129 | offset="0.88508981" | ||
130 | style="stop-color:#232323;stop-opacity:1;" /> | ||
131 | <stop | ||
132 | id="stop2853" | ||
133 | offset="1" | ||
134 | style="stop-color:#595959;stop-opacity:1;" /> | ||
135 | </linearGradient> | ||
136 | <linearGradient | ||
137 | inkscape:collect="always" | ||
138 | id="linearGradient8964"> | ||
139 | <stop | ||
140 | style="stop-color:#1a1a1a;stop-opacity:1;" | ||
141 | offset="0" | ||
142 | id="stop8966" /> | ||
143 | <stop | ||
144 | style="stop-color:#1a1a1a;stop-opacity:0;" | ||
145 | offset="1" | ||
146 | id="stop8968" /> | ||
147 | </linearGradient> | ||
148 | <linearGradient | ||
149 | id="linearGradient8952"> | ||
150 | <stop | ||
151 | style="stop-color:#0a0c0c;stop-opacity:1;" | ||
152 | offset="0" | ||
153 | id="stop8954" /> | ||
154 | <stop | ||
155 | style="stop-color:#1f2727;stop-opacity:0;" | ||
156 | offset="1" | ||
157 | id="stop8956" /> | ||
158 | </linearGradient> | ||
159 | <linearGradient | ||
160 | id="linearGradient8430"> | ||
161 | <stop | ||
162 | style="stop-color:#1e2323;stop-opacity:1;" | ||
163 | offset="0" | ||
164 | id="stop8432" /> | ||
165 | <stop | ||
166 | id="stop8438" | ||
167 | offset="0.55992389" | ||
168 | style="stop-color:#181d1d;stop-opacity:1;" /> | ||
169 | <stop | ||
170 | style="stop-color:#000000;stop-opacity:1;" | ||
171 | offset="1" | ||
172 | id="stop8434" /> | ||
173 | </linearGradient> | ||
174 | <linearGradient | ||
175 | id="linearGradient8398"> | ||
176 | <stop | ||
177 | style="stop-color:#283131;stop-opacity:0;" | ||
178 | offset="0" | ||
179 | id="stop8400" /> | ||
180 | <stop | ||
181 | id="stop8402" | ||
182 | offset="0.5125587" | ||
183 | style="stop-color:#1e2424;stop-opacity:0;" /> | ||
184 | <stop | ||
185 | style="stop-color:#000000;stop-opacity:1;" | ||
186 | offset="1" | ||
187 | id="stop8404" /> | ||
188 | </linearGradient> | ||
189 | <linearGradient | ||
190 | inkscape:collect="always" | ||
191 | id="linearGradient4870"> | ||
192 | <stop | ||
193 | style="stop-color:#c7bd80;stop-opacity:1;" | ||
194 | offset="0" | ||
195 | id="stop4872" /> | ||
196 | <stop | ||
197 | style="stop-color:#c7bd80;stop-opacity:0;" | ||
198 | offset="1" | ||
199 | id="stop4874" /> | ||
200 | </linearGradient> | ||
201 | <linearGradient | ||
202 | inkscape:collect="always" | ||
203 | id="linearGradient4862"> | ||
204 | <stop | ||
205 | style="stop-color:#e2e2e2;stop-opacity:1;" | ||
206 | offset="0" | ||
207 | id="stop4864" /> | ||
208 | <stop | ||
209 | style="stop-color:#e2e2e2;stop-opacity:0;" | ||
210 | offset="1" | ||
211 | id="stop4866" /> | ||
212 | </linearGradient> | ||
213 | <linearGradient | ||
214 | id="linearGradient4478"> | ||
215 | <stop | ||
216 | style="stop-color:#f9eed3;stop-opacity:1;" | ||
217 | offset="0" | ||
218 | id="stop4480" /> | ||
219 | <stop | ||
220 | style="stop-color:#000000;stop-opacity:0;" | ||
221 | offset="1" | ||
222 | id="stop4482" /> | ||
223 | </linearGradient> | ||
224 | <linearGradient | ||
225 | id="linearGradient4106"> | ||
226 | <stop | ||
227 | style="stop-color:#d9e002;stop-opacity:1;" | ||
228 | offset="0" | ||
229 | id="stop4108" /> | ||
230 | <stop | ||
231 | id="stop4114" | ||
232 | offset="0.5" | ||
233 | style="stop-color:#a9ae01;stop-opacity:1;" /> | ||
234 | <stop | ||
235 | style="stop-color:#717501;stop-opacity:1;" | ||
236 | offset="1" | ||
237 | id="stop4110" /> | ||
238 | </linearGradient> | ||
239 | <linearGradient | ||
240 | id="linearGradient4084"> | ||
241 | <stop | ||
242 | style="stop-color:#7d7d00;stop-opacity:1;" | ||
243 | offset="0" | ||
244 | id="stop4086" /> | ||
245 | <stop | ||
246 | id="stop4088" | ||
247 | offset="0.3636601" | ||
248 | style="stop-color:#c6c700;stop-opacity:1;" /> | ||
249 | <stop | ||
250 | style="stop-color:#f6f800;stop-opacity:1;" | ||
251 | offset="1" | ||
252 | id="stop4090" /> | ||
253 | </linearGradient> | ||
254 | <linearGradient | ||
255 | id="linearGradient4041"> | ||
256 | <stop | ||
257 | id="stop4043" | ||
258 | offset="0" | ||
259 | style="stop-color:#ffff00;stop-opacity:1;" /> | ||
260 | <stop | ||
261 | id="stop4045" | ||
262 | offset="1" | ||
263 | style="stop-color:#ffff00;stop-opacity:0;" /> | ||
264 | </linearGradient> | ||
265 | <linearGradient | ||
266 | id="linearGradient4025"> | ||
267 | <stop | ||
268 | style="stop-color:#ffffff;stop-opacity:1;" | ||
269 | offset="0" | ||
270 | id="stop4027" /> | ||
271 | <stop | ||
272 | style="stop-color:#ffffff;stop-opacity:0;" | ||
273 | offset="1" | ||
274 | id="stop4031" /> | ||
275 | </linearGradient> | ||
276 | <linearGradient | ||
277 | id="linearGradient4013"> | ||
278 | <stop | ||
279 | style="stop-color:#ffff00;stop-opacity:1;" | ||
280 | offset="0" | ||
281 | id="stop4015" /> | ||
282 | <stop | ||
283 | style="stop-color:#b2b200;stop-opacity:1;" | ||
284 | offset="1" | ||
285 | id="stop4017" /> | ||
286 | </linearGradient> | ||
287 | <linearGradient | ||
288 | id="linearGradient3985"> | ||
289 | <stop | ||
290 | style="stop-color:#000000;stop-opacity:1;" | ||
291 | offset="0" | ||
292 | id="stop3987" /> | ||
293 | <stop | ||
294 | style="stop-color:#1d1d1d;stop-opacity:1;" | ||
295 | offset="1" | ||
296 | id="stop3989" /> | ||
297 | </linearGradient> | ||
298 | <linearGradient | ||
299 | id="linearGradient3961"> | ||
300 | <stop | ||
301 | style="stop-color:#283131;stop-opacity:0;" | ||
302 | offset="0" | ||
303 | id="stop3963" /> | ||
304 | <stop | ||
305 | id="stop3965" | ||
306 | offset="0.5" | ||
307 | style="stop-color:#1e2424;stop-opacity:1;" /> | ||
308 | <stop | ||
309 | style="stop-color:#000000;stop-opacity:1;" | ||
310 | offset="1" | ||
311 | id="stop3967" /> | ||
312 | </linearGradient> | ||
313 | <linearGradient | ||
314 | id="linearGradient3951"> | ||
315 | <stop | ||
316 | id="stop3953" | ||
317 | offset="0" | ||
318 | style="stop-color:#344040;stop-opacity:1;" /> | ||
319 | <stop | ||
320 | style="stop-color:#222929;stop-opacity:1;" | ||
321 | offset="0.5" | ||
322 | id="stop3955" /> | ||
323 | <stop | ||
324 | id="stop3957" | ||
325 | offset="1" | ||
326 | style="stop-color:#000000;stop-opacity:1;" /> | ||
327 | </linearGradient> | ||
328 | <linearGradient | ||
329 | id="linearGradient3909"> | ||
330 | <stop | ||
331 | style="stop-color:#283131;stop-opacity:1;" | ||
332 | offset="0" | ||
333 | id="stop3911" /> | ||
334 | <stop | ||
335 | id="stop3917" | ||
336 | offset="0.5" | ||
337 | style="stop-color:#1e2424;stop-opacity:1;" /> | ||
338 | <stop | ||
339 | style="stop-color:#000000;stop-opacity:1;" | ||
340 | offset="1" | ||
341 | id="stop3913" /> | ||
342 | </linearGradient> | ||
343 | <linearGradient | ||
344 | id="linearGradient3537"> | ||
345 | <stop | ||
346 | style="stop-color:#ada469;stop-opacity:1;" | ||
347 | offset="0" | ||
348 | id="stop3539" /> | ||
349 | <stop | ||
350 | id="stop3545" | ||
351 | offset="0.81132078" | ||
352 | style="stop-color:#ada469;stop-opacity:1;" /> | ||
353 | <stop | ||
354 | style="stop-color:#ffffff;stop-opacity:1;" | ||
355 | offset="1" | ||
356 | id="stop3541" /> | ||
357 | </linearGradient> | ||
358 | <linearGradient | ||
359 | id="linearGradient3317"> | ||
360 | <stop | ||
361 | style="stop-color:#cfc690;stop-opacity:1" | ||
362 | offset="0" | ||
363 | id="stop3319" /> | ||
364 | <stop | ||
365 | id="stop3321" | ||
366 | offset="0.21161865" | ||
367 | style="stop-color:#afa775;stop-opacity:1;" /> | ||
368 | <stop | ||
369 | id="stop3323" | ||
370 | offset="0.53408515" | ||
371 | style="stop-color:#615c3a;stop-opacity:1;" /> | ||
372 | <stop | ||
373 | style="stop-color:#000000;stop-opacity:1;" | ||
374 | offset="0.76504093" | ||
375 | id="stop3325" /> | ||
376 | <stop | ||
377 | id="stop3327" | ||
378 | offset="1" | ||
379 | style="stop-color:#403518;stop-opacity:1;" /> | ||
380 | </linearGradient> | ||
381 | <linearGradient | ||
382 | id="linearGradient3239"> | ||
383 | <stop | ||
384 | id="stop3251" | ||
385 | offset="0" | ||
386 | style="stop-color:#cfc690;stop-opacity:1;" /> | ||
387 | <stop | ||
388 | style="stop-color:#afa775;stop-opacity:1;" | ||
389 | offset="0.21161865" | ||
390 | id="stop3267" /> | ||
391 | <stop | ||
392 | style="stop-color:#615c3a;stop-opacity:1;" | ||
393 | offset="0.53408515" | ||
394 | id="stop3261" /> | ||
395 | <stop | ||
396 | id="stop3265" | ||
397 | offset="0.76504093" | ||
398 | style="stop-color:#000000;stop-opacity:1;" /> | ||
399 | <stop | ||
400 | style="stop-color:#403518;stop-opacity:1;" | ||
401 | offset="1" | ||
402 | id="stop3243" /> | ||
403 | </linearGradient> | ||
404 | <radialGradient | ||
405 | inkscape:collect="always" | ||
406 | xlink:href="#linearGradient3239" | ||
407 | id="radialGradient3281" | ||
408 | gradientUnits="userSpaceOnUse" | ||
409 | gradientTransform="matrix(1.5480423,1.7414304,-1.9683515,1.7497638,-1130.5586,-1872.5121)" | ||
410 | spreadMethod="pad" | ||
411 | cx="806.52582" | ||
412 | cy="212.68117" | ||
413 | fx="806.52582" | ||
414 | fy="212.68117" | ||
415 | r="48.363216" /> | ||
416 | <radialGradient | ||
417 | inkscape:collect="always" | ||
418 | xlink:href="#linearGradient3317" | ||
419 | id="radialGradient3315" | ||
420 | cx="543.6698" | ||
421 | cy="147.3131" | ||
422 | fx="543.6698" | ||
423 | fy="147.3131" | ||
424 | r="47.863216" | ||
425 | gradientTransform="matrix(2.1382256,0,0,2.3382884,-77.03847,-101.68704)" | ||
426 | gradientUnits="userSpaceOnUse" /> | ||
427 | <radialGradient | ||
428 | inkscape:collect="always" | ||
429 | xlink:href="#linearGradient3537" | ||
430 | id="radialGradient3543" | ||
431 | cx="385" | ||
432 | cy="237.00504" | ||
433 | fx="385" | ||
434 | fy="237.00504" | ||
435 | r="86.928574" | ||
436 | gradientTransform="matrix(1,0,0,0.8562038,0,34.080427)" | ||
437 | gradientUnits="userSpaceOnUse" /> | ||
438 | <radialGradient | ||
439 | inkscape:collect="always" | ||
440 | xlink:href="#linearGradient3909" | ||
441 | id="radialGradient3915" | ||
442 | cx="418.30365" | ||
443 | cy="342.47794" | ||
444 | fx="418.30365" | ||
445 | fy="342.47794" | ||
446 | r="131.4509" | ||
447 | gradientTransform="matrix(1.3957347,0.6211056,-0.4244067,0.9537174,-15.061913,-227.96711)" | ||
448 | gradientUnits="userSpaceOnUse" /> | ||
449 | <radialGradient | ||
450 | inkscape:collect="always" | ||
451 | xlink:href="#linearGradient3951" | ||
452 | id="radialGradient3933" | ||
453 | cx="397.16388" | ||
454 | cy="336.95245" | ||
455 | fx="397.16388" | ||
456 | fy="336.95245" | ||
457 | r="36.75" | ||
458 | gradientUnits="userSpaceOnUse" | ||
459 | gradientTransform="matrix(1.9449972,2.4894837e-7,-2.4894833e-7,1.9449969,-375.31868,-318.41912)" /> | ||
460 | <linearGradient | ||
461 | inkscape:collect="always" | ||
462 | xlink:href="#linearGradient3961" | ||
463 | id="linearGradient3959" | ||
464 | x1="398.21429" | ||
465 | y1="343.52289" | ||
466 | x2="379.28571" | ||
467 | y2="265.30862" | ||
468 | gradientUnits="userSpaceOnUse" | ||
469 | gradientTransform="translate(450.03125,73.843964)" /> | ||
470 | <filter | ||
471 | inkscape:collect="always" | ||
472 | id="filter3981" | ||
473 | x="-0.30000001" | ||
474 | width="1.6" | ||
475 | y="-0.30000001" | ||
476 | height="1.6"> | ||
477 | <feGaussianBlur | ||
478 | inkscape:collect="always" | ||
479 | stdDeviation="2" | ||
480 | id="feGaussianBlur3983" /> | ||
481 | </filter> | ||
482 | <radialGradient | ||
483 | inkscape:collect="always" | ||
484 | xlink:href="#linearGradient3985" | ||
485 | id="radialGradient3991" | ||
486 | cx="402.48898" | ||
487 | cy="317.23578" | ||
488 | fx="402.48898" | ||
489 | fy="317.23578" | ||
490 | r="23.714285" | ||
491 | gradientUnits="userSpaceOnUse" | ||
492 | gradientTransform="matrix(4.3776616,0,0,4.3776616,-1358.3025,-1070.7357)" /> | ||
493 | <clipPath | ||
494 | clipPathUnits="userSpaceOnUse" | ||
495 | id="clipPath3999"> | ||
496 | <path | ||
497 | style="opacity:1;fill:#f5ff04;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline" | ||
498 | d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 339.75316,358.17469 330.35714,331.29075 C 320.9229,304.29747 295.38973,272.16627 263.92857,261.6479 C 232.8953,251.27258 198.91081,256.79953 179.64286,267.36218 z" | ||
499 | id="path4001" | ||
500 | sodipodi:nodetypes="czzczzzzc" /> | ||
501 | </clipPath> | ||
502 | <radialGradient | ||
503 | inkscape:collect="always" | ||
504 | xlink:href="#linearGradient4013" | ||
505 | id="radialGradient4056" | ||
506 | gradientUnits="userSpaceOnUse" | ||
507 | gradientTransform="matrix(1.1323239,0.7659488,-1.4550286,2.1510098,588.75376,-711.79716)" | ||
508 | cx="228.81355" | ||
509 | cy="440.26971" | ||
510 | fx="228.81355" | ||
511 | fy="440.26971" | ||
512 | r="119.17509" /> | ||
513 | <radialGradient | ||
514 | inkscape:collect="always" | ||
515 | xlink:href="#linearGradient4041" | ||
516 | id="radialGradient4060" | ||
517 | gradientUnits="userSpaceOnUse" | ||
518 | gradientTransform="matrix(5.911206e-2,2.6869855,-0.7234268,1.5914947e-2,408.72779,-424.56452)" | ||
519 | cx="275.4422" | ||
520 | cy="335.34866" | ||
521 | fx="275.4422" | ||
522 | fy="335.34866" | ||
523 | r="36.75" /> | ||
524 | <radialGradient | ||
525 | inkscape:collect="always" | ||
526 | xlink:href="#linearGradient4025" | ||
527 | id="radialGradient4062" | ||
528 | gradientUnits="userSpaceOnUse" | ||
529 | gradientTransform="matrix(5.911206e-2,2.6869855,-0.7234268,1.5914947e-2,408.72779,-424.56452)" | ||
530 | cx="275.4422" | ||
531 | cy="335.34866" | ||
532 | fx="275.4422" | ||
533 | fy="335.34866" | ||
534 | r="36.75" /> | ||
535 | <linearGradient | ||
536 | inkscape:collect="always" | ||
537 | xlink:href="#linearGradient4084" | ||
538 | id="linearGradient4082" | ||
539 | gradientUnits="userSpaceOnUse" | ||
540 | x1="182.35046" | ||
541 | y1="256.11136" | ||
542 | x2="145.53348" | ||
543 | y2="542.20502" /> | ||
544 | <clipPath | ||
545 | clipPathUnits="userSpaceOnUse" | ||
546 | id="clipPath4100"> | ||
547 | <path | ||
548 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.9000755px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" | ||
549 | d="M 265.93541,126.68393 L 247.1682,295.54701 L 421.27363,222.42633 L 483.22803,311.08516 L 541.11243,279.09486 L 503.57801,99.035183 L 265.93541,126.68393 z" | ||
550 | id="path4102" | ||
551 | sodipodi:nodetypes="ccccccc" /> | ||
552 | </clipPath> | ||
553 | <radialGradient | ||
554 | inkscape:collect="always" | ||
555 | xlink:href="#linearGradient4106" | ||
556 | id="radialGradient4112" | ||
557 | cx="250.22678" | ||
558 | cy="475.09763" | ||
559 | fx="250.22678" | ||
560 | fy="475.09763" | ||
561 | r="95.98877" | ||
562 | gradientTransform="matrix(1.2259004,-0.7077739,0.1413989,0.2449102,322.22326,608.91815)" | ||
563 | gradientUnits="userSpaceOnUse" /> | ||
564 | <linearGradient | ||
565 | inkscape:collect="always" | ||
566 | xlink:href="#linearGradient4478" | ||
567 | id="linearGradient4484" | ||
568 | x1="412.08926" | ||
569 | y1="404.91574" | ||
570 | x2="417.375" | ||
571 | y2="401.82648" | ||
572 | gradientUnits="userSpaceOnUse" /> | ||
573 | <linearGradient | ||
574 | inkscape:collect="always" | ||
575 | xlink:href="#linearGradient4478" | ||
576 | id="linearGradient4486" | ||
577 | x1="411.91071" | ||
578 | y1="404.91577" | ||
579 | x2="417.375" | ||
580 | y2="401.82648" | ||
581 | gradientUnits="userSpaceOnUse" /> | ||
582 | <linearGradient | ||
583 | inkscape:collect="always" | ||
584 | xlink:href="#linearGradient4478" | ||
585 | id="linearGradient4488" | ||
586 | x1="411.91071" | ||
587 | y1="405.54077" | ||
588 | x2="417.375" | ||
589 | y2="401.82648" | ||
590 | gradientUnits="userSpaceOnUse" /> | ||
591 | <linearGradient | ||
592 | inkscape:collect="always" | ||
593 | xlink:href="#linearGradient4478" | ||
594 | id="linearGradient4490" | ||
595 | x1="412.08926" | ||
596 | y1="405.54077" | ||
597 | x2="417.375" | ||
598 | y2="401.82648" | ||
599 | gradientUnits="userSpaceOnUse" /> | ||
600 | <linearGradient | ||
601 | inkscape:collect="always" | ||
602 | xlink:href="#linearGradient4478" | ||
603 | id="linearGradient4492" | ||
604 | x1="411.73212" | ||
605 | y1="405.54077" | ||
606 | x2="417.375" | ||
607 | y2="401.82648" | ||
608 | gradientUnits="userSpaceOnUse" /> | ||
609 | <radialGradient | ||
610 | inkscape:collect="always" | ||
611 | xlink:href="#linearGradient4862" | ||
612 | id="radialGradient4868" | ||
613 | cx="429.56738" | ||
614 | cy="377.42877" | ||
615 | fx="429.56738" | ||
616 | fy="377.42877" | ||
617 | r="72.079735" | ||
618 | gradientTransform="matrix(1,0,0,0.618034,0,144.16496)" | ||
619 | gradientUnits="userSpaceOnUse" /> | ||
620 | <radialGradient | ||
621 | inkscape:collect="always" | ||
622 | xlink:href="#linearGradient4870" | ||
623 | id="radialGradient4876" | ||
624 | cx="437.6991" | ||
625 | cy="391.21735" | ||
626 | fx="437.6991" | ||
627 | fy="391.21735" | ||
628 | r="36.611931" | ||
629 | gradientTransform="matrix(1,0,0,0.618034,0,149.43174)" | ||
630 | gradientUnits="userSpaceOnUse" /> | ||
631 | <radialGradient | ||
632 | inkscape:collect="always" | ||
633 | xlink:href="#linearGradient4013" | ||
634 | id="radialGradient3585" | ||
635 | gradientUnits="userSpaceOnUse" | ||
636 | gradientTransform="matrix(1.1323239,0.7659488,-1.4550286,2.1510098,588.75376,-711.79716)" | ||
637 | cx="228.81355" | ||
638 | cy="440.26971" | ||
639 | fx="228.81355" | ||
640 | fy="440.26971" | ||
641 | r="119.17509" /> | ||
642 | <linearGradient | ||
643 | inkscape:collect="always" | ||
644 | xlink:href="#linearGradient4084" | ||
645 | id="linearGradient3587" | ||
646 | gradientUnits="userSpaceOnUse" | ||
647 | x1="182.35046" | ||
648 | y1="256.11136" | ||
649 | x2="145.53348" | ||
650 | y2="542.20502" /> | ||
651 | <radialGradient | ||
652 | inkscape:collect="always" | ||
653 | xlink:href="#linearGradient3317" | ||
654 | id="radialGradient8410" | ||
655 | gradientUnits="userSpaceOnUse" | ||
656 | gradientTransform="matrix(1.0036478,-1.0345492e-7,1.7124628e-7,1.6613125,-753.99632,-302.76972)" | ||
657 | cx="317.78754" | ||
658 | cy="129.65378" | ||
659 | fx="317.78754" | ||
660 | fy="129.65378" | ||
661 | r="47.863216" /> | ||
662 | <radialGradient | ||
663 | inkscape:collect="always" | ||
664 | xlink:href="#linearGradient8398" | ||
665 | id="radialGradient8412" | ||
666 | gradientUnits="userSpaceOnUse" | ||
667 | gradientTransform="matrix(2.0747661,-0.1577957,0.2382425,3.1325183,-1144.2358,-272.29325)" | ||
668 | cx="325.30847" | ||
669 | cy="80.909554" | ||
670 | fx="325.30847" | ||
671 | fy="80.909554" | ||
672 | r="26.937988" /> | ||
673 | <clipPath | ||
674 | clipPathUnits="userSpaceOnUse" | ||
675 | id="clipPath8514"> | ||
676 | <path | ||
677 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
678 | d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z" | ||
679 | id="path8516" | ||
680 | sodipodi:nodetypes="cscccccccccccc" /> | ||
681 | </clipPath> | ||
682 | <clipPath | ||
683 | clipPathUnits="userSpaceOnUse" | ||
684 | id="clipPath8604"> | ||
685 | <path | ||
686 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
687 | d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z" | ||
688 | id="path8606" | ||
689 | sodipodi:nodetypes="cscccccccccccc" /> | ||
690 | </clipPath> | ||
691 | <clipPath | ||
692 | clipPathUnits="userSpaceOnUse" | ||
693 | id="clipPath8610"> | ||
694 | <path | ||
695 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
696 | d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z" | ||
697 | id="path8612" | ||
698 | sodipodi:nodetypes="cscccccccccccc" /> | ||
699 | </clipPath> | ||
700 | <clipPath | ||
701 | clipPathUnits="userSpaceOnUse" | ||
702 | id="clipPath8616"> | ||
703 | <path | ||
704 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
705 | d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z" | ||
706 | id="path8618" | ||
707 | sodipodi:nodetypes="cscccccccccccc" /> | ||
708 | </clipPath> | ||
709 | <clipPath | ||
710 | clipPathUnits="userSpaceOnUse" | ||
711 | id="clipPath8622"> | ||
712 | <path | ||
713 | style="opacity:1;fill:#202020;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
714 | d="M 821.64329,477.88997 C 821.64329,477.88997 844.26276,471.38316 857.38604,472.01724 C 870.50932,472.65133 888.02762,473.95586 901.09489,484.20343 C 914.16216,494.45099 926.16263,511.3435 935.20728,542.57308 C 944.25193,573.80266 936.9056,641.82509 929.03125,685.92043 C 921.1569,730.01577 900.76615,792.03341 884.03125,825.92043 C 867.29635,859.80745 834.23354,903.41563 823.46182,915.79659 C 812.0976,928.85856 767.25593,952.22276 744.03125,958.06326 C 749.33455,947.45666 792.93101,907.47442 779.03125,897.349 C 765.01228,887.13674 733.27116,943.33136 694.7381,926.38217 C 716.12041,913.25005 736.5175,875.19611 728.77871,859.78772 C 720.93846,844.17733 698.07378,908.54529 635.24317,896.8006 C 665.29521,869.27394 690.65023,825.89659 676.50587,813.8209 C 662.09071,801.51403 616.04412,868.11405 616.04412,868.11405 C 616.04412,868.11405 613.22222,826.41287 629.81732,799.50673 C 646.45667,772.52886 709.47029,717.89146 729.37045,687.80331 C 749.2706,657.71517 762.98301,621.79429 771.50587,595.28537 C 780.02873,568.77645 787.30681,518.18583 787.30681,518.18583" | ||
715 | id="path8624" | ||
716 | sodipodi:nodetypes="czzzzzzczczczczzzc" /> | ||
717 | </clipPath> | ||
718 | <clipPath | ||
719 | clipPathUnits="userSpaceOnUse" | ||
720 | id="clipPath8642"> | ||
721 | <path | ||
722 | style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
723 | d="M 366.88839,504.13471 C 366.88839,504.13471 337.33433,544.70776 319.03125,578.42042 C 300.72816,612.13309 260.41016,704.77736 248.67411,749.49185 C 236.91471,794.29529 186.17411,873.06329 186.17411,873.06329 L 262.24554,891.27757 C 262.24554,891.27757 274.05266,878.45422 293.31696,845.20614 C 312.58126,811.95806 353.67411,706.63471 353.67411,706.63471 L 366.88839,504.13471 z" | ||
724 | id="path8644" | ||
725 | sodipodi:nodetypes="czzcczcc" /> | ||
726 | </clipPath> | ||
727 | <clipPath | ||
728 | clipPathUnits="userSpaceOnUse" | ||
729 | id="clipPath8658"> | ||
730 | <path | ||
731 | style="opacity:1;fill:#0b0b0b;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
732 | d="M 569.03125,1018.7776 C 564.74554,1019.4919 541.4031,1022.3957 511.17411,1028.7776 C 480.94512,1035.1595 411.39918,1054.7395 368.31696,1064.4919 C 325.23474,1074.2443 251.05253,1099.3079 211.40434,1091.7573 C 171.75616,1084.2067 121.88839,1027.349 121.88839,1027.349 L 126.17411,933.06329 C 126.17411,933.06329 212.05962,916.86235 238.31696,899.49186 C 264.57431,882.12137 283.89934,849.82588 297.60268,828.06329 C 311.30602,806.3007 330.45982,756.63471 330.45982,756.63471 L 569.03125,1018.7776 z" | ||
733 | id="path8660" | ||
734 | sodipodi:nodetypes="czzzcczzcc" /> | ||
735 | </clipPath> | ||
736 | <filter | ||
737 | inkscape:collect="always" | ||
738 | id="filter8802" | ||
739 | x="-0.35311759" | ||
740 | width="1.7062352" | ||
741 | y="-0.1817714" | ||
742 | height="1.3635428"> | ||
743 | <feGaussianBlur | ||
744 | inkscape:collect="always" | ||
745 | stdDeviation="48.038491" | ||
746 | id="feGaussianBlur8804" /> | ||
747 | </filter> | ||
748 | <filter | ||
749 | inkscape:collect="always" | ||
750 | id="filter8806" | ||
751 | x="-0.61142862" | ||
752 | width="2.2228572" | ||
753 | y="-0.14930232" | ||
754 | height="1.2986046"> | ||
755 | <feGaussianBlur | ||
756 | inkscape:collect="always" | ||
757 | stdDeviation="37.830213" | ||
758 | id="feGaussianBlur8808" /> | ||
759 | </filter> | ||
760 | <filter | ||
761 | inkscape:collect="always" | ||
762 | id="filter8810" | ||
763 | x="-0.23519406" | ||
764 | width="1.4703881" | ||
765 | y="-0.24500646" | ||
766 | height="1.4900129"> | ||
767 | <feGaussianBlur | ||
768 | inkscape:collect="always" | ||
769 | stdDeviation="58.328041" | ||
770 | id="feGaussianBlur8812" /> | ||
771 | </filter> | ||
772 | <filter | ||
773 | inkscape:collect="always" | ||
774 | id="filter8814" | ||
775 | x="-0.20466694" | ||
776 | width="1.4093339" | ||
777 | y="-0.29007819" | ||
778 | height="1.5801564"> | ||
779 | <feGaussianBlur | ||
780 | inkscape:collect="always" | ||
781 | stdDeviation="22.300169" | ||
782 | id="feGaussianBlur8816" /> | ||
783 | </filter> | ||
784 | <filter | ||
785 | inkscape:collect="always" | ||
786 | id="filter8818" | ||
787 | x="-0.34381232" | ||
788 | width="1.6876246" | ||
789 | y="-0.18433961" | ||
790 | height="1.3686792"> | ||
791 | <feGaussianBlur | ||
792 | inkscape:collect="always" | ||
793 | stdDeviation="34.542167" | ||
794 | id="feGaussianBlur8820" /> | ||
795 | </filter> | ||
796 | <filter | ||
797 | inkscape:collect="always" | ||
798 | id="filter8822" | ||
799 | x="-0.2742857" | ||
800 | width="1.5485713" | ||
801 | y="-0.21333334" | ||
802 | height="1.4266667"> | ||
803 | <feGaussianBlur | ||
804 | inkscape:collect="always" | ||
805 | stdDeviation="11.313708" | ||
806 | id="feGaussianBlur8824" /> | ||
807 | </filter> | ||
808 | <filter | ||
809 | inkscape:collect="always" | ||
810 | id="filter8826" | ||
811 | x="-0.25894088" | ||
812 | width="1.5178818" | ||
813 | y="-0.2236412" | ||
814 | height="1.4472824"> | ||
815 | <feGaussianBlur | ||
816 | inkscape:collect="always" | ||
817 | stdDeviation="19.631544" | ||
818 | id="feGaussianBlur8828" /> | ||
819 | </filter> | ||
820 | <filter | ||
821 | inkscape:collect="always" | ||
822 | id="filter8856" | ||
823 | x="-0.3253231" | ||
824 | width="1.6506462" | ||
825 | y="-0.19013336" | ||
826 | height="1.3802667"> | ||
827 | <feGaussianBlur | ||
828 | inkscape:collect="always" | ||
829 | stdDeviation="28.712591" | ||
830 | id="feGaussianBlur8858" /> | ||
831 | </filter> | ||
832 | <filter | ||
833 | inkscape:collect="always" | ||
834 | id="filter8860" | ||
835 | x="-0.38093024" | ||
836 | width="1.7618605" | ||
837 | y="-0.17518716" | ||
838 | height="1.3503743"> | ||
839 | <feGaussianBlur | ||
840 | inkscape:collect="always" | ||
841 | stdDeviation="19.304015" | ||
842 | id="feGaussianBlur8862" /> | ||
843 | </filter> | ||
844 | <filter | ||
845 | inkscape:collect="always" | ||
846 | id="filter8888" | ||
847 | x="-0.2112188" | ||
848 | width="1.4224375" | ||
849 | y="-0.16808605" | ||
850 | height="1.3361721"> | ||
851 | <feGaussianBlur | ||
852 | inkscape:collect="always" | ||
853 | stdDeviation="8.3693583" | ||
854 | id="feGaussianBlur8890" /> | ||
855 | </filter> | ||
856 | <filter | ||
857 | inkscape:collect="always" | ||
858 | id="filter8892" | ||
859 | x="-0.18692794" | ||
860 | width="1.3738559" | ||
861 | y="-0.23646873" | ||
862 | height="1.4729375"> | ||
863 | <feGaussianBlur | ||
864 | inkscape:collect="always" | ||
865 | stdDeviation="31.21228" | ||
866 | id="feGaussianBlur8894" /> | ||
867 | </filter> | ||
868 | <clipPath | ||
869 | clipPathUnits="userSpaceOnUse" | ||
870 | id="clipPath8906"> | ||
871 | <path | ||
872 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
873 | d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z" | ||
874 | id="path8908" | ||
875 | sodipodi:nodetypes="cscccccccccccc" /> | ||
876 | </clipPath> | ||
877 | <filter | ||
878 | inkscape:collect="always" | ||
879 | id="filter8940" | ||
880 | x="-0.25152978" | ||
881 | width="1.5030596" | ||
882 | y="-0.053035267" | ||
883 | height="1.1060705"> | ||
884 | <feGaussianBlur | ||
885 | inkscape:collect="always" | ||
886 | stdDeviation="13.024603" | ||
887 | id="feGaussianBlur8942" /> | ||
888 | </filter> | ||
889 | <linearGradient | ||
890 | inkscape:collect="always" | ||
891 | xlink:href="#linearGradient8952" | ||
892 | id="linearGradient8958" | ||
893 | x1="609.31244" | ||
894 | y1="239.46866" | ||
895 | x2="560.83142" | ||
896 | y2="262.86206" | ||
897 | gradientUnits="userSpaceOnUse" | ||
898 | gradientTransform="translate(450.03125,73.843964)" /> | ||
899 | <linearGradient | ||
900 | inkscape:collect="always" | ||
901 | xlink:href="#linearGradient8964" | ||
902 | id="linearGradient8970" | ||
903 | x1="603.84064" | ||
904 | y1="627.85303" | ||
905 | x2="616.24396" | ||
906 | y2="585.42664" | ||
907 | gradientUnits="userSpaceOnUse" | ||
908 | gradientTransform="translate(450.03125,73.843964)" /> | ||
909 | <filter | ||
910 | inkscape:collect="always" | ||
911 | id="filter9020" | ||
912 | x="-0.32861114" | ||
913 | width="1.6572223" | ||
914 | y="-0.182" | ||
915 | height="1.364"> | ||
916 | <feGaussianBlur | ||
917 | inkscape:collect="always" | ||
918 | stdDeviation="20.912684" | ||
919 | id="feGaussianBlur9022" /> | ||
920 | </filter> | ||
921 | <filter | ||
922 | inkscape:collect="always" | ||
923 | id="filter9024" | ||
924 | x="-0.55453134" | ||
925 | width="2.1090627" | ||
926 | y="-0.51434779" | ||
927 | height="2.0286956"> | ||
928 | <feGaussianBlur | ||
929 | inkscape:collect="always" | ||
930 | stdDeviation="20.912684" | ||
931 | id="feGaussianBlur9026" /> | ||
932 | </filter> | ||
933 | <filter | ||
934 | inkscape:collect="always" | ||
935 | id="filter9044" | ||
936 | x="-0.32631579" | ||
937 | width="1.6526316" | ||
938 | y="-0.84545463" | ||
939 | height="2.6909094"> | ||
940 | <feGaussianBlur | ||
941 | inkscape:collect="always" | ||
942 | stdDeviation="21.92031" | ||
943 | id="feGaussianBlur9046" /> | ||
944 | </filter> | ||
945 | <filter | ||
946 | inkscape:collect="always" | ||
947 | id="filter9048" | ||
948 | x="-0.40879121" | ||
949 | width="1.8175824" | ||
950 | y="-0.71538466" | ||
951 | height="2.4307692"> | ||
952 | <feGaussianBlur | ||
953 | inkscape:collect="always" | ||
954 | stdDeviation="21.92031" | ||
955 | id="feGaussianBlur9050" /> | ||
956 | </filter> | ||
957 | <filter | ||
958 | inkscape:collect="always" | ||
959 | id="filter3587" | ||
960 | x="-0.1"> | ||
961 | <feGaussianBlur | ||
962 | inkscape:collect="always" | ||
963 | stdDeviation="8.881432" | ||
964 | id="feGaussianBlur3589" /> | ||
965 | </filter> | ||
966 | <clipPath | ||
967 | clipPathUnits="userSpaceOnUse" | ||
968 | id="clipPath3602"> | ||
969 | <path | ||
970 | sodipodi:nodetypes="czzzzzzczczczczzzc" | ||
971 | id="path3604" | ||
972 | d="M 647.61204,540.04601 C 647.61204,540.04601 670.23151,533.5392 683.35479,534.17328 C 696.47807,534.80737 713.99637,536.1119 727.06364,546.35947 C 740.13091,556.60703 752.13138,573.49954 761.17603,604.72912 C 770.22068,635.9587 762.87435,703.98113 755,748.07647 C 747.12565,792.17181 726.7349,854.18945 710,888.07647 C 693.2651,921.96349 660.20229,965.57167 649.43057,977.95263 C 638.06635,991.0146 593.22468,1014.3788 570,1020.2193 C 575.3033,1009.6127 618.89976,969.63046 605,959.50504 C 590.98103,949.29278 559.23991,1005.4874 520.70685,988.53821 C 542.08916,975.40609 562.48625,937.35215 554.74746,921.94376 C 546.90721,906.33337 524.04253,970.70133 461.21192,958.95664 C 491.26396,931.42998 516.61898,888.05263 502.47462,875.97694 C 488.05946,863.67007 442.01287,930.27009 442.01287,930.27009 C 442.01287,930.27009 439.19097,888.56891 455.78607,861.66277 C 472.42542,834.6849 535.43904,780.0475 555.3392,749.95935 C 575.23935,719.87121 588.95176,683.95033 597.47462,657.44141 C 605.99748,630.93249 613.27556,580.34187 613.27556,580.34187" | ||
973 | style="opacity:1;fill:#202020;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
974 | </clipPath> | ||
975 | <filter | ||
976 | inkscape:collect="always" | ||
977 | id="filter4120" | ||
978 | x="-0.2770822" | ||
979 | width="1.5541644" | ||
980 | y="-0.32482043" | ||
981 | height="1.6496409"> | ||
982 | <feGaussianBlur | ||
983 | inkscape:collect="always" | ||
984 | stdDeviation="19.956289" | ||
985 | id="feGaussianBlur4122" /> | ||
986 | </filter> | ||
987 | <clipPath | ||
988 | clipPathUnits="userSpaceOnUse" | ||
989 | id="clipPath3631"> | ||
990 | <path | ||
991 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
992 | d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z" | ||
993 | id="path3633" | ||
994 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
995 | </clipPath> | ||
996 | <clipPath | ||
997 | clipPathUnits="userSpaceOnUse" | ||
998 | id="clipPath3665"> | ||
999 | <path | ||
1000 | sodipodi:nodetypes="czzcczcc" | ||
1001 | id="path3667" | ||
1002 | d="M 366.88839,504.13471 C 366.88839,504.13471 337.33433,544.70776 319.03125,578.42042 C 300.72816,612.13309 260.41016,704.77736 248.67411,749.49185 C 236.91471,794.29529 186.17411,873.06329 186.17411,873.06329 L 262.24554,891.27757 C 262.24554,891.27757 274.05266,878.45422 293.31696,845.20614 C 312.58126,811.95806 353.67411,706.63471 353.67411,706.63471 L 366.88839,504.13471 z" | ||
1003 | style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1004 | </clipPath> | ||
1005 | <clipPath | ||
1006 | clipPathUnits="userSpaceOnUse" | ||
1007 | id="clipPath3677"> | ||
1008 | <path | ||
1009 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1010 | d="M 586.13271,997.98981 C 592.92681,1016.8926 596.6264,1031.2895 598.02301,1049.2017 C 599.41963,1067.114 594.24033,1101.0025 595.12255,1119.8578 C 596.0043,1138.703 603.25624,1159.9568 622.46718,1168.8267 C 641.88605,1177.7925 671.78642,1179.038 696.58702,1165.6811 C 721.38762,1152.3241 753.98804,1095.3556 767.56128,1068.3724 C 781.18513,1041.2885 806.32235,953.87507 812.22208,918.60385 C 818.12181,883.33264 814.77262,877.30308 807.6046,869.54836 C 810.2449,841.70821 806.10488,814.61293 820.71429,782.36218 C 790.46528,794.18788 783.332,822.52288 772.3954,847.86726 C 764.39449,796.93433 772.60456,776.59407 775.71429,746.6479 C 746.64953,761.42581 732.85278,793.76192 730.71429,839.50504 C 719.79034,838.20084 709.32285,835.07081 697.14286,838.79075 C 696.87882,792.76741 695.6793,761.90134 706.05349,724.58426 C 652.79802,745.61112 643.10621,831.17836 650,837.36218 C 639.11718,837.89718 628.62892,836.06488 617.14286,840.21933 C 617.78178,797.64798 616.8824,755.31072 587.14286,717.36218 C 587.14286,717.36218 556.1848,798.28452 555.71429,820.93361 C 555.24378,843.5827 565.16588,861.09949 565.16588,861.09949 C 565.16588,861.09949 556.59781,897.84 558.86729,919.33172 C 561.16314,941.07318 579.31019,979.00789 586.13271,997.98981 z" | ||
1011 | id="path3679" | ||
1012 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
1013 | </clipPath> | ||
1014 | <filter | ||
1015 | inkscape:collect="always" | ||
1016 | id="filter3898"> | ||
1017 | <feGaussianBlur | ||
1018 | inkscape:collect="always" | ||
1019 | stdDeviation="10.892985" | ||
1020 | id="feGaussianBlur3900" /> | ||
1021 | </filter> | ||
1022 | <filter | ||
1023 | inkscape:collect="always" | ||
1024 | id="filter4130" | ||
1025 | x="-0.49509686" | ||
1026 | width="1.9901937" | ||
1027 | y="-0.26708817" | ||
1028 | height="1.5341763"> | ||
1029 | <feGaussianBlur | ||
1030 | inkscape:collect="always" | ||
1031 | stdDeviation="10.730622" | ||
1032 | id="feGaussianBlur4132" /> | ||
1033 | </filter> | ||
1034 | <filter | ||
1035 | inkscape:collect="always" | ||
1036 | id="filter4141" | ||
1037 | x="-0.40611032" | ||
1038 | width="1.8122206" | ||
1039 | y="-0.30260596" | ||
1040 | height="1.6052119"> | ||
1041 | <feGaussianBlur | ||
1042 | inkscape:collect="always" | ||
1043 | stdDeviation="9.8586086" | ||
1044 | id="feGaussianBlur4143" /> | ||
1045 | </filter> | ||
1046 | <clipPath | ||
1047 | clipPathUnits="userSpaceOnUse" | ||
1048 | id="clipPath4177"> | ||
1049 | <path | ||
1050 | sodipodi:nodetypes="czzzzzzcccccccccczczz" | ||
1051 | id="path4179" | ||
1052 | d="M 586.13271,997.98981 C 592.92681,1016.8926 596.6264,1031.2895 598.02301,1049.2017 C 599.41963,1067.114 594.24033,1101.0025 595.12255,1119.8578 C 596.0043,1138.703 603.25624,1159.9568 622.46718,1168.8267 C 641.88605,1177.7925 671.78642,1179.038 696.58702,1165.6811 C 721.38762,1152.3241 753.98804,1095.3556 767.56128,1068.3724 C 781.18513,1041.2885 806.32235,953.87507 812.22208,918.60385 C 818.12181,883.33264 814.77262,877.30308 807.6046,869.54836 C 810.2449,841.70821 806.10488,814.61293 820.71429,782.36218 C 790.46528,794.18788 783.332,822.52288 772.3954,847.86726 C 764.39449,796.93433 772.60456,776.59407 775.71429,746.6479 C 746.64953,761.42581 732.85278,793.76192 730.71429,839.50504 C 719.79034,838.20084 709.32285,835.07081 697.14286,838.79075 C 696.87882,792.76741 695.6793,761.90134 706.05349,724.58426 C 652.79802,745.61112 643.10621,831.17836 650,837.36218 C 639.11718,837.89718 628.62892,836.06488 617.14286,840.21933 C 617.78178,797.64798 616.8824,755.31072 587.14286,717.36218 C 587.14286,717.36218 556.1848,798.28452 555.71429,820.93361 C 555.24378,843.5827 565.16588,861.09949 565.16588,861.09949 C 565.16588,861.09949 556.59781,897.84 558.86729,919.33172 C 561.16314,941.07318 579.31019,979.00789 586.13271,997.98981 z" | ||
1053 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1054 | </clipPath> | ||
1055 | <filter | ||
1056 | inkscape:collect="always" | ||
1057 | id="filter4185"> | ||
1058 | <feGaussianBlur | ||
1059 | inkscape:collect="always" | ||
1060 | stdDeviation="3.6164709" | ||
1061 | id="feGaussianBlur4187" /> | ||
1062 | </filter> | ||
1063 | <filter | ||
1064 | inkscape:collect="always" | ||
1065 | id="filter4105"> | ||
1066 | <feGaussianBlur | ||
1067 | inkscape:collect="always" | ||
1068 | stdDeviation="3.8640966" | ||
1069 | id="feGaussianBlur4107" /> | ||
1070 | </filter> | ||
1071 | <clipPath | ||
1072 | clipPathUnits="userSpaceOnUse" | ||
1073 | id="clipPath2833"> | ||
1074 | <path | ||
1075 | style="opacity:1;fill:#292929;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1076 | d="M 569.03125,1018.7776 C 564.74554,1019.4919 541.4031,1022.3957 511.17411,1028.7776 C 480.94512,1035.1595 453.86016,1033.7437 375.38803,1046.1072 C 295.53625,1058.688 281.32367,1088.6495 267.26578,1093.1715 C 252.56564,1097.9001 121.88839,1027.349 121.88839,1027.349 L 126.17411,933.06329 C 126.17411,933.06329 212.05962,916.86235 238.31696,899.49186 C 264.57431,882.12137 283.89934,849.82588 297.60268,828.06329 C 311.30602,806.3007 330.45982,756.63471 330.45982,756.63471 L 569.03125,1018.7776 z" | ||
1077 | id="path2835" | ||
1078 | sodipodi:nodetypes="czzzcczzcc" /> | ||
1079 | </clipPath> | ||
1080 | <linearGradient | ||
1081 | inkscape:collect="always" | ||
1082 | xlink:href="#linearGradient2843" | ||
1083 | id="linearGradient2841" | ||
1084 | gradientUnits="userSpaceOnUse" | ||
1085 | x1="347.89655" | ||
1086 | y1="1070.2124" | ||
1087 | x2="275.58191" | ||
1088 | y2="867.97992" /> | ||
1089 | <linearGradient | ||
1090 | inkscape:collect="always" | ||
1091 | xlink:href="#linearGradient3627" | ||
1092 | id="linearGradient3688" | ||
1093 | gradientUnits="userSpaceOnUse" | ||
1094 | x1="699.32867" | ||
1095 | y1="269.76755" | ||
1096 | x2="698.97504" | ||
1097 | y2="346.1351" /> | ||
1098 | <mask | ||
1099 | maskUnits="userSpaceOnUse" | ||
1100 | id="mask3684"> | ||
1101 | <path | ||
1102 | sodipodi:type="arc" | ||
1103 | style="opacity:1;fill:url(#linearGradient3688);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.43724918px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1104 | id="path3686" | ||
1105 | sodipodi:cx="579.474" | ||
1106 | sodipodi:cy="260.57516" | ||
1107 | sodipodi:rx="192.6866" | ||
1108 | sodipodi:ry="164.04877" | ||
1109 | d="M 772.1606,260.57516 A 192.6866,164.04877 0 1 1 386.7874,260.57516 A 192.6866,164.04877 0 1 1 772.1606,260.57516 z" | ||
1110 | transform="translate(-174.03125,62.156036)" /> | ||
1111 | </mask> | ||
1112 | <clipPath | ||
1113 | clipPathUnits="userSpaceOnUse" | ||
1114 | id="clipPath3622"> | ||
1115 | <path | ||
1116 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1117 | d="M 266.27183,924.57186 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87514 C 271.58022,990.42748 284.75965,1019.7825 288.68797,1037.0589 C 292.61419,1054.326 291.3821,1075.3685 276.22853,1088.2071 C 260.91092,1101.1845 234.17726,1109.806 208.39623,1103.9409 C 182.61517,1098.0756 138.84716,1054.7175 119.80604,1033.7126 C 100.6939,1012.6293 56.045183,939.86194 41.867508,909.43681 C 27.689836,879.01169 29.207903,872.71824 33.747793,863.90708 C 24.381071,839.38658 21.334081,813.84027 0.035335518,788.33044 C 30.360815,791.44488 43.915625,815.28677 60.161025,835.47019 C 54.631129,787.39416 42.10631,771.05369 31.787073,744.74589 C 61.781368,750.82755 82.366433,776.61829 95.766856,817.45839 C 105.32101,813.54048 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.00481,738.48135 89.267015,707.32725 C 142.70898,712.99758 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.42601,751.28936 177.62716,712.76848 195.01526,670.9882 C 195.01526,670.9882 243.30204,736.42507 249.40492,756.79397 C 255.50779,777.16288 250.92373,795.49449 250.92373,795.49449 C 250.92373,795.49449 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82997 267.68496,905.69194 266.27183,924.57186 z" | ||
1118 | id="path3624" | ||
1119 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
1120 | </clipPath> | ||
1121 | <clipPath | ||
1122 | clipPathUnits="userSpaceOnUse" | ||
1123 | id="clipPath3636"> | ||
1124 | <path | ||
1125 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1126 | d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z" | ||
1127 | id="path3638" | ||
1128 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
1129 | </clipPath> | ||
1130 | <linearGradient | ||
1131 | inkscape:collect="always" | ||
1132 | xlink:href="#linearGradient3660" | ||
1133 | id="linearGradient3666" | ||
1134 | x1="1255.7386" | ||
1135 | y1="667.09216" | ||
1136 | x2="893.69995" | ||
1137 | y2="858.01099" | ||
1138 | gradientUnits="userSpaceOnUse" /> | ||
1139 | <filter | ||
1140 | inkscape:collect="always" | ||
1141 | id="filter3779" | ||
1142 | x="-0.087980822" | ||
1143 | width="1.1759616" | ||
1144 | y="-0.17728332" | ||
1145 | height="1.3545666"> | ||
1146 | <feGaussianBlur | ||
1147 | inkscape:collect="always" | ||
1148 | stdDeviation="16.340344" | ||
1149 | id="feGaussianBlur3781" /> | ||
1150 | </filter> | ||
1151 | <filter | ||
1152 | id="filter3785" | ||
1153 | inkscape:label="White Fur"> | ||
1154 | <feTurbulence | ||
1155 | id="feTurbulence3787" | ||
1156 | in="SourceAlpha" | ||
1157 | type="fractalNoise" | ||
1158 | baseFrequency="0.24044943820224721" | ||
1159 | numOctaves="10" | ||
1160 | seed="655" | ||
1161 | result="result0" /> | ||
1162 | <feDisplacementMap | ||
1163 | id="feDisplacementMap3789" | ||
1164 | in="SourceGraphic" | ||
1165 | in2="result0" | ||
1166 | scale="62" | ||
1167 | xChannelSelector="B" | ||
1168 | yChannelSelector="G" /> | ||
1169 | </filter> | ||
1170 | <filter | ||
1171 | inkscape:collect="always" | ||
1172 | id="filter3677"> | ||
1173 | <feGaussianBlur | ||
1174 | inkscape:collect="always" | ||
1175 | stdDeviation="2.0397518" | ||
1176 | id="feGaussianBlur3679" /> | ||
1177 | </filter> | ||
1178 | <clipPath | ||
1179 | clipPathUnits="userSpaceOnUse" | ||
1180 | id="clipPath3722"> | ||
1181 | <path | ||
1182 | style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1183 | d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z" | ||
1184 | id="path3724" | ||
1185 | sodipodi:nodetypes="czzzzzzzzcc" /> | ||
1186 | </clipPath> | ||
1187 | <clipPath | ||
1188 | clipPathUnits="userSpaceOnUse" | ||
1189 | id="clipPath3986"> | ||
1190 | <path | ||
1191 | style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1192 | d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z" | ||
1193 | id="path3988" | ||
1194 | sodipodi:nodetypes="czzzzzzzzcc" /> | ||
1195 | </clipPath> | ||
1196 | <clipPath | ||
1197 | clipPathUnits="userSpaceOnUse" | ||
1198 | id="clipPath3992"> | ||
1199 | <path | ||
1200 | style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1201 | d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z" | ||
1202 | id="path3994" | ||
1203 | sodipodi:nodetypes="czzzzzzzzcc" /> | ||
1204 | </clipPath> | ||
1205 | <clipPath | ||
1206 | clipPathUnits="userSpaceOnUse" | ||
1207 | id="clipPath3998"> | ||
1208 | <path | ||
1209 | style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1210 | d="M 178.21428,274.14789 C 174.40985,248.88366 161.40456,223.50987 161.05748,198.62266 C 160.87122,185.26714 164.33033,172.05175 174.81301,159.06861 C 211.16003,93.772775 291.75392,74.373925 360.74767,67.603183 C 447.67006,56.586382 545.66034,85.543255 594.11901,163.00447 C 648.24303,238.73777 650.79377,335.54359 674.73105,422.53242 C 704.16884,549.66002 729.51019,678.74656 735.12329,809.38277 C 732.05981,887.56462 726.69695,974.56692 674.62008,1037.5169 C 626.59354,1087.8743 551.83361,1087.5699 487.5502,1096.5192 C 396.99481,1101.1742 303.19867,1080.3734 225.7663,1032.321 C 160.99066,994.38099 130.03611,918.84237 128.4869,846.30142 C 120.09773,766.42626 154.87842,692.49291 180.1073,619.14181 C 187.57791,536.38074 189.52016,452.89406 189.76064,369.75697 C 188.92382,337.56153 182.67111,305.93964 178.21428,274.14789 z" | ||
1211 | id="path4000" | ||
1212 | sodipodi:nodetypes="cscccccccccccc" /> | ||
1213 | </clipPath> | ||
1214 | <filter | ||
1215 | inkscape:collect="always" | ||
1216 | id="filter4002" | ||
1217 | x="-0.24334238" | ||
1218 | width="1.4866848" | ||
1219 | y="-0.39104807" | ||
1220 | height="1.7820961"> | ||
1221 | <feGaussianBlur | ||
1222 | inkscape:collect="always" | ||
1223 | stdDeviation="14.589518" | ||
1224 | id="feGaussianBlur4004" /> | ||
1225 | </filter> | ||
1226 | <filter | ||
1227 | inkscape:collect="always" | ||
1228 | id="filter4010" | ||
1229 | x="-0.14577261" | ||
1230 | width="1.2915452" | ||
1231 | y="-0.23523259" | ||
1232 | height="1.4704652"> | ||
1233 | <feGaussianBlur | ||
1234 | inkscape:collect="always" | ||
1235 | stdDeviation="4.4442907" | ||
1236 | id="feGaussianBlur4012" /> | ||
1237 | </filter> | ||
1238 | <filter | ||
1239 | inkscape:collect="always" | ||
1240 | id="filter4053"> | ||
1241 | <feGaussianBlur | ||
1242 | inkscape:collect="always" | ||
1243 | stdDeviation="0.6062947" | ||
1244 | id="feGaussianBlur4055" /> | ||
1245 | </filter> | ||
1246 | <filter | ||
1247 | inkscape:collect="always" | ||
1248 | id="filter4079"> | ||
1249 | <feGaussianBlur | ||
1250 | inkscape:collect="always" | ||
1251 | stdDeviation="6.5887624" | ||
1252 | id="feGaussianBlur4081" /> | ||
1253 | </filter> | ||
1254 | <filter | ||
1255 | inkscape:collect="always" | ||
1256 | id="filter4083"> | ||
1257 | <feGaussianBlur | ||
1258 | inkscape:collect="always" | ||
1259 | stdDeviation="1.5052066" | ||
1260 | id="feGaussianBlur4085" /> | ||
1261 | </filter> | ||
1262 | <radialGradient | ||
1263 | inkscape:collect="always" | ||
1264 | xlink:href="#linearGradient4113" | ||
1265 | id="radialGradient4119" | ||
1266 | cx="296.33783" | ||
1267 | cy="427.17749" | ||
1268 | fx="296.33783" | ||
1269 | fy="427.17749" | ||
1270 | r="19.704132" | ||
1271 | gradientUnits="userSpaceOnUse" | ||
1272 | gradientTransform="matrix(2.9797125,0,0,2.9797125,-599.28727,-827.0855)" /> | ||
1273 | <filter | ||
1274 | inkscape:collect="always" | ||
1275 | id="filter6949" | ||
1276 | x="-0.10294895" | ||
1277 | width="1.2058979" | ||
1278 | y="-0.34224695" | ||
1279 | height="1.6844939"> | ||
1280 | <feGaussianBlur | ||
1281 | inkscape:collect="always" | ||
1282 | stdDeviation="1.1675612" | ||
1283 | id="feGaussianBlur6951" /> | ||
1284 | </filter> | ||
1285 | <filter | ||
1286 | inkscape:collect="always" | ||
1287 | id="filter6953" | ||
1288 | x="-0.098320946" | ||
1289 | width="1.1966419" | ||
1290 | y="-0.19750816" | ||
1291 | height="1.3950163"> | ||
1292 | <feGaussianBlur | ||
1293 | inkscape:collect="always" | ||
1294 | stdDeviation="1.1675612" | ||
1295 | id="feGaussianBlur6955" /> | ||
1296 | </filter> | ||
1297 | <filter | ||
1298 | inkscape:collect="always" | ||
1299 | id="filter6957" | ||
1300 | x="-0.098213427" | ||
1301 | width="1.1964267" | ||
1302 | y="-0.19838208" | ||
1303 | height="1.3967642"> | ||
1304 | <feGaussianBlur | ||
1305 | inkscape:collect="always" | ||
1306 | stdDeviation="1.1675612" | ||
1307 | id="feGaussianBlur6959" /> | ||
1308 | </filter> | ||
1309 | <filter | ||
1310 | inkscape:collect="always" | ||
1311 | id="filter6961" | ||
1312 | x="-0.09919104" | ||
1313 | width="1.1983821" | ||
1314 | y="-0.22643611" | ||
1315 | height="1.4528722"> | ||
1316 | <feGaussianBlur | ||
1317 | inkscape:collect="always" | ||
1318 | stdDeviation="1.1675612" | ||
1319 | id="feGaussianBlur6963" /> | ||
1320 | </filter> | ||
1321 | <filter | ||
1322 | inkscape:collect="always" | ||
1323 | id="filter6965" | ||
1324 | x="-0.099081434" | ||
1325 | width="1.1981629" | ||
1326 | y="-0.22529824" | ||
1327 | height="1.4505965"> | ||
1328 | <feGaussianBlur | ||
1329 | inkscape:collect="always" | ||
1330 | stdDeviation="1.1675612" | ||
1331 | id="feGaussianBlur6967" /> | ||
1332 | </filter> | ||
1333 | <filter | ||
1334 | inkscape:collect="always" | ||
1335 | id="filter6969" | ||
1336 | x="-0.10450897" | ||
1337 | width="1.2090179" | ||
1338 | y="-0.40468886" | ||
1339 | height="1.8093777"> | ||
1340 | <feGaussianBlur | ||
1341 | inkscape:collect="always" | ||
1342 | stdDeviation="1.1675612" | ||
1343 | id="feGaussianBlur6971" /> | ||
1344 | </filter> | ||
1345 | <filter | ||
1346 | inkscape:collect="always" | ||
1347 | id="filter6973" | ||
1348 | x="-0.10330495" | ||
1349 | width="1.2066098" | ||
1350 | y="-0.36439717" | ||
1351 | height="1.7287945"> | ||
1352 | <feGaussianBlur | ||
1353 | inkscape:collect="always" | ||
1354 | stdDeviation="1.1675612" | ||
1355 | id="feGaussianBlur6975" /> | ||
1356 | </filter> | ||
1357 | <filter | ||
1358 | inkscape:collect="always" | ||
1359 | id="filter6977" | ||
1360 | x="-0.10224481" | ||
1361 | width="1.2044896" | ||
1362 | y="-0.32371372" | ||
1363 | height="1.6474274"> | ||
1364 | <feGaussianBlur | ||
1365 | inkscape:collect="always" | ||
1366 | stdDeviation="1.1675612" | ||
1367 | id="feGaussianBlur6979" /> | ||
1368 | </filter> | ||
1369 | <filter | ||
1370 | inkscape:collect="always" | ||
1371 | id="filter6981" | ||
1372 | x="-0.10052545" | ||
1373 | width="1.2010509" | ||
1374 | y="-0.2742162" | ||
1375 | height="1.5484324"> | ||
1376 | <feGaussianBlur | ||
1377 | inkscape:collect="always" | ||
1378 | stdDeviation="1.1675612" | ||
1379 | id="feGaussianBlur6983" /> | ||
1380 | </filter> | ||
1381 | <filter | ||
1382 | inkscape:collect="always" | ||
1383 | id="filter6985" | ||
1384 | x="-0.098428868" | ||
1385 | width="1.1968577" | ||
1386 | y="-0.20853186" | ||
1387 | height="1.4170637"> | ||
1388 | <feGaussianBlur | ||
1389 | inkscape:collect="always" | ||
1390 | stdDeviation="1.1675612" | ||
1391 | id="feGaussianBlur6987" /> | ||
1392 | </filter> | ||
1393 | <filter | ||
1394 | inkscape:collect="always" | ||
1395 | id="filter6989" | ||
1396 | x="-0.098428868" | ||
1397 | width="1.1968577" | ||
1398 | y="-0.20287035" | ||
1399 | height="1.4057407"> | ||
1400 | <feGaussianBlur | ||
1401 | inkscape:collect="always" | ||
1402 | stdDeviation="1.1675612" | ||
1403 | id="feGaussianBlur6991" /> | ||
1404 | </filter> | ||
1405 | <filter | ||
1406 | inkscape:collect="always" | ||
1407 | id="filter6993" | ||
1408 | x="-0.098213255" | ||
1409 | width="1.1964265" | ||
1410 | y="-0.19838208" | ||
1411 | height="1.3967642"> | ||
1412 | <feGaussianBlur | ||
1413 | inkscape:collect="always" | ||
1414 | stdDeviation="1.1675612" | ||
1415 | id="feGaussianBlur6995" /> | ||
1416 | </filter> | ||
1417 | <filter | ||
1418 | inkscape:collect="always" | ||
1419 | id="filter6997"> | ||
1420 | <feGaussianBlur | ||
1421 | inkscape:collect="always" | ||
1422 | stdDeviation="1.1675612" | ||
1423 | id="feGaussianBlur6999" /> | ||
1424 | </filter> | ||
1425 | <filter | ||
1426 | inkscape:collect="always" | ||
1427 | id="filter7001"> | ||
1428 | <feGaussianBlur | ||
1429 | inkscape:collect="always" | ||
1430 | stdDeviation="1.1675612" | ||
1431 | id="feGaussianBlur7003" /> | ||
1432 | </filter> | ||
1433 | <filter | ||
1434 | inkscape:collect="always" | ||
1435 | id="filter7285" | ||
1436 | x="-0.030884685" | ||
1437 | width="1.0617694" | ||
1438 | y="-0.10267408" | ||
1439 | height="1.2053483"> | ||
1440 | <feGaussianBlur | ||
1441 | inkscape:collect="always" | ||
1442 | stdDeviation="0.35026836" | ||
1443 | id="feGaussianBlur7287" /> | ||
1444 | </filter> | ||
1445 | <filter | ||
1446 | inkscape:collect="always" | ||
1447 | id="filter7289"> | ||
1448 | <feGaussianBlur | ||
1449 | inkscape:collect="always" | ||
1450 | stdDeviation="0.35026836" | ||
1451 | id="feGaussianBlur7291" /> | ||
1452 | </filter> | ||
1453 | <filter | ||
1454 | inkscape:collect="always" | ||
1455 | id="filter7293"> | ||
1456 | <feGaussianBlur | ||
1457 | inkscape:collect="always" | ||
1458 | stdDeviation="0.35026836" | ||
1459 | id="feGaussianBlur7295" /> | ||
1460 | </filter> | ||
1461 | <filter | ||
1462 | inkscape:collect="always" | ||
1463 | id="filter7297"> | ||
1464 | <feGaussianBlur | ||
1465 | inkscape:collect="always" | ||
1466 | stdDeviation="0.35026836" | ||
1467 | id="feGaussianBlur7299" /> | ||
1468 | </filter> | ||
1469 | <filter | ||
1470 | inkscape:collect="always" | ||
1471 | id="filter7301"> | ||
1472 | <feGaussianBlur | ||
1473 | inkscape:collect="always" | ||
1474 | stdDeviation="0.35026836" | ||
1475 | id="feGaussianBlur7303" /> | ||
1476 | </filter> | ||
1477 | <filter | ||
1478 | inkscape:collect="always" | ||
1479 | id="filter7305"> | ||
1480 | <feGaussianBlur | ||
1481 | inkscape:collect="always" | ||
1482 | stdDeviation="0.35026836" | ||
1483 | id="feGaussianBlur7307" /> | ||
1484 | </filter> | ||
1485 | <filter | ||
1486 | inkscape:collect="always" | ||
1487 | id="filter7309"> | ||
1488 | <feGaussianBlur | ||
1489 | inkscape:collect="always" | ||
1490 | stdDeviation="0.35026836" | ||
1491 | id="feGaussianBlur7311" /> | ||
1492 | </filter> | ||
1493 | <filter | ||
1494 | inkscape:collect="always" | ||
1495 | id="filter7313"> | ||
1496 | <feGaussianBlur | ||
1497 | inkscape:collect="always" | ||
1498 | stdDeviation="0.35026836" | ||
1499 | id="feGaussianBlur7315" /> | ||
1500 | </filter> | ||
1501 | <filter | ||
1502 | inkscape:collect="always" | ||
1503 | id="filter7317"> | ||
1504 | <feGaussianBlur | ||
1505 | inkscape:collect="always" | ||
1506 | stdDeviation="0.35026836" | ||
1507 | id="feGaussianBlur7319" /> | ||
1508 | </filter> | ||
1509 | <filter | ||
1510 | inkscape:collect="always" | ||
1511 | id="filter7321"> | ||
1512 | <feGaussianBlur | ||
1513 | inkscape:collect="always" | ||
1514 | stdDeviation="0.35026836" | ||
1515 | id="feGaussianBlur7323" /> | ||
1516 | </filter> | ||
1517 | <filter | ||
1518 | inkscape:collect="always" | ||
1519 | id="filter7325" | ||
1520 | x="-0.031352691" | ||
1521 | width="1.0627054" | ||
1522 | y="-0.12140666" | ||
1523 | height="1.2428133"> | ||
1524 | <feGaussianBlur | ||
1525 | inkscape:collect="always" | ||
1526 | stdDeviation="0.35026836" | ||
1527 | id="feGaussianBlur7327" /> | ||
1528 | </filter> | ||
1529 | <filter | ||
1530 | inkscape:collect="always" | ||
1531 | id="filter7329" | ||
1532 | x="-0.030991485" | ||
1533 | width="1.061983" | ||
1534 | y="-0.10931916" | ||
1535 | height="1.2186383"> | ||
1536 | <feGaussianBlur | ||
1537 | inkscape:collect="always" | ||
1538 | stdDeviation="0.35026836" | ||
1539 | id="feGaussianBlur7331" /> | ||
1540 | </filter> | ||
1541 | <filter | ||
1542 | inkscape:collect="always" | ||
1543 | id="filter7333"> | ||
1544 | <feGaussianBlur | ||
1545 | inkscape:collect="always" | ||
1546 | stdDeviation="0.35026836" | ||
1547 | id="feGaussianBlur7335" /> | ||
1548 | </filter> | ||
1549 | <filter | ||
1550 | inkscape:collect="always" | ||
1551 | id="filter7337"> | ||
1552 | <feGaussianBlur | ||
1553 | inkscape:collect="always" | ||
1554 | stdDeviation="0.35026836" | ||
1555 | id="feGaussianBlur7339" /> | ||
1556 | </filter> | ||
1557 | <filter | ||
1558 | inkscape:collect="always" | ||
1559 | id="filter7345"> | ||
1560 | <feGaussianBlur | ||
1561 | inkscape:collect="always" | ||
1562 | stdDeviation="1.7233839" | ||
1563 | id="feGaussianBlur7347" /> | ||
1564 | </filter> | ||
1565 | <clipPath | ||
1566 | clipPathUnits="userSpaceOnUse" | ||
1567 | id="clipPath7421"> | ||
1568 | <path | ||
1569 | sodipodi:type="inkscape:offset" | ||
1570 | inkscape:radius="0" | ||
1571 | inkscape:original="M 1111.4062 -285.9375 L 1107.4688 -284.0625 C 1107.4283 -284.05228 1107.3692 -284.04201 1107.3438 -284.03125 C 1106.925 -283.8184 1107.1791 -283.93067 1106.6875 -283.71875 C 1106.2014 -283.50919 1104.9499 -283.13456 1102.5938 -282.25 C 1099.2626 -280.99942 1096.7895 -280.10016 1095.5938 -279.1875 C 1094.0576 -279.16623 1091.8733 -278.95419 1089.9375 -278.46875 C 1086.956 -277.72108 1085.0823 -277.29474 1083.1875 -276.875 C 1081.2927 -276.45527 1081.512 -276.23281 1080.3125 -276 C 1079.0159 -275.74833 1078.5911 -276.00899 1074.875 -275.21875 C 1071.3851 -274.4766 1065.9802 -273.28768 1064.7188 -272.53125 C 1063.1348 -272.71203 1060.8513 -272.85303 1058.875 -272.5625 C 1055.8346 -272.11554 1053.9588 -271.88974 1052.0312 -271.65625 C 1051.3758 -271.57687 1050.9902 -271.45547 1050.6875 -271.375 C 1050.2613 -271.24334 1050.0017 -271.11498 1049.3125 -271.03125 C 1048.0009 -270.87188 1047.5503 -271.18808 1043.7812 -270.75 C 1040.2273 -270.33691 1034.7758 -269.47718 1033.5312 -268.8125 C 1031.9322 -269.10979 1029.6735 -269.34669 1027.6875 -269.15625 C 1024.6287 -268.86293 1022.7155 -268.67226 1020.7812 -268.5 C 1018.847 -268.32773 1019.0926 -268.07763 1017.875 -267.96875 C 1016.5588 -267.85105 1016.1152 -268.13238 1012.3438 -267.71875 C 1008.8017 -267.3303 1003.3359 -266.50948 1002.0625 -265.84375 C 1000.4636 -266.13844 998.1753 -266.35076 996.1875 -266.15625 C 993.12921 -265.857 991.2463 -265.67601 989.3125 -265.5 C 988.65501 -265.44015 988.27245 -265.32144 987.96875 -265.25 C 987.54105 -265.13104 987.28525 -265.03193 986.59375 -264.96875 C 985.27775 -264.84849 984.834 -265.16363 981.0625 -264.75 C 977.50631 -264.35998 972.0569 -263.51084 970.8125 -262.84375 C 969.21381 -263.13793 966.95265 -263.36747 964.96875 -263.15625 C 961.91305 -262.83092 959.9947 -262.63001 958.0625 -262.4375 C 956.13031 -262.24499 956.37275 -261.99662 955.15625 -261.875 C 953.84137 -261.74353 953.3932 -262.03954 949.625 -261.59375 C 946.08611 -261.17509 940.6473 -260.30158 939.375 -259.625 C 937.77741 -259.90604 935.51505 -260.04543 933.53125 -259.8125 C 930.47927 -259.45413 928.58625 -259.24464 926.65625 -259.03125 C 926.00007 -258.95869 925.6156 -258.85856 925.3125 -258.78125 C 924.88571 -258.65402 924.6276 -258.51405 923.9375 -258.4375 C 922.62411 -258.29181 922.17015 -258.61152 918.40625 -258.125 C 914.85737 -257.66624 909.4276 -256.70598 908.1875 -256 C 906.59441 -256.24424 904.3537 -256.38135 902.375 -256.125 C 899.32741 -255.73018 897.4243 -255.47655 895.5 -255.21875 C 893.57571 -254.96096 893.7739 -254.72522 892.5625 -254.5625 C 891.25301 -254.3866 890.8153 -254.66688 887.0625 -254.09375 C 883.53821 -253.55551 878.1393 -252.39458 876.875 -251.65625 C 875.28751 -251.85979 873.0295 -251.91098 871.0625 -251.5625 C 868.03631 -251.02638 866.1636 -250.70081 864.25 -250.375 C 863.59941 -250.26423 863.2363 -250.10406 862.9375 -250 C 862.51681 -249.83512 862.27405 -249.6687 861.59375 -249.53125 C 860.29905 -249.26966 859.86665 -249.53745 856.15625 -248.71875 C 852.65777 -247.9468 847.31035 -246.33582 846.09375 -245.5 C 844.53085 -245.57745 842.33625 -245.41472 840.40625 -244.90625 C 837.43387 -244.12312 835.58855 -243.67416 833.71875 -243.15625 C 831.84875 -242.63835 832.0521 -242.38897 830.875 -242.0625 C 829.60251 -241.7096 829.17795 -241.95541 825.53125 -240.875 C 822.10657 -239.86037 816.88185 -237.94183 815.65625 -237.03125 C 814.11747 -237.01851 811.93645 -236.75903 810.03125 -236.15625 C 807.10027 -235.22891 805.2809 -234.69783 803.4375 -234.09375 C 802.81071 -233.88837 802.44585 -233.70117 802.15625 -233.5625 C 801.74867 -233.34889 801.50295 -233.15375 800.84375 -232.9375 C 799.58925 -232.52596 799.1576 -232.74846 795.5625 -231.5 C 792.17261 -230.32283 786.96755 -228.2863 785.78125 -227.34375 C 784.25737 -227.28408 782.1312 -226.94888 780.25 -226.28125 C 777.35261 -225.25296 775.55095 -224.60577 773.71875 -223.96875 C 771.88655 -223.33174 772.0909 -223.12021 770.9375 -222.71875 C 769.69071 -222.28479 769.27395 -222.51903 765.71875 -221.15625 C 762.38005 -219.87645 757.23165 -217.6737 756.03125 -216.6875 C 754.52407 -216.57981 752.39555 -216.1887 750.53125 -215.46875 C 747.66307 -214.36115 745.90735 -213.68719 744.09375 -213 C 743.47705 -212.76637 743.0973 -212.55797 742.8125 -212.40625 C 742.81251 -212.40625 742.8125 -212.37673 742.8125 -212.375 L 734.8125 -209.1875 L 736.625 -194.46875 C 736.36701 -194.52956 742.8125 -191.15625 742.8125 -191.15625 C 743.03891 -191.30093 743.26145 -191.42886 743.53125 -191.53125 C 744.61177 -191.94123 745.70285 -191.74702 749.53125 -193.21875 C 753.35977 -194.69049 754.7553 -195.22373 755.4375 -195.625 C 756.11711 -196.02478 757.04925 -196.50437 757.65625 -197.15625 C 759.48317 -197.294 761.22705 -197.64948 762.59375 -198.15625 C 765.56175 -199.25677 767.4691 -199.96244 769.375 -200.625 C 771.28081 -201.28754 771.72915 -202.03987 772.78125 -202.40625 C 773.87287 -202.78636 774.97635 -202.57163 778.84375 -203.9375 C 782.71115 -205.30336 784.1269 -205.76458 784.8125 -206.15625 C 785.51361 -206.55677 786.5133 -207.08923 787.125 -207.75 C 789.09581 -207.80466 790.94195 -208.13463 792.40625 -208.625 C 795.40777 -209.63008 797.3324 -210.24671 799.25 -210.875 C 800.78861 -211.3791 801.42415 -211.92177 802.15625 -212.3125 C 802.38647 -212.44681 802.63215 -212.56623 802.90625 -212.65625 C 804.00457 -213.01673 805.0877 -212.73762 809 -213.96875 C 812.91231 -215.19988 814.366 -215.6417 815.0625 -216 C 815.75641 -216.35697 816.6926 -216.79261 817.3125 -217.40625 C 819.17771 -217.42891 820.94835 -217.67308 822.34375 -218.09375 C 825.37415 -219.00729 827.33615 -219.52385 829.28125 -220.0625 C 831.22637 -220.60114 831.70745 -221.32702 832.78125 -221.625 C 833.89527 -221.93415 835.00125 -221.61761 838.96875 -222.65625 C 842.93625 -223.69488 844.38625 -224.08898 845.09375 -224.40625 C 845.82855 -224.73584 846.90765 -225.15997 847.53125 -225.78125 C 849.52907 -225.66525 851.3887 -225.80134 852.875 -226.15625 C 855.95311 -226.89125 857.9584 -227.25719 859.9375 -227.65625 C 861.52541 -227.97643 862.1818 -228.4468 862.9375 -228.75 C 863.17501 -228.8568 863.4044 -228.94276 863.6875 -229 C 864.82091 -229.22919 865.99215 -228.79107 870.03125 -229.5 C 874.07067 -230.20893 875.5315 -230.42709 876.25 -230.6875 C 876.96581 -230.94694 877.95435 -231.25474 878.59375 -231.78125 C 880.51795 -231.54176 882.34165 -231.55672 883.78125 -231.78125 C 886.90767 -232.26887 888.9358 -232.48192 890.9375 -232.75 C 892.93921 -233.01807 893.42625 -233.69514 894.53125 -233.84375 C 895.67767 -233.99793 896.8071 -233.54218 900.875 -234.0625 C 904.94281 -234.58282 906.43525 -234.75823 907.15625 -235 C 907.89337 -235.24714 908.95435 -235.58623 909.59375 -236.125 C 911.64375 -235.78947 913.56745 -235.72704 915.09375 -235.90625 C 918.23595 -236.27521 920.27375 -236.46561 922.28125 -236.6875 C 923.89207 -236.86552 924.5459 -237.2957 925.3125 -237.53125 C 925.55341 -237.61677 925.80655 -237.68685 926.09375 -237.71875 C 927.24345 -237.84647 928.39505 -237.3721 932.46875 -237.84375 C 936.54245 -238.3154 938.0278 -238.45435 938.75 -238.6875 C 939.46941 -238.91977 940.45025 -239.16096 941.09375 -239.65625 C 943.03005 -239.32279 944.8638 -239.25201 946.3125 -239.40625 C 949.45851 -239.7412 951.49 -239.92484 953.5 -240.125 C 955.50991 -240.32514 955.98415 -240.95139 957.09375 -241.0625 C 958.24485 -241.17778 959.39025 -240.69744 963.46875 -241.125 C 967.54725 -241.55256 969.05765 -241.68709 969.78125 -241.90625 C 970.52047 -242.13011 971.57685 -242.4195 972.21875 -242.9375 C 974.27575 -242.53883 976.2206 -242.4441 977.75 -242.59375 C 980.89871 -242.90185 982.9258 -243.067 984.9375 -243.25 C 986.55151 -243.39682 987.20055 -243.81055 987.96875 -244.03125 C 988.21005 -244.11211 988.4623 -244.16116 988.75 -244.1875 C 989.90211 -244.29295 991.0429 -243.79475 995.125 -244.1875 C 999.20711 -244.58025 1000.7139 -244.71834 1001.4375 -244.9375 C 1002.1584 -245.15583 1003.1371 -245.3852 1003.7812 -245.875 C 1005.7193 -245.52501 1007.5501 -245.42062 1009 -245.5625 C 1012.1487 -245.8706 1014.1758 -246.03575 1016.1875 -246.21875 C 1018.1991 -246.40174 1018.7017 -247.05677 1019.8125 -247.15625 C 1020.9648 -247.25948 1022.1047 -246.77142 1026.1875 -247.15625 C 1030.2704 -247.54107 1031.7762 -247.65725 1032.5 -247.875 C 1033.2393 -248.09743 1034.2956 -248.38949 1034.9375 -248.90625 C 1036.9949 -248.50448 1038.9404 -248.40292 1040.4688 -248.5625 C 1043.6153 -248.89102 1045.6458 -249.0852 1047.6562 -249.28125 C 1049.2692 -249.43854 1049.9219 -249.91273 1050.6875 -250.15625 C 1050.9282 -250.24429 1051.1507 -250.27762 1051.4375 -250.3125 C 1052.5858 -250.4522 1053.7542 -249.97259 1057.8125 -250.5625 C 1061.8708 -251.15242 1063.3743 -251.33964 1064.0938 -251.59375 C 1064.8104 -251.84691 1065.7684 -252.15182 1066.4062 -252.6875 C 1068.3259 -252.47556 1070.1262 -252.53609 1071.5625 -252.78125 C 1074.6816 -253.31365 1076.6741 -253.70986 1078.6562 -254.09375 C 1080.6383 -254.47762 1081.1305 -255.1334 1082.2188 -255.375 C 1083.3475 -255.62566 1084.489 -255.25871 1088.4688 -256.25 C 1092.4483 -257.24127 1093.8983 -257.6693 1094.5938 -258.03125 C 1095.316 -258.40725 1096.3555 -258.90183 1096.9688 -259.5625 C 1098.9317 -259.57454 1100.7625 -259.85355 1102.1875 -260.40625 C 1105.1387 -261.55085 1107.0607 -262.27567 1108.875 -263.15625 C 1110.3307 -263.86277 1111.1941 -264.85828 1111.4062 -265.15625 C 1111.6185 -265.4542 1111.5051 -265.8848 1111.5312 -265.90625 C 1111.5742 -265.94148 1111.8716 -266.00028 1112.0312 -266.34375 C 1112.8902 -268.19082 1114.3544 -271.97139 1114.4688 -272.65625 C 1114.5825 -273.33839 1114.6368 -274.00902 1114.6875 -274.40625 C 1114.7169 -274.63575 1114.5404 -275.28515 1114.5625 -275.34375 C 1114.5934 -275.42579 1114.8508 -275.59432 1114.9062 -275.84375 C 1115.1725 -277.04206 1114.9953 -278.05111 1114.7812 -279.46875 C 1114.5673 -280.88638 1113.8096 -284.08338 1113.1562 -284.9375 C 1112.4973 -285.79922 1111.9314 -285.94801 1111.4062 -285.9375 z " | ||
1572 | style="fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1573 | id="path7423" | ||
1574 | d="M 1111.4062,-285.9375 L 1107.4688,-284.0625 C 1107.4283,-284.05228 1107.3692,-284.04201 1107.3438,-284.03125 C 1106.925,-283.8184 1107.1791,-283.93067 1106.6875,-283.71875 C 1106.2014,-283.50919 1104.9499,-283.13456 1102.5938,-282.25 C 1099.2626,-280.99942 1096.7895,-280.10016 1095.5938,-279.1875 C 1094.0576,-279.16623 1091.8733,-278.95419 1089.9375,-278.46875 C 1086.956,-277.72108 1085.0823,-277.29474 1083.1875,-276.875 C 1081.2927,-276.45527 1081.512,-276.23281 1080.3125,-276 C 1079.0159,-275.74833 1078.5911,-276.00899 1074.875,-275.21875 C 1071.3851,-274.4766 1065.9802,-273.28768 1064.7188,-272.53125 C 1063.1348,-272.71203 1060.8513,-272.85303 1058.875,-272.5625 C 1055.8346,-272.11554 1053.9588,-271.88974 1052.0312,-271.65625 C 1051.3758,-271.57687 1050.9902,-271.45547 1050.6875,-271.375 C 1050.2613,-271.24334 1050.0017,-271.11498 1049.3125,-271.03125 C 1048.0009,-270.87188 1047.5503,-271.18808 1043.7812,-270.75 C 1040.2273,-270.33691 1034.7758,-269.47718 1033.5312,-268.8125 C 1031.9322,-269.10979 1029.6735,-269.34669 1027.6875,-269.15625 C 1024.6287,-268.86293 1022.7155,-268.67226 1020.7812,-268.5 C 1018.847,-268.32773 1019.0926,-268.07763 1017.875,-267.96875 C 1016.5588,-267.85105 1016.1152,-268.13238 1012.3438,-267.71875 C 1008.8017,-267.3303 1003.3359,-266.50948 1002.0625,-265.84375 C 1000.4636,-266.13844 998.1753,-266.35076 996.1875,-266.15625 C 993.12921,-265.857 991.2463,-265.67601 989.3125,-265.5 C 988.65501,-265.44015 988.27245,-265.32144 987.96875,-265.25 C 987.54105,-265.13104 987.28525,-265.03193 986.59375,-264.96875 C 985.27775,-264.84849 984.834,-265.16363 981.0625,-264.75 C 977.50631,-264.35998 972.0569,-263.51084 970.8125,-262.84375 C 969.21381,-263.13793 966.95265,-263.36747 964.96875,-263.15625 C 961.91305,-262.83092 959.9947,-262.63001 958.0625,-262.4375 C 956.13031,-262.24499 956.37275,-261.99662 955.15625,-261.875 C 953.84137,-261.74353 953.3932,-262.03954 949.625,-261.59375 C 946.08611,-261.17509 940.6473,-260.30158 939.375,-259.625 C 937.77741,-259.90604 935.51505,-260.04543 933.53125,-259.8125 C 930.47927,-259.45413 928.58625,-259.24464 926.65625,-259.03125 C 926.00007,-258.95869 925.6156,-258.85856 925.3125,-258.78125 C 924.88571,-258.65402 924.6276,-258.51405 923.9375,-258.4375 C 922.62411,-258.29181 922.17015,-258.61152 918.40625,-258.125 C 914.85737,-257.66624 909.4276,-256.70598 908.1875,-256 C 906.59441,-256.24424 904.3537,-256.38135 902.375,-256.125 C 899.32741,-255.73018 897.4243,-255.47655 895.5,-255.21875 C 893.57571,-254.96096 893.7739,-254.72522 892.5625,-254.5625 C 891.25301,-254.3866 890.8153,-254.66688 887.0625,-254.09375 C 883.53821,-253.55551 878.1393,-252.39458 876.875,-251.65625 C 875.28751,-251.85979 873.0295,-251.91098 871.0625,-251.5625 C 868.03631,-251.02638 866.1636,-250.70081 864.25,-250.375 C 863.59941,-250.26423 863.2363,-250.10406 862.9375,-250 C 862.51681,-249.83512 862.27405,-249.6687 861.59375,-249.53125 C 860.29905,-249.26966 859.86665,-249.53745 856.15625,-248.71875 C 852.65777,-247.9468 847.31035,-246.33582 846.09375,-245.5 C 844.53085,-245.57745 842.33625,-245.41472 840.40625,-244.90625 C 837.43387,-244.12312 835.58855,-243.67416 833.71875,-243.15625 C 831.84875,-242.63835 832.0521,-242.38897 830.875,-242.0625 C 829.60251,-241.7096 829.17795,-241.95541 825.53125,-240.875 C 822.10657,-239.86037 816.88185,-237.94183 815.65625,-237.03125 C 814.11747,-237.01851 811.93645,-236.75903 810.03125,-236.15625 C 807.10027,-235.22891 805.2809,-234.69783 803.4375,-234.09375 C 802.81071,-233.88837 802.44585,-233.70117 802.15625,-233.5625 C 801.74867,-233.34889 801.50295,-233.15375 800.84375,-232.9375 C 799.58925,-232.52596 799.1576,-232.74846 795.5625,-231.5 C 792.17261,-230.32283 786.96755,-228.2863 785.78125,-227.34375 C 784.25737,-227.28408 782.1312,-226.94888 780.25,-226.28125 C 777.35261,-225.25296 775.55095,-224.60577 773.71875,-223.96875 C 771.88655,-223.33174 772.0909,-223.12021 770.9375,-222.71875 C 769.69071,-222.28479 769.27395,-222.51903 765.71875,-221.15625 C 762.38005,-219.87645 757.23165,-217.6737 756.03125,-216.6875 C 754.52407,-216.57981 752.39555,-216.1887 750.53125,-215.46875 C 747.66307,-214.36115 745.90735,-213.68719 744.09375,-213 C 743.47705,-212.76637 743.0973,-212.55797 742.8125,-212.40625 C 742.81251,-212.40625 742.8125,-212.37673 742.8125,-212.375 L 734.8125,-209.1875 L 736.625,-194.46875 C 736.36701,-194.52956 742.8125,-191.15625 742.8125,-191.15625 C 743.03891,-191.30093 743.26145,-191.42886 743.53125,-191.53125 C 744.61177,-191.94123 745.70285,-191.74702 749.53125,-193.21875 C 753.35977,-194.69049 754.7553,-195.22373 755.4375,-195.625 C 756.11711,-196.02478 757.04925,-196.50437 757.65625,-197.15625 C 759.48317,-197.294 761.22705,-197.64948 762.59375,-198.15625 C 765.56175,-199.25677 767.4691,-199.96244 769.375,-200.625 C 771.28081,-201.28754 771.72915,-202.03987 772.78125,-202.40625 C 773.87287,-202.78636 774.97635,-202.57163 778.84375,-203.9375 C 782.71115,-205.30336 784.1269,-205.76458 784.8125,-206.15625 C 785.51361,-206.55677 786.5133,-207.08923 787.125,-207.75 C 789.09581,-207.80466 790.94195,-208.13463 792.40625,-208.625 C 795.40777,-209.63008 797.3324,-210.24671 799.25,-210.875 C 800.78861,-211.3791 801.42415,-211.92177 802.15625,-212.3125 C 802.38647,-212.44681 802.63215,-212.56623 802.90625,-212.65625 C 804.00457,-213.01673 805.0877,-212.73762 809,-213.96875 C 812.91231,-215.19988 814.366,-215.6417 815.0625,-216 C 815.75641,-216.35697 816.6926,-216.79261 817.3125,-217.40625 C 819.17771,-217.42891 820.94835,-217.67308 822.34375,-218.09375 C 825.37415,-219.00729 827.33615,-219.52385 829.28125,-220.0625 C 831.22637,-220.60114 831.70745,-221.32702 832.78125,-221.625 C 833.89527,-221.93415 835.00125,-221.61761 838.96875,-222.65625 C 842.93625,-223.69488 844.38625,-224.08898 845.09375,-224.40625 C 845.82855,-224.73584 846.90765,-225.15997 847.53125,-225.78125 C 849.52907,-225.66525 851.3887,-225.80134 852.875,-226.15625 C 855.95311,-226.89125 857.9584,-227.25719 859.9375,-227.65625 C 861.52541,-227.97643 862.1818,-228.4468 862.9375,-228.75 C 863.17501,-228.8568 863.4044,-228.94276 863.6875,-229 C 864.82091,-229.22919 865.99215,-228.79107 870.03125,-229.5 C 874.07067,-230.20893 875.5315,-230.42709 876.25,-230.6875 C 876.96581,-230.94694 877.95435,-231.25474 878.59375,-231.78125 C 880.51795,-231.54176 882.34165,-231.55672 883.78125,-231.78125 C 886.90767,-232.26887 888.9358,-232.48192 890.9375,-232.75 C 892.93921,-233.01807 893.42625,-233.69514 894.53125,-233.84375 C 895.67767,-233.99793 896.8071,-233.54218 900.875,-234.0625 C 904.94281,-234.58282 906.43525,-234.75823 907.15625,-235 C 907.89337,-235.24714 908.95435,-235.58623 909.59375,-236.125 C 911.64375,-235.78947 913.56745,-235.72704 915.09375,-235.90625 C 918.23595,-236.27521 920.27375,-236.46561 922.28125,-236.6875 C 923.89207,-236.86552 924.5459,-237.2957 925.3125,-237.53125 C 925.55341,-237.61677 925.80655,-237.68685 926.09375,-237.71875 C 927.24345,-237.84647 928.39505,-237.3721 932.46875,-237.84375 C 936.54245,-238.3154 938.0278,-238.45435 938.75,-238.6875 C 939.46941,-238.91977 940.45025,-239.16096 941.09375,-239.65625 C 943.03005,-239.32279 944.8638,-239.25201 946.3125,-239.40625 C 949.45851,-239.7412 951.49,-239.92484 953.5,-240.125 C 955.50991,-240.32514 955.98415,-240.95139 957.09375,-241.0625 C 958.24485,-241.17778 959.39025,-240.69744 963.46875,-241.125 C 967.54725,-241.55256 969.05765,-241.68709 969.78125,-241.90625 C 970.52047,-242.13011 971.57685,-242.4195 972.21875,-242.9375 C 974.27575,-242.53883 976.2206,-242.4441 977.75,-242.59375 C 980.89871,-242.90185 982.9258,-243.067 984.9375,-243.25 C 986.55151,-243.39682 987.20055,-243.81055 987.96875,-244.03125 C 988.21005,-244.11211 988.4623,-244.16116 988.75,-244.1875 C 989.90211,-244.29295 991.0429,-243.79475 995.125,-244.1875 C 999.20711,-244.58025 1000.7139,-244.71834 1001.4375,-244.9375 C 1002.1584,-245.15583 1003.1371,-245.3852 1003.7812,-245.875 C 1005.7193,-245.52501 1007.5501,-245.42062 1009,-245.5625 C 1012.1487,-245.8706 1014.1758,-246.03575 1016.1875,-246.21875 C 1018.1991,-246.40174 1018.7017,-247.05677 1019.8125,-247.15625 C 1020.9648,-247.25948 1022.1047,-246.77142 1026.1875,-247.15625 C 1030.2704,-247.54107 1031.7762,-247.65725 1032.5,-247.875 C 1033.2393,-248.09743 1034.2956,-248.38949 1034.9375,-248.90625 C 1036.9949,-248.50448 1038.9404,-248.40292 1040.4688,-248.5625 C 1043.6153,-248.89102 1045.6458,-249.0852 1047.6562,-249.28125 C 1049.2692,-249.43854 1049.9219,-249.91273 1050.6875,-250.15625 C 1050.9282,-250.24429 1051.1507,-250.27762 1051.4375,-250.3125 C 1052.5858,-250.4522 1053.7542,-249.97259 1057.8125,-250.5625 C 1061.8708,-251.15242 1063.3743,-251.33964 1064.0938,-251.59375 C 1064.8104,-251.84691 1065.7684,-252.15182 1066.4062,-252.6875 C 1068.3259,-252.47556 1070.1262,-252.53609 1071.5625,-252.78125 C 1074.6816,-253.31365 1076.6741,-253.70986 1078.6562,-254.09375 C 1080.6383,-254.47762 1081.1305,-255.1334 1082.2188,-255.375 C 1083.3475,-255.62566 1084.489,-255.25871 1088.4688,-256.25 C 1092.4483,-257.24127 1093.8983,-257.6693 1094.5938,-258.03125 C 1095.316,-258.40725 1096.3555,-258.90183 1096.9688,-259.5625 C 1098.9317,-259.57454 1100.7625,-259.85355 1102.1875,-260.40625 C 1105.1387,-261.55085 1107.0607,-262.27567 1108.875,-263.15625 C 1110.3307,-263.86277 1111.1941,-264.85828 1111.4062,-265.15625 C 1111.6185,-265.4542 1111.5051,-265.8848 1111.5312,-265.90625 C 1111.5742,-265.94148 1111.8716,-266.00028 1112.0312,-266.34375 C 1112.8902,-268.19082 1114.3544,-271.97139 1114.4688,-272.65625 C 1114.5825,-273.33839 1114.6368,-274.00902 1114.6875,-274.40625 C 1114.7169,-274.63575 1114.5404,-275.28515 1114.5625,-275.34375 C 1114.5934,-275.42579 1114.8508,-275.59432 1114.9062,-275.84375 C 1115.1725,-277.04206 1114.9953,-278.05111 1114.7812,-279.46875 C 1114.5673,-280.88638 1113.8096,-284.08338 1113.1562,-284.9375 C 1112.4973,-285.79922 1111.9314,-285.94801 1111.4062,-285.9375 z" | ||
1575 | transform="translate(8.0045714e-2,-3.125e-2)" /> | ||
1576 | </clipPath> | ||
1577 | <filter | ||
1578 | inkscape:collect="always" | ||
1579 | id="filter7578" | ||
1580 | x="-0.08160872" | ||
1581 | width="1.1632174" | ||
1582 | y="-0.22659944" | ||
1583 | height="1.4531989"> | ||
1584 | <feGaussianBlur | ||
1585 | inkscape:collect="always" | ||
1586 | stdDeviation="2.437399" | ||
1587 | id="feGaussianBlur7580" /> | ||
1588 | </filter> | ||
1589 | <filter | ||
1590 | inkscape:collect="always" | ||
1591 | id="filter7594" | ||
1592 | x="-0.040804356" | ||
1593 | width="1.0816087" | ||
1594 | y="-0.11329972" | ||
1595 | height="1.2265995"> | ||
1596 | <feGaussianBlur | ||
1597 | inkscape:collect="always" | ||
1598 | stdDeviation="1.2186995" | ||
1599 | id="feGaussianBlur7596" /> | ||
1600 | </filter> | ||
1601 | <clipPath | ||
1602 | clipPathUnits="userSpaceOnUse" | ||
1603 | id="clipPath7606"> | ||
1604 | <path | ||
1605 | id="path7608" | ||
1606 | d="M 1049.205,-282.26672 L 1049.1152,-282.25891 C 1047.7278,-281.37446 1042.5119,-280.65171 1042.4862,-272.73547 C 1042.462,-265.31022 1057.4991,-255.64401 1059.6425,-254.64172 C 1061.3727,-253.83263 1063.2341,-253.23296 1065.0488,-252.92297 L 1066.4862,-252.70422 C 1068.4059,-252.49228 1070.2062,-252.55281 1071.6425,-252.79797 C 1074.7616,-253.33037 1076.7541,-253.72658 1078.7362,-254.11047 C 1080.7183,-254.49434 1081.2105,-255.15012 1082.2988,-255.39172 C 1083.4275,-255.64238 1084.569,-255.27543 1088.5488,-256.26672 C 1092.5283,-257.258 1093.9782,-257.68602 1094.6738,-258.04797 C 1095.396,-258.42398 1096.4355,-258.91855 1097.0488,-259.57922 C 1099.0117,-259.59127 1100.8425,-259.87027 1102.2675,-260.42297 C 1105.2187,-261.56758 1107.1407,-262.29239 1108.955,-263.17297 C 1110.4107,-263.8795 1111.2741,-264.875 1111.4862,-265.17297 C 1111.6985,-265.47093 1111.5852,-265.90152 1111.6112,-265.92297 C 1111.6542,-265.95821 1111.9517,-266.017 1112.1112,-266.36047 C 1112.9702,-268.20755 1114.4344,-271.98811 1114.5488,-272.67297 C 1114.6625,-273.35512 1114.7168,-274.02574 1114.7675,-274.42297 C 1114.7969,-274.65248 1114.6204,-275.30187 1114.6425,-275.36047 C 1114.6734,-275.44252 1114.9308,-275.61104 1114.9862,-275.86047 C 1115.2525,-277.05879 1115.0754,-278.06783 1114.8612,-279.48547 C 1114.6473,-280.90311 1113.8896,-284.1001 1113.2362,-284.95422 C 1112.8168,-285.50279 1112.4369,-285.74672 1112.08,-285.86047 C 1112.0129,-285.87776 1111.9561,-285.90721 1111.8925,-285.92297 C 1111.8715,-285.92695 1111.8508,-285.91983 1111.83,-285.92297 C 1111.5184,-285.99847 1111.2215,-286.08164 1110.6738,-286.14172 C 1109.6883,-286.24984 1108.2491,-286.40112 1106.705,-286.39172 C 1106.1903,-286.38859 1105.6679,-286.34408 1105.1425,-286.29797 C 1101.5836,-285.98569 1096.1327,-285.30689 1094.9238,-284.67297 C 1093.2907,-285.00699 1090.9756,-285.2852 1088.9862,-285.14172 C 1085.9222,-284.92075 1084.0185,-284.79953 1082.08,-284.67297 C 1080.1416,-284.54642 1080.3939,-284.28433 1079.1738,-284.20422 C 1077.8547,-284.11762 1077.3869,-284.42747 1073.6112,-284.11047 C 1070.0655,-283.81275 1064.6306,-283.1173 1063.3925,-282.48547 C 1061.7591,-282.81998 1059.4466,-283.09786 1057.455,-282.95422 C 1054.3908,-282.73324 1052.4872,-282.58078 1050.5488,-282.45422 C 1049.8896,-282.41119 1049.5064,-282.33029 1049.205,-282.26672 z" | ||
1607 | style="opacity:0.82448976;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1608 | </clipPath> | ||
1609 | <filter | ||
1610 | inkscape:collect="always" | ||
1611 | id="filter7610" | ||
1612 | x="-0.021942979" | ||
1613 | width="1.0438859" | ||
1614 | y="-0.10017137" | ||
1615 | height="1.2003427"> | ||
1616 | <feGaussianBlur | ||
1617 | inkscape:collect="always" | ||
1618 | stdDeviation="0.57530213" | ||
1619 | id="feGaussianBlur7612" /> | ||
1620 | </filter> | ||
1621 | <clipPath | ||
1622 | clipPathUnits="userSpaceOnUse" | ||
1623 | id="clipPath7616"> | ||
1624 | <path | ||
1625 | id="path7618" | ||
1626 | d="M 205.47016,-408.97318 L 205.38003,-408.97164 C 203.9344,-408.18598 198.68082,-407.82829 198.10378,-399.93307 C 197.56244,-392.52754 211.88973,-381.83741 213.95811,-380.68826 C 215.62775,-379.76062 217.44286,-379.03275 219.23156,-378.59711 L 220.65023,-378.27877 C 222.5505,-377.93363 224.35065,-377.86862 225.80054,-378.01314 C 228.94914,-378.32698 230.9644,-378.58345 232.96843,-378.82834 C 234.97245,-379.07322 235.50913,-379.69312 236.61162,-379.85833 C 237.75504,-380.02976 238.86821,-379.58419 242.90739,-380.29586 C 246.94627,-381.00755 248.42246,-381.33354 249.14158,-381.64616 C 249.88822,-381.97095 250.95964,-382.39191 251.61747,-383.00826 C 253.57644,-382.88355 255.42223,-383.03435 256.88227,-383.48645 C 259.90603,-384.42272 261.87384,-385.01189 263.74507,-385.76396 C 265.24645,-386.36738 266.17709,-387.30032 266.40943,-387.58279 C 266.64197,-387.86524 266.55894,-388.30268 266.58637,-388.32227 C 266.63172,-388.35443 266.93259,-388.39235 267.11563,-388.72388 C 268.1012,-390.50664 269.82518,-394.17603 269.987,-394.85126 C 270.14794,-395.52383 270.24882,-396.18904 270.32707,-396.58177 C 270.37238,-396.80868 270.24154,-397.46878 270.26767,-397.5257 C 270.30421,-397.6054 270.57272,-397.75558 270.64536,-398.00055 C 270.99449,-399.17741 270.8881,-400.19633 270.77316,-401.62545 C 270.65853,-403.05454 270.12535,-406.29655 269.53303,-407.1941 C 269.15286,-407.77056 268.79088,-408.04035 268.44277,-408.17869 C 268.37703,-408.20061 268.32242,-408.23394 268.26007,-408.2541 C 268.2394,-408.25953 268.21826,-408.25387 268.19773,-408.25845 C 267.89214,-408.35547 267.60176,-408.45912 267.05957,-408.5572 C 266.084,-408.7337 264.65883,-408.98486 263.11782,-409.08304 C 262.60416,-409.11577 262.07992,-409.10775 261.55259,-409.09835 C 257.98058,-409.03472 252.49564,-408.73725 251.24552,-408.18907 C 249.63965,-408.63604 247.34955,-409.07483 245.35499,-409.07027 C 242.28304,-409.06325 240.37552,-409.07493 238.43292,-409.0837 C 236.49041,-409.09248 236.72384,-408.81345 235.50112,-408.81852 C 234.1792,-408.82401 233.73411,-409.16569 229.9455,-409.11245 C 226.38768,-409.06243 220.91754,-408.74723 219.63844,-408.20318 C 218.0323,-408.65065 215.74477,-409.08893 213.74801,-409.08436 C 210.67586,-409.07735 208.76626,-409.05786 206.82375,-409.06662 C 206.16316,-409.06961 205.77525,-409.0156 205.47016,-408.97318 z" | ||
1627 | style="opacity:0.82448976;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1628 | </clipPath> | ||
1629 | <linearGradient | ||
1630 | inkscape:collect="always" | ||
1631 | xlink:href="#linearGradient7622" | ||
1632 | id="linearGradient7708" | ||
1633 | gradientUnits="userSpaceOnUse" | ||
1634 | gradientTransform="translate(-19.091883,4.2426407)" | ||
1635 | x1="774.97668" | ||
1636 | y1="-211.87105" | ||
1637 | x2="755.11584" | ||
1638 | y2="-202.67865" /> | ||
1639 | <mask | ||
1640 | maskUnits="userSpaceOnUse" | ||
1641 | id="mask7704"> | ||
1642 | <path | ||
1643 | style="fill:url(#linearGradient7708);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" | ||
1644 | d="M 718.40812,-224.31217 L 751.65812,-168.31217 L 1027.6581,-192.31217 L 1187.1581,-240.56217 L 1120.6581,-323.31217 L 718.40812,-224.31217 z" | ||
1645 | id="path7706" /> | ||
1646 | </mask> | ||
1647 | <radialGradient | ||
1648 | inkscape:collect="always" | ||
1649 | xlink:href="#linearGradient8430" | ||
1650 | id="radialGradient7904" | ||
1651 | gradientUnits="userSpaceOnUse" | ||
1652 | gradientTransform="matrix(-0.3324832,0.9022288,-0.9582407,-0.3531242,305.29227,19.909497)" | ||
1653 | cx="142.95833" | ||
1654 | cy="107.09234" | ||
1655 | fx="142.95833" | ||
1656 | fy="107.09234" | ||
1657 | r="66.981766" /> | ||
1658 | <radialGradient | ||
1659 | inkscape:collect="always" | ||
1660 | xlink:href="#linearGradient3317" | ||
1661 | id="radialGradient7906" | ||
1662 | gradientUnits="userSpaceOnUse" | ||
1663 | gradientTransform="matrix(1.0036478,-1.0345492e-7,1.7124628e-7,1.6613125,-160.53487,-96.205369)" | ||
1664 | cx="317.78754" | ||
1665 | cy="129.65378" | ||
1666 | fx="317.78754" | ||
1667 | fy="129.65378" | ||
1668 | r="47.863216" /> | ||
1669 | <radialGradient | ||
1670 | inkscape:collect="always" | ||
1671 | xlink:href="#linearGradient8398" | ||
1672 | id="radialGradient7908" | ||
1673 | gradientUnits="userSpaceOnUse" | ||
1674 | gradientTransform="matrix(2.0747661,-0.1577957,0.2382425,3.1325183,-550.77432,-65.728909)" | ||
1675 | cx="325.30847" | ||
1676 | cy="80.909554" | ||
1677 | fx="325.30847" | ||
1678 | fy="80.909554" | ||
1679 | r="26.937988" /> | ||
1680 | <clipPath | ||
1681 | clipPathUnits="userSpaceOnUse" | ||
1682 | id="clipPath8209"> | ||
1683 | <path | ||
1684 | sodipodi:nodetypes="czcc" | ||
1685 | id="path8211" | ||
1686 | d="M 734.03125,519.49186 C 734.03125,519.49186 750.78638,556.50992 762.73266,573.44581 C 774.67895,590.3817 815.45982,629.49186 815.45982,629.49186 L 816.05699,490.90211" | ||
1687 | style="opacity:1;fill:#1a1a1a;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1688 | </clipPath> | ||
1689 | <filter | ||
1690 | inkscape:collect="always" | ||
1691 | id="filter8225"> | ||
1692 | <feGaussianBlur | ||
1693 | inkscape:collect="always" | ||
1694 | stdDeviation="10.661912" | ||
1695 | id="feGaussianBlur8227" /> | ||
1696 | </filter> | ||
1697 | <filter | ||
1698 | inkscape:collect="always" | ||
1699 | id="filter8333"> | ||
1700 | <feGaussianBlur | ||
1701 | inkscape:collect="always" | ||
1702 | stdDeviation="7.18" | ||
1703 | id="feGaussianBlur8335" /> | ||
1704 | </filter> | ||
1705 | <clipPath | ||
1706 | clipPathUnits="userSpaceOnUse" | ||
1707 | id="clipPath8338"> | ||
1708 | <path | ||
1709 | sodipodi:nodetypes="czzzzzzcccccccccczczz" | ||
1710 | id="path8340" | ||
1711 | d="M 266.27183,924.57185 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87513 C 271.58023,990.42751 284.75966,1019.7825 288.68798,1037.0589 C 292.61419,1054.326 291.38211,1075.3686 276.22854,1088.2071 C 260.91093,1101.1846 234.17727,1109.8061 208.39624,1103.9409 C 182.61518,1098.0756 138.84716,1054.7175 119.80605,1033.7126 C 100.6939,1012.6293 56.045182,939.86193 41.867507,909.4368 C 27.689835,879.01168 29.207902,872.71823 33.747792,863.90708 C 24.38107,839.38658 21.33408,813.84026 0.035334479,788.33044 C 30.360814,791.44487 43.915624,815.28676 60.161024,835.47019 C 54.631128,787.39416 42.106309,771.05368 31.787072,744.74589 C 61.781367,750.82754 82.366432,776.61828 95.766855,817.45839 C 105.32101,813.54047 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.0048,738.48134 89.267014,707.32725 C 142.70898,712.99757 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.426,751.28935 177.62715,712.76848 195.01526,670.98819 C 195.01526,670.98819 243.30204,736.42507 249.40491,756.79397 C 255.50779,777.16287 250.92373,795.49448 250.92373,795.49448 C 250.92373,795.49448 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82996 267.68496,905.69193 266.27183,924.57185 z" | ||
1712 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1713 | </clipPath> | ||
1714 | <filter | ||
1715 | inkscape:collect="always" | ||
1716 | id="filter8354"> | ||
1717 | <feGaussianBlur | ||
1718 | inkscape:collect="always" | ||
1719 | stdDeviation="6.82" | ||
1720 | id="feGaussianBlur8356" /> | ||
1721 | </filter> | ||
1722 | <clipPath | ||
1723 | clipPathUnits="userSpaceOnUse" | ||
1724 | id="clipPath8359"> | ||
1725 | <path | ||
1726 | sodipodi:nodetypes="czzzzzzcccccccccczczz" | ||
1727 | id="path8361" | ||
1728 | d="M 266.27183,924.57185 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87513 C 271.58023,990.42751 284.75966,1019.7825 288.68798,1037.0589 C 292.61419,1054.326 291.38211,1075.3686 276.22854,1088.2071 C 260.91093,1101.1846 234.17727,1109.8061 208.39624,1103.9409 C 182.61518,1098.0756 138.84716,1054.7175 119.80605,1033.7126 C 100.6939,1012.6293 56.045182,939.86193 41.867507,909.4368 C 27.689835,879.01168 29.207902,872.71823 33.747792,863.90708 C 24.38107,839.38658 21.33408,813.84026 0.035334479,788.33044 C 30.360814,791.44487 43.915624,815.28676 60.161024,835.47019 C 54.631128,787.39416 42.106309,771.05368 31.787072,744.74589 C 61.781367,750.82754 82.366432,776.61828 95.766855,817.45839 C 105.32101,813.54047 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.0048,738.48134 89.267014,707.32725 C 142.70898,712.99757 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.426,751.28935 177.62715,712.76848 195.01526,670.98819 C 195.01526,670.98819 243.30204,736.42507 249.40491,756.79397 C 255.50779,777.16287 250.92373,795.49448 250.92373,795.49448 C 250.92373,795.49448 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82996 267.68496,905.69193 266.27183,924.57185 z" | ||
1729 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1730 | </clipPath> | ||
1731 | <filter | ||
1732 | inkscape:collect="always" | ||
1733 | id="filter8379" | ||
1734 | x="-0.14413793" | ||
1735 | width="1.288276" | ||
1736 | y="-0.10278689" | ||
1737 | height="1.2055738"> | ||
1738 | <feGaussianBlur | ||
1739 | inkscape:collect="always" | ||
1740 | stdDeviation="7.389266" | ||
1741 | id="feGaussianBlur8381" /> | ||
1742 | </filter> | ||
1743 | <clipPath | ||
1744 | clipPathUnits="userSpaceOnUse" | ||
1745 | id="clipPath8392"> | ||
1746 | <path | ||
1747 | sodipodi:nodetypes="czzzzzzcccccccccczczz" | ||
1748 | id="path8394" | ||
1749 | d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z" | ||
1750 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1751 | </clipPath> | ||
1752 | <filter | ||
1753 | inkscape:collect="always" | ||
1754 | id="filter8404" | ||
1755 | x="-0.090268657" | ||
1756 | width="1.1805373" | ||
1757 | y="-0.10250848" | ||
1758 | height="1.205017"> | ||
1759 | <feGaussianBlur | ||
1760 | inkscape:collect="always" | ||
1761 | stdDeviation="5.3457272" | ||
1762 | id="feGaussianBlur8406" /> | ||
1763 | </filter> | ||
1764 | <clipPath | ||
1765 | clipPathUnits="userSpaceOnUse" | ||
1766 | id="clipPath8417"> | ||
1767 | <path | ||
1768 | sodipodi:nodetypes="czzzzzzcccccccccczczz" | ||
1769 | id="path8419" | ||
1770 | d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z" | ||
1771 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1772 | </clipPath> | ||
1773 | </defs> | ||
1774 | <metadata | ||
1775 | id="metadata7"> | ||
1776 | <rdf:RDF> | ||
1777 | <cc:Work | ||
1778 | rdf:about=""> | ||
1779 | <dc:format>image/svg+xml</dc:format> | ||
1780 | <dc:type | ||
1781 | rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> | ||
1782 | </cc:Work> | ||
1783 | </rdf:RDF> | ||
1784 | </metadata> | ||
1785 | <g | ||
1786 | inkscape:groupmode="layer" | ||
1787 | id="layer1" | ||
1788 | inkscape:label="Shadow"> | ||
1789 | <path | ||
1790 | style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter11361);enable-background:new" | ||
1791 | d="M 304.64285,526.6479 C 294.64285,527.00505 286.42857,529.50504 286.42857,529.50504 L 293.92857,535.57647 L 304.28571,539.1479 L 320.35714,539.50504 L 342.85714,534.1479 L 350.71428,535.21933 L 371.07143,533.07647 L 360.71428,539.86219 C 366.17351,538.83858 378.10757,543.4313 370.35714,545.21933 C 368.61714,545.62075 384.28571,540.57648 384.28571,540.57648 L 386.78571,535.93361 L 390.35714,526.6479 L 401.78571,526.6479 L 419.99999,522.00504 L 423.57143,517.00505 L 407.49999,518.07647 L 395.35714,520.21933 L 380.71428,515.21933 L 310.02218,531.92707 L 304.64285,526.6479 z" | ||
1792 | id="path10326" | ||
1793 | sodipodi:nodetypes="cccccccccsccccccccccc" | ||
1794 | transform="matrix(10.726753,0,0,10.726753,-2882.1235,-4565.4583)" | ||
1795 | inkscape:export-filename="/home/cheeseness/Documents/LCA09/mascot/tuz_new.png" | ||
1796 | inkscape:export-xdpi="142.10527" | ||
1797 | inkscape:export-ydpi="142.10527" /> | ||
1798 | </g> | ||
1799 | <g | ||
1800 | inkscape:groupmode="layer" | ||
1801 | id="layer20" | ||
1802 | inkscape:label="New Ear"> | ||
1803 | <g | ||
1804 | style="opacity:1;display:inline;enable-background:new" | ||
1805 | id="g7882" | ||
1806 | transform="matrix(0.71084,-0.1937433,0.262963,0.9648058,503.68027,136.48399)"> | ||
1807 | <path | ||
1808 | sodipodi:nodetypes="czzzzcc" | ||
1809 | id="path7876" | ||
1810 | d="M 245.12255,100.05344 C 245.12255,100.05344 197.99444,68.406519 177.9079,64.252501 C 157.86998,60.108538 139.435,60.934923 125.97426,77.859824 C 112.51352,94.784725 113.89687,139.12502 112.43872,164.82937 C 110.98057,190.53372 114.98817,235.00638 130.04332,253.49489 C 145.09848,271.98339 175.92966,267.07991 179.97027,274.90859 C 182.1831,279.19595 245.12255,100.05344 245.12255,100.05344 z" | ||
1811 | style="opacity:1;fill:url(#radialGradient7904);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1812 | <path | ||
1813 | sodipodi:nodetypes="czzzzzc" | ||
1814 | id="path7878" | ||
1815 | d="M 135.37935,82.017807 C 135.37935,82.017807 161.7229,83.95659 173.01242,95.920995 C 184.42736,108.01833 186.74699,117.25251 188.30828,133.65558 C 189.87165,150.08057 187.45871,162.0737 180.49446,169.69292 C 173.53021,177.31214 179.49017,189.27624 154.57841,181.76399 C 129.66665,174.25174 127.54617,153.98101 128.06318,135.45924 C 128.58039,116.93026 135.37935,82.017807 135.37935,82.017807 z" | ||
1816 | style="opacity:1;fill:url(#radialGradient7906);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1817 | <path | ||
1818 | sodipodi:nodetypes="czccssc" | ||
1819 | id="path7880" | ||
1820 | d="M 135.648,81.927211 C 135.648,81.927211 131.00335,98.292286 136.23625,110.49031 C 141.72419,123.28285 163.4605,154.75038 163.4605,165.14596 L 186.11675,160.14596 C 188.65893,153.17952 189.32727,144.3939 188.30425,133.64596 C 186.74296,117.24289 184.43795,108.02455 173.023,95.927211 C 163.36812,85.695164 141.42989,82.552354 135.648,81.927211 z" | ||
1821 | style="opacity:1;fill:url(#radialGradient7908);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1822 | </g> | ||
1823 | </g> | ||
1824 | <g | ||
1825 | inkscape:groupmode="layer" | ||
1826 | id="layer21" | ||
1827 | inkscape:label="Rendered2" | ||
1828 | style="display:inline"> | ||
1829 | <path | ||
1830 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1831 | d="M 845.03125,1154.7776 C 840.74554,1155.4919 817.4031,1158.3957 787.17411,1164.7776 C 756.94512,1171.1595 729.86016,1169.7437 651.38803,1182.1072 C 571.53625,1194.688 557.32367,1224.6495 543.26578,1229.1715 C 528.56564,1233.9001 397.88839,1163.349 397.88839,1163.349 L 402.17411,1069.0633 C 402.17411,1069.0633 488.05962,1052.8624 514.31696,1035.4919 C 540.57431,1018.1214 559.89934,985.82588 573.60268,964.06329 C 587.30602,942.3007 606.45982,892.63471 606.45982,892.63471 L 845.03125,1154.7776 z" | ||
1832 | id="path7917" | ||
1833 | sodipodi:nodetypes="czzzcczzcc" /> | ||
1834 | <path | ||
1835 | style="opacity:0.5;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8888);enable-background:accumulate" | ||
1836 | d="M 332.34019,898.38549 L 299.60838,837.08593 L 261.99104,882.19239 C 264.16779,883.5095 267.76529,861.33636 307.59144,817.77531 L 332.34019,898.38549 z" | ||
1837 | id="path7919" | ||
1838 | clip-path="url(#clipPath8658)" | ||
1839 | sodipodi:nodetypes="ccccc" | ||
1840 | transform="translate(276,136)" /> | ||
1841 | <path | ||
1842 | style="opacity:1;fill:url(#linearGradient2841);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8892);enable-background:accumulate" | ||
1843 | d="M 200.81833,863.03015 L 347.18943,811.41136 L 591.14127,1037.6855 L 349.31075,1177.6927 L 168.29141,1090.0114 L 200.81833,863.03015 z" | ||
1844 | id="path7923" | ||
1845 | clip-path="url(#clipPath2833)" | ||
1846 | sodipodi:nodetypes="cccccc" | ||
1847 | transform="translate(276,136)" /> | ||
1848 | <path | ||
1849 | style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1850 | d="M 642.88839,640.13471 C 642.88839,640.13471 613.33433,680.70776 595.03125,714.42042 C 576.72816,748.13309 536.41016,840.77736 524.67411,885.49185 C 512.91471,930.29529 462.17411,1009.0633 462.17411,1009.0633 L 538.24554,1027.2776 C 538.24554,1027.2776 550.05266,1014.4542 569.31696,981.20614 C 588.58126,947.95806 629.67411,842.63471 629.67411,842.63471 L 642.88839,640.13471 z" | ||
1851 | id="path7921" | ||
1852 | sodipodi:nodetypes="czzcczcc" /> | ||
1853 | <path | ||
1854 | style="opacity:0.4;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8856);enable-background:accumulate" | ||
1855 | d="M 430.28131,381.94122 C 423.21025,384.76965 194.10007,414.09303 194.10007,414.09303 L 154.46046,773.92607 L 244.65895,866.56568 L 296.98485,752.01438 L 397.45289,565.62246 L 430.28131,381.94122 z" | ||
1856 | id="path7925" | ||
1857 | sodipodi:nodetypes="ccccccc" | ||
1858 | clip-path="url(#clipPath3665)" | ||
1859 | transform="translate(276,136)" /> | ||
1860 | <path | ||
1861 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1862 | d="M 969.67051,1164.0346 C 969.67051,1164.0346 992.92679,1175.4283 1005.7383,1184.5107 C 1018.4357,1193.5122 1035.2107,1209.1598 1047.4307,1221.8712 C 1059.7362,1234.6714 1067.5434,1244.4699 1088.9634,1246.032 C 1110.3956,1247.5949 1142.2458,1237.2444 1162.2594,1221.3678 C 1182.2729,1205.4912 1207.9063,1152.135 1207.9063,1152.135 L 1080.7455,1009.0633" | ||
1863 | id="path7927" | ||
1864 | sodipodi:nodetypes="czzzzcc" /> | ||
1865 | <path | ||
1866 | style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8860);enable-background:accumulate" | ||
1867 | d="M 331.34019,641.50471 L 216.17367,835.36467 L 260.2153,925.96265 L 357.79603,732.21539 L 331.34019,641.50471 z" | ||
1868 | id="path7929" | ||
1869 | clip-path="url(#clipPath8642)" | ||
1870 | sodipodi:nodetypes="ccccc" | ||
1871 | transform="translate(276,136)" /> | ||
1872 | <g | ||
1873 | style="opacity:1;display:inline;enable-background:new" | ||
1874 | id="g7931" | ||
1875 | transform="matrix(0.9934486,0.1142802,-0.1142802,0.9934486,-9.24324,588.09054)" | ||
1876 | inkscape:transform-center-x="-347.89063" | ||
1877 | inkscape:transform-center-y="-28.255779"> | ||
1878 | <path | ||
1879 | style="opacity:1;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1880 | d="M 1049.205,-282.26672 L 1049.1152,-282.25891 C 1047.7278,-281.37446 1042.5119,-280.65171 1042.4862,-272.73547 C 1042.462,-265.31022 1057.4991,-255.64401 1059.6425,-254.64172 C 1061.3727,-253.83263 1063.2341,-253.23296 1065.0488,-252.92297 L 1066.4862,-252.70422 C 1068.4059,-252.49228 1070.2062,-252.55281 1071.6425,-252.79797 C 1074.7616,-253.33037 1076.7541,-253.72658 1078.7362,-254.11047 C 1080.7183,-254.49434 1081.2105,-255.15012 1082.2988,-255.39172 C 1083.4275,-255.64238 1084.569,-255.27543 1088.5488,-256.26672 C 1092.5283,-257.258 1093.9782,-257.68602 1094.6738,-258.04797 C 1095.396,-258.42398 1096.4355,-258.91855 1097.0488,-259.57922 C 1099.0117,-259.59127 1100.8425,-259.87027 1102.2675,-260.42297 C 1105.2187,-261.56758 1107.1407,-262.29239 1108.955,-263.17297 C 1110.4107,-263.8795 1111.2741,-264.875 1111.4862,-265.17297 C 1111.6985,-265.47093 1111.5852,-265.90152 1111.6112,-265.92297 C 1111.6542,-265.95821 1111.9517,-266.017 1112.1112,-266.36047 C 1112.9702,-268.20755 1114.4344,-271.98811 1114.5488,-272.67297 C 1114.6625,-273.35512 1114.7168,-274.02574 1114.7675,-274.42297 C 1114.7969,-274.65248 1114.6204,-275.30187 1114.6425,-275.36047 C 1114.6734,-275.44252 1114.9308,-275.61104 1114.9862,-275.86047 C 1115.2525,-277.05879 1115.0754,-278.06783 1114.8612,-279.48547 C 1114.6473,-280.90311 1113.8896,-284.1001 1113.2362,-284.95422 C 1112.8168,-285.50279 1112.4369,-285.74672 1112.08,-285.86047 C 1112.0129,-285.87776 1111.9561,-285.90721 1111.8925,-285.92297 C 1111.8715,-285.92695 1111.8508,-285.91983 1111.83,-285.92297 C 1111.5184,-285.99847 1111.2215,-286.08164 1110.6738,-286.14172 C 1109.6883,-286.24984 1108.2491,-286.40112 1106.705,-286.39172 C 1106.1903,-286.38859 1105.6679,-286.34408 1105.1425,-286.29797 C 1101.5836,-285.98569 1096.1327,-285.30689 1094.9238,-284.67297 C 1093.2907,-285.00699 1090.9756,-285.2852 1088.9862,-285.14172 C 1085.9222,-284.92075 1084.0185,-284.79953 1082.08,-284.67297 C 1080.1416,-284.54642 1080.3939,-284.28433 1079.1738,-284.20422 C 1077.8547,-284.11762 1077.3869,-284.42747 1073.6112,-284.11047 C 1070.0655,-283.81275 1064.6306,-283.1173 1063.3925,-282.48547 C 1061.7591,-282.81998 1059.4466,-283.09786 1057.455,-282.95422 C 1054.3908,-282.73324 1052.4872,-282.58078 1050.5488,-282.45422 C 1049.8896,-282.41119 1049.5064,-282.33029 1049.205,-282.26672 z" | ||
1881 | id="path7933" /> | ||
1882 | <g | ||
1883 | clip-path="url(#clipPath7616)" | ||
1884 | style="display:inline;filter:url(#filter7610);enable-background:new" | ||
1885 | id="g7935" | ||
1886 | transform="matrix(0.9975712,-6.9654277e-2,6.9654277e-2,0.9975712,872.72062,140.02502)"> | ||
1887 | <path | ||
1888 | sodipodi:nodetypes="ccssscsssscscsscsssccscssccsscssscc" | ||
1889 | id="path7937" | ||
1890 | d="M 229.94262,-409.12268 C 226.38481,-409.07267 220.91842,-408.76259 219.63928,-408.21854 C 218.03319,-408.66601 215.73612,-409.09985 213.73933,-409.09528 C 210.66734,-409.08826 208.77464,-409.08651 206.83206,-409.09528 C 206.17159,-409.09827 205.78447,-409.02811 205.47939,-408.98569 C 205.47939,-408.98569 205.47939,-407.88976 205.47939,-407.88976 C 205.59911,-408.06923 205.87191,-408.58022 206.42914,-408.65691 C 207.17672,-408.7598 211.59842,-408.80814 213.73933,-408.76651 C 215.51393,-408.73198 218.19456,-408.49224 220.12854,-407.80756 C 220.44994,-407.69378 220.74779,-407.53378 221.02073,-407.39659 C 222.98415,-406.40966 228.96409,-403.09505 228.96409,-403.09505 C 228.96409,-403.09505 222.33134,-407.04273 221.48122,-407.53358 C 221.27791,-407.65097 220.90658,-407.79127 220.44513,-407.94456 C 221.66576,-408.39235 225.5211,-408.56427 228.27336,-408.65691 C 231.29786,-408.75873 231.62112,-408.7465 233.68405,-408.46512 C 235.81336,-408.17469 237.02256,-407.86236 237.02256,-407.86236 C 237.02255,-407.86236 236.9442,-408.50354 238.05865,-408.65691 C 238.80622,-408.7598 243.22794,-408.80814 245.36884,-408.76651 C 247.43834,-408.72625 250.73489,-408.35935 252.65024,-407.39659 C 253.65356,-406.89226 255.68588,-405.82796 257.44559,-404.86088 L 257.5412,-404.88031 C 257.5412,-404.88031 253.96086,-407.04273 253.11073,-407.53358 C 252.90742,-407.65097 252.5361,-407.79127 252.07464,-407.94456 C 253.29526,-408.39235 257.12183,-408.56427 259.87409,-408.65691 C 262.89859,-408.75873 263.22184,-408.7465 265.28478,-408.46512 C 267.23794,-408.19872 268.2977,-407.93506 268.47939,-407.88976 C 268.47939,-407.88976 268.4523,-408.20122 268.4523,-408.20122 C 268.04327,-408.33767 267.73806,-408.43457 267.05192,-408.5587 C 265.75111,-408.79403 263.6528,-409.16026 261.54335,-409.12268 C 257.9714,-409.05904 252.49007,-408.76672 251.24001,-408.21854 C 249.63418,-408.66549 247.36339,-409.09984 245.36884,-409.09528 C 242.29685,-409.08826 240.37536,-409.08651 238.43279,-409.09528 C 236.49023,-409.10406 236.72011,-408.81621 235.49721,-408.8213 C 234.1753,-408.8268 233.73109,-409.17593 229.94262,-409.12268 C 229.94262,-409.12268 229.94262,-409.12268 229.94262,-409.12268" | ||
1891 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1892 | <path | ||
1893 | id="path7939" | ||
1894 | d="M 206.1989,-407.47878 C 208.11911,-406.66172 210.77605,-405.28595 212.35787,-404.08139 C 213.93971,-402.87683 215.26544,-402.30771 217.91246,-400.16344 C 218.79803,-399.44606 219.66111,-398.81359 220.50439,-398.2417 L 221.04496,-398.43181 C 220.33173,-398.9152 219.5772,-399.45212 218.77587,-400.05384 C 215.95364,-402.17305 215.14932,-402.86357 212.7608,-404.32798 C 210.37226,-405.79238 208.66132,-406.69374 206.1989,-407.47878 C 206.1989,-407.47878 206.1989,-407.47878 206.1989,-407.47878" | ||
1895 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1896 | sodipodi:nodetypes="cssccsscc" /> | ||
1897 | <path | ||
1898 | id="path7941" | ||
1899 | d="M 237.79963,-407.47878 C 239.71984,-406.66172 242.40557,-405.28595 243.98738,-404.08139 C 244.80045,-403.46223 245.54587,-403.01097 246.43784,-402.42738 L 247.08684,-402.54404 C 246.28853,-403.12041 245.51507,-403.63839 244.39031,-404.32798 C 242.00177,-405.79238 240.26205,-406.69374 237.79963,-407.47878 C 237.79963,-407.47878 237.79963,-407.47878 237.79963,-407.47878" | ||
1900 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1901 | sodipodi:nodetypes="csccscc" /> | ||
1902 | </g> | ||
1903 | <g | ||
1904 | clip-path="url(#clipPath7606)" | ||
1905 | id="g7943"> | ||
1906 | <path | ||
1907 | style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7578);enable-background:new" | ||
1908 | d="M 1056.25,-278.80481 C 1060.3946,-280.28358 1066.25,-275.67981 1066.25,-275.67981 C 1067.149,-275.39889 1068.9751,-276.57428 1068.8743,-277.36595 C 1068.8743,-277.36595 1067.324,-279.22657 1068.5,-280.30481 C 1069.676,-281.38305 1073.796,-278.79743 1076,-278.67981 C 1078.204,-278.56219 1081.5621,-278.90922 1083,-279.42981 C 1084.4379,-279.9504 1084.1129,-280.8544 1085.625,-281.17981 C 1087.1371,-281.50522 1090.7439,-280.14227 1092.6855,-280.01098 C 1094.6271,-279.87969 1097.3336,-279.67671 1098.5,-280.17981 C 1099.6664,-280.68291 1098.6782,-281.33902 1100.375,-282.05481 C 1102.0718,-282.7706 1108.1352,-283.01143 1110,-282.17981 C 1111.8648,-281.34819 1111.8099,-281.66061 1112.625,-279.17981 C 1113.4401,-276.69901 1120.0648,-274.01696 1111.5,-265.80481 C 1102.9352,-257.59266 1052.1221,-252.01887 1045.875,-263.05481 C 1039.6279,-274.09075 1052.1054,-277.32604 1056.25,-278.80481 z" | ||
1909 | id="path7945" | ||
1910 | sodipodi:nodetypes="czzzzzzzzzzzzzz" /> | ||
1911 | <path | ||
1912 | style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7594);enable-background:new" | ||
1913 | d="M 1058.5,-275.42981 C 1062.6446,-276.90858 1068.5,-272.30481 1068.5,-272.30481 C 1069.399,-272.02389 1071.2251,-273.19928 1071.1243,-273.99095 C 1071.1243,-273.99095 1069.574,-275.85157 1070.75,-276.92981 C 1071.926,-278.00805 1076.046,-275.42243 1078.25,-275.30481 C 1080.454,-275.18719 1083.8121,-275.53422 1085.25,-276.05481 C 1086.6879,-276.5754 1086.3629,-277.4794 1087.875,-277.80481 C 1089.3871,-278.13022 1092.9939,-276.76727 1094.9355,-276.63598 C 1096.8771,-276.50469 1099.5836,-276.30171 1100.75,-276.80481 C 1101.9164,-277.30791 1100.9282,-277.96402 1102.625,-278.67981 C 1104.3218,-279.3956 1110.3852,-279.63643 1112.25,-278.80481 C 1114.1148,-277.97319 1114.0599,-278.28561 1114.875,-275.80481 C 1115.6901,-273.32401 1122.3148,-270.64196 1113.75,-262.42981 C 1105.1852,-254.21766 1054.3721,-248.64387 1048.125,-259.67981 C 1041.8779,-270.71575 1054.3554,-273.95104 1058.5,-275.42981 z" | ||
1914 | id="path7947" | ||
1915 | sodipodi:nodetypes="czzzzzzzzzzzzzz" /> | ||
1916 | </g> | ||
1917 | </g> | ||
1918 | <path | ||
1919 | style="opacity:1;fill:#101414;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1920 | d="M 628.24553,347.99185 C 624.4411,322.72762 611.43581,297.35383 611.08873,272.46662 C 610.90247,259.1111 614.36158,245.89571 624.84426,232.91257 C 661.19128,167.61674 741.78517,148.21789 810.77892,141.44715 C 897.70131,130.43035 995.69159,159.38722 1044.1503,236.84843 C 1098.2743,312.58173 1100.825,409.38755 1124.7623,496.37638 C 1154.2001,623.50398 1179.5414,752.59052 1185.1545,883.22673 C 1182.0911,961.40858 1176.7282,1048.4109 1124.6513,1111.3609 C 1076.6248,1161.7183 1001.8649,1161.4139 937.58145,1170.3632 C 847.02606,1175.0182 753.22992,1154.2174 675.79755,1106.165 C 611.02191,1068.225 580.06736,992.68633 578.51815,920.14538 C 570.12898,840.27022 604.90967,766.33687 630.13855,692.98577 C 637.60916,610.2247 639.55141,526.73802 639.79189,443.60093 C 638.95507,411.40549 632.70236,379.7836 628.24553,347.99185 z" | ||
1921 | id="path7949" | ||
1922 | sodipodi:nodetypes="cscccccccccccc" /> | ||
1923 | <path | ||
1924 | style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8940);enable-background:accumulate" | ||
1925 | d="M 311.83409,415.43155 L 321.73359,537.05392 L 261.62951,673.52553 L 277.18586,848.1809 C 292.79912,910.0601 309.37131,946.84995 351.56201,965.23473 C 355.88112,928.99475 312.95049,822.27485 312.31937,776.11489 C 311.68792,729.93044 323.14971,667.50703 342.99704,617.81842 C 363.04539,567.62654 379.89378,572.972 385.12193,525.22549 C 390.35008,477.47898 367.69553,375.83357 367.69553,375.83357 L 311.83409,415.43155 z" | ||
1926 | id="path7951" | ||
1927 | sodipodi:nodetypes="ccccczzzcc" | ||
1928 | clip-path="url(#clipPath8616)" | ||
1929 | transform="translate(276,136)" /> | ||
1930 | <path | ||
1931 | style="opacity:1;fill:url(#linearGradient8970);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
1932 | d="M 1010.0312,655.49186 C 1010.0312,655.49186 1026.7864,692.50992 1038.7327,709.44581 C 1050.6789,726.3817 1091.4598,765.49186 1091.4598,765.49186 L 1144.057,637.90211" | ||
1933 | id="path7953" | ||
1934 | sodipodi:nodetypes="czcc" /> | ||
1935 | <path | ||
1936 | style="opacity:0.07999998;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8822);enable-background:accumulate" | ||
1937 | d="M 730.31998,536.56864 C 730.31998,545.05392 772.86772,595.03667 772.86772,595.03667 L 785.47431,566.26713 L 730.31998,536.56864 z" | ||
1938 | id="path7955" | ||
1939 | sodipodi:nodetypes="cccc" | ||
1940 | clip-path="url(#clipPath8209)" | ||
1941 | transform="translate(276,136)" /> | ||
1942 | <g | ||
1943 | transform="translate(450.03125,73.843964)" | ||
1944 | style="opacity:1;display:inline;enable-background:new" | ||
1945 | id="g7957" | ||
1946 | clip-path="url(#clipPath3998)"> | ||
1947 | <g | ||
1948 | transform="translate(-174.03125,62.156036)" | ||
1949 | style="filter:url(#filter3677)" | ||
1950 | id="g7959"> | ||
1951 | <g | ||
1952 | id="g7961" | ||
1953 | style="filter:url(#filter3785)"> | ||
1954 | <path | ||
1955 | sodipodi:nodetypes="czzzzzzzzzz" | ||
1956 | id="path7963" | ||
1957 | d="M 425.88244,476.99186 C 436.68787,475.5132 450.62645,480.34637 470.5253,480.20614 C 490.42415,480.06591 527.97852,463.29492 552.66815,463.06328 C 577.35778,462.83164 615.41985,475.34734 631.95387,478.06328 C 648.48789,480.77922 654.80219,477.90476 659.45386,485.92043 C 664.10553,493.9361 661.38057,496.66767 649.09672,506.63472 C 636.81287,516.60177 608.30704,519.27104 583.02529,519.49186 C 557.74295,519.71268 512.644,526.57038 487.66815,523.42042 C 462.6923,520.27046 430.73059,515.59775 418.73958,505.56328 C 406.74857,495.52881 398.88874,488.83146 401.23958,481.63471 C 403.59042,474.43796 415.07701,478.47052 425.88244,476.99186 z" | ||
1958 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1959 | <rect | ||
1960 | y="412.60312" | ||
1961 | x="343.6539" | ||
1962 | height="181.01935" | ||
1963 | width="381.83765" | ||
1964 | id="rect7965" | ||
1965 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1966 | </g> | ||
1967 | <g | ||
1968 | id="g7967" | ||
1969 | style="filter:url(#filter3785)"> | ||
1970 | <path | ||
1971 | sodipodi:nodetypes="czzzcc" | ||
1972 | id="path7969" | ||
1973 | d="M 687.14286,452.36218 C 676.68117,462.07661 600.16326,471.36732 586.42857,481.6479 C 572.69388,491.92848 571.67605,494.53616 574.28571,501.6479 C 576.89537,508.75964 580.83098,511.05362 600,510.21932 C 619.16902,509.38502 698.57143,482.5976 698.57143,488.79075 L 687.14286,452.36218 z" | ||
1974 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
1975 | transform="translate(174.03125,-62.156036)" /> | ||
1976 | <rect | ||
1977 | y="344.82138" | ||
1978 | x="702.86414" | ||
1979 | height="162.63455" | ||
1980 | width="207.8894" | ||
1981 | id="rect7971" | ||
1982 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
1983 | </g> | ||
1984 | </g> | ||
1985 | <g | ||
1986 | transform="translate(-174.03125,62.156036)" | ||
1987 | style="opacity:0.18000004;display:inline;enable-background:new" | ||
1988 | id="g7973"> | ||
1989 | <g | ||
1990 | id="g7975" | ||
1991 | style="filter:url(#filter3785)"> | ||
1992 | <path | ||
1993 | sodipodi:nodetypes="czzzzzzzzzz" | ||
1994 | id="path7977" | ||
1995 | d="M 425.88244,476.99186 C 436.68787,475.5132 450.62645,480.34637 470.5253,480.20614 C 490.42415,480.06591 527.97852,463.29492 552.66815,463.06328 C 577.35778,462.83164 615.41985,475.34734 631.95387,478.06328 C 648.48789,480.77922 654.80219,477.90476 659.45386,485.92043 C 664.10553,493.9361 661.38057,496.66767 649.09672,506.63472 C 636.81287,516.60177 608.30704,519.27104 583.02529,519.49186 C 557.74295,519.71268 512.644,526.57038 487.66815,523.42042 C 462.6923,520.27046 430.73059,515.59775 418.73958,505.56328 C 406.74857,495.52881 398.88874,488.83146 401.23958,481.63471 C 403.59042,474.43796 415.07701,478.47052 425.88244,476.99186 z" | ||
1996 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
1997 | <rect | ||
1998 | y="412.60312" | ||
1999 | x="343.6539" | ||
2000 | height="181.01935" | ||
2001 | width="381.83765" | ||
2002 | id="rect7979" | ||
2003 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2004 | </g> | ||
2005 | <g | ||
2006 | id="g7981" | ||
2007 | style="filter:url(#filter3785)"> | ||
2008 | <path | ||
2009 | sodipodi:nodetypes="czzzcc" | ||
2010 | id="path7983" | ||
2011 | d="M 687.14286,452.36218 C 676.68117,462.07661 600.16326,471.36732 586.42857,481.6479 C 572.69388,491.92848 571.67605,494.53616 574.28571,501.6479 C 576.89537,508.75964 580.83098,511.05362 600,510.21932 C 619.16902,509.38502 698.57143,482.5976 698.57143,488.79075 L 687.14286,452.36218 z" | ||
2012 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2013 | transform="translate(174.03125,-62.156036)" /> | ||
2014 | <rect | ||
2015 | y="344.82138" | ||
2016 | x="702.86414" | ||
2017 | height="162.63455" | ||
2018 | width="207.8894" | ||
2019 | id="rect7985" | ||
2020 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2021 | </g> | ||
2022 | </g> | ||
2023 | </g> | ||
2024 | <path | ||
2025 | style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8802);enable-background:accumulate" | ||
2026 | d="M 582.65599,-7.4183011 L 695.79307,78.848726 L 804.68752,337.64981 L 842.87128,545.5392 L 963.07944,637.46308 C 963.07944,637.46308 950.35151,350.37773 943.28044,323.50767 C 936.20938,296.63761 793.37381,-69.643698 793.37381,-69.643698 L 582.65599,-7.4183011 z" | ||
2027 | id="path7987" | ||
2028 | clip-path="url(#clipPath8604)" | ||
2029 | sodipodi:nodetypes="cccccscc" | ||
2030 | transform="translate(276,136)" /> | ||
2031 | <path | ||
2032 | style="opacity:1;fill:url(#linearGradient8958);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2033 | d="M 964.13839,239.599 C 964.13839,239.599 972.81571,250.49562 988.24554,251.56328 C 1003.6754,252.63094 1037.9672,211.61061 1058.4241,199.42043 C 1078.9034,187.2169 1105.4705,172.81818 1122.3527,179.06329 C 1139.2348,185.30839 1144.5105,205.49938 1150.2098,227.099 C 1155.9092,248.69861 1156.9284,288.91289 1147.5313,319.95615 C 1138.1341,350.9994 1097.028,393.0599 1082.1741,423.349 C 1067.3202,453.6381 1070.567,463.17043 1070.567,463.17043" | ||
2034 | id="path7989" | ||
2035 | sodipodi:nodetypes="czzzzzzc" /> | ||
2036 | <path | ||
2037 | style="opacity:1;fill:url(#radialGradient3315);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2038 | d="M 1124.4955,207.63471 C 1108.6027,206.74185 1074.7767,219.74054 1058.4241,231.92043 C 1041.9855,244.16433 1029.2032,256.03483 1029.1384,284.06328 C 1029.0732,312.26932 1042.2575,323.13969 1058.2455,331.02757 C 1074.2335,338.91546 1091.9317,338.14685 1110.2098,319.24186 C 1128.488,300.33686 1124.4955,207.63471 1124.4955,207.63471 z" | ||
2039 | id="path7991" | ||
2040 | sodipodi:nodetypes="czzzzc" /> | ||
2041 | <path | ||
2042 | sodipodi:type="arc" | ||
2043 | style="opacity:0.75;fill:url(#radialGradient3543);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4120);enable-background:accumulate" | ||
2044 | id="path7993" | ||
2045 | sodipodi:cx="385" | ||
2046 | sodipodi:cy="237.00504" | ||
2047 | sodipodi:rx="86.428574" | ||
2048 | sodipodi:ry="73.928574" | ||
2049 | d="M 471.42857,237.00504 A 86.428574,73.928574 0 1 1 298.57143,237.00504 A 86.428574,73.928574 0 1 1 471.42857,237.00504 z" | ||
2050 | transform="matrix(0.9434749,-0.1239943,0.1440089,1.0957669,451.94827,134.5988)" | ||
2051 | clip-path="url(#clipPath4100)" /> | ||
2052 | <path | ||
2053 | transform="translate(450.03125,73.843964)" | ||
2054 | style="opacity:1;fill:url(#radialGradient3915);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2055 | d="M 527.60588,407.44884 C 527.60588,407.44884 405.56444,445.85232 340.09154,417.08065 C 274.61865,388.30899 265.71429,292.36218 265.71429,292.36218 C 265.71429,292.36218 339.09587,211.85825 395.63507,208.74742 C 451.46212,205.67578 486.20893,228.89074 510.50508,274.59913 C 534.85708,320.41261 527.60588,407.44884 527.60588,407.44884 z" | ||
2056 | id="path7995" | ||
2057 | sodipodi:nodetypes="csczzc" | ||
2058 | mask="url(#mask3684)" /> | ||
2059 | <path | ||
2060 | style="opacity:1;fill:url(#linearGradient3959);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2061 | d="M 772.17411,393.349 C 772.17411,393.349 808.39165,365.96653 823.78125,357.45614 C 838.95859,349.06313 849.49553,345.849 859.6741,345.849 L 844.13839,412.81328" | ||
2062 | id="path7997" | ||
2063 | sodipodi:nodetypes="czcc" /> | ||
2064 | <path | ||
2065 | sodipodi:type="arc" | ||
2066 | style="opacity:1;fill:url(#radialGradient3933);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2067 | id="path7999" | ||
2068 | sodipodi:cx="409.28571" | ||
2069 | sodipodi:cy="306.64789" | ||
2070 | sodipodi:rx="36.25" | ||
2071 | sodipodi:ry="36.25" | ||
2072 | d="M 445.53571,306.64789 A 36.25,36.25 0 1 1 373.03571,306.64789 A 36.25,36.25 0 1 1 445.53571,306.64789 z" | ||
2073 | transform="translate(449.49554,74.915393)" /> | ||
2074 | <path | ||
2075 | style="opacity:0.3;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8806);enable-background:accumulate" | ||
2076 | d="M 311.83409,415.43155 L 321.73359,537.05392 L 261.62951,673.52553 L 277.18586,848.1809 C 292.79912,910.0601 309.37131,946.84995 351.56201,965.23473 C 355.88112,928.99475 360.24362,892.86709 320.31937,742.11489 L 337.99704,672.81842 L 410.12193,534.22549 L 367.69553,375.83357 L 311.83409,415.43155 z" | ||
2077 | id="path8001" | ||
2078 | sodipodi:nodetypes="cccccccccc" | ||
2079 | clip-path="url(#clipPath8616)" | ||
2080 | transform="translate(276,136)" /> | ||
2081 | <path | ||
2082 | style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8826);enable-background:accumulate" | ||
2083 | d="M 635.21025,581.13004 C 621.06811,593.85796 674.44372,615.71019 711.57778,605.17167 C 748.71184,594.63315 816.22265,569.6073 814.81537,525.97571 C 813.40809,482.34413 738.44784,397.28228 738.44784,397.28228 L 635.21025,581.13004 z" | ||
2084 | id="path8003" | ||
2085 | sodipodi:nodetypes="czzcc" /> | ||
2086 | <path | ||
2087 | sodipodi:type="arc" | ||
2088 | style="opacity:1;fill:url(#radialGradient3991);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2089 | id="path8005" | ||
2090 | sodipodi:cx="410" | ||
2091 | sodipodi:cy="306.64789" | ||
2092 | sodipodi:rx="23.214285" | ||
2093 | sodipodi:ry="23.214285" | ||
2094 | d="M 433.21428,306.64789 A 23.214285,23.214285 0 1 1 386.78572,306.64789 A 23.214285,23.214285 0 1 1 433.21428,306.64789 z" | ||
2095 | transform="translate(449.67411,74.915393)" /> | ||
2096 | <path | ||
2097 | sodipodi:type="arc" | ||
2098 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3981);enable-background:accumulate" | ||
2099 | id="path8007" | ||
2100 | sodipodi:cx="414.28571" | ||
2101 | sodipodi:cy="303.07648" | ||
2102 | sodipodi:rx="7.5" | ||
2103 | sodipodi:ry="7.5" | ||
2104 | d="M 421.78571,303.07648 A 7.5,7.5 0 1 1 406.78571,303.07648 A 7.5,7.5 0 1 1 421.78571,303.07648 z" | ||
2105 | transform="translate(451.99554,73.486821)" /> | ||
2106 | <path | ||
2107 | style="opacity:1;fill:url(#radialGradient4112);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2108 | d="M 789.31696,478.349 C 789.31696,478.349 796.33977,497.91759 788.24553,513.349 C 780.15129,528.78041 745.92236,552.33722 720.74554,563.349 C 695.43582,574.41891 635.27254,596.31293 618.95982,605.31328 C 602.49834,614.39571 600.74554,617.99185 600.74554,617.99185 C 600.74554,617.99185 593.59861,598.92744 629.49553,566.20615 C 665.66764,533.23401 771.52265,518.15665 789.31696,478.349 z" | ||
2109 | id="path8009" | ||
2110 | sodipodi:nodetypes="czzzczc" /> | ||
2111 | <g | ||
2112 | style="opacity:1;display:inline;enable-background:new" | ||
2113 | id="g8011" | ||
2114 | transform="translate(780.74553,74.55825)"> | ||
2115 | <path | ||
2116 | transform="translate(-329.81481,0)" | ||
2117 | clip-path="url(#clipPath3999)" | ||
2118 | sodipodi:nodetypes="czzczzzszc" | ||
2119 | id="path8013" | ||
2120 | d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 335.34872,345.57268 330.35714,331.29075 C 325.36556,317.00882 329.12051,327.91101 328.41112,326.19774 C 317.72184,300.38182 294.1968,271.76744 263.92857,261.6479 C 233.66034,251.52836 198.91081,256.79953 179.64286,267.36218 z" | ||
2121 | style="opacity:1;fill:url(#radialGradient3585);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline" /> | ||
2122 | <path | ||
2123 | transform="matrix(0.8823874,0.4705236,-0.4705236,0.8823874,-166.62245,2.387362)" | ||
2124 | d="M 248.28731,338.07648 A 64.715881,134.00607 0 1 1 118.85555,338.07648 A 64.715881,134.00607 0 1 1 248.28731,338.07648 z" | ||
2125 | sodipodi:ry="134.00607" | ||
2126 | sodipodi:rx="64.715881" | ||
2127 | sodipodi:cy="338.07648" | ||
2128 | sodipodi:cx="183.57143" | ||
2129 | id="path8015" | ||
2130 | style="opacity:1;fill:url(#radialGradient4060);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2131 | sodipodi:type="arc" /> | ||
2132 | <path | ||
2133 | transform="matrix(0.8823874,0.4705236,-0.4705236,0.8823874,-162.19388,-18.755495)" | ||
2134 | d="M 248.28731,338.07648 A 64.715881,134.00607 0 1 1 118.85555,338.07648 A 64.715881,134.00607 0 1 1 248.28731,338.07648 z" | ||
2135 | sodipodi:ry="134.00607" | ||
2136 | sodipodi:rx="64.715881" | ||
2137 | sodipodi:cy="338.07648" | ||
2138 | sodipodi:cx="183.57143" | ||
2139 | id="path8017" | ||
2140 | style="opacity:1;fill:url(#radialGradient4062);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2141 | sodipodi:type="arc" /> | ||
2142 | <path | ||
2143 | transform="translate(-329.81481,3e-7)" | ||
2144 | clip-path="url(#clipPath3999)" | ||
2145 | sodipodi:nodetypes="czzczzzszc" | ||
2146 | id="path8019" | ||
2147 | d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 335.34872,345.57268 330.35714,331.29075 C 325.36556,317.00882 329.12051,327.91101 328.41112,326.19774 C 317.72184,300.38182 294.1968,271.76744 263.92857,261.6479 C 233.66034,251.52836 198.91081,256.79953 179.64286,267.36218 z" | ||
2148 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient3587);stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4079);enable-background:new" /> | ||
2149 | </g> | ||
2150 | <path | ||
2151 | sodipodi:type="arc" | ||
2152 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2153 | id="path8021" | ||
2154 | sodipodi:cx="310.71429" | ||
2155 | sodipodi:cy="398.07648" | ||
2156 | sodipodi:rx="19.704132" | ||
2157 | sodipodi:ry="19.704132" | ||
2158 | d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z" | ||
2159 | transform="translate(452.55663,72.581273)" /> | ||
2160 | <path | ||
2161 | sodipodi:type="arc" | ||
2162 | style="opacity:1;fill:url(#radialGradient4056);fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4082);stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4083);enable-background:accumulate" | ||
2163 | id="path8023" | ||
2164 | sodipodi:cx="310.71429" | ||
2165 | sodipodi:cy="398.07648" | ||
2166 | sodipodi:rx="19.704132" | ||
2167 | sodipodi:ry="19.704132" | ||
2168 | d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z" | ||
2169 | transform="translate(450.55663,72.581273)" /> | ||
2170 | <path | ||
2171 | sodipodi:type="arc" | ||
2172 | style="opacity:1;fill:url(#radialGradient4119);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2173 | id="path8025" | ||
2174 | sodipodi:cx="310.71429" | ||
2175 | sodipodi:cy="398.07648" | ||
2176 | sodipodi:rx="19.704132" | ||
2177 | sodipodi:ry="19.704132" | ||
2178 | d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z" | ||
2179 | transform="translate(450.55663,72.581273)" /> | ||
2180 | <path | ||
2181 | sodipodi:type="arc" | ||
2182 | style="opacity:1;fill:url(#radialGradient4868);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.99999994px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4002);enable-background:accumulate" | ||
2183 | id="path8027" | ||
2184 | sodipodi:cx="429.56738" | ||
2185 | sodipodi:cy="377.42877" | ||
2186 | sodipodi:rx="72.079735" | ||
2187 | sodipodi:ry="44.547726" | ||
2188 | d="M 501.64712,377.42877 A 72.079735,44.547726 0 1 1 357.48765,377.42877 A 72.079735,44.547726 0 1 1 501.64712,377.42877 z" | ||
2189 | transform="matrix(0.9969564,-7.7961675e-2,7.7961675e-2,0.9969564,436.61877,125.29509)" | ||
2190 | inkscape:transform-center-x="-47.231976" | ||
2191 | inkscape:transform-center-y="-3.6935079" /> | ||
2192 | <path | ||
2193 | sodipodi:type="arc" | ||
2194 | style="opacity:1;fill:url(#radialGradient4876);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.99999994px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4010);enable-background:accumulate" | ||
2195 | id="path8029" | ||
2196 | sodipodi:cx="437.6991" | ||
2197 | sodipodi:cy="391.21735" | ||
2198 | sodipodi:rx="36.611931" | ||
2199 | sodipodi:ry="22.627417" | ||
2200 | d="M 474.31103,391.21735 A 36.611931,22.627417 0 1 1 401.08717,391.21735 A 36.611931,22.627417 0 1 1 474.31103,391.21735 z" | ||
2201 | transform="matrix(1.4357951,-6.9991037e-2,6.9991037e-2,1.4357951,235.18065,-63.86546)" | ||
2202 | inkscape:transform-center-x="-20.955902" | ||
2203 | inkscape:transform-center-y="-13.056625" /> | ||
2204 | <g | ||
2205 | transform="translate(450.03125,73.843964)" | ||
2206 | id="g8031" | ||
2207 | style="opacity:1;display:inline;filter:url(#filter4053);enable-background:new"> | ||
2208 | <path | ||
2209 | d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z" | ||
2210 | sodipodi:ry="3.2142856" | ||
2211 | sodipodi:rx="3.2142856" | ||
2212 | sodipodi:cy="401.82648" | ||
2213 | sodipodi:cx="413.66071" | ||
2214 | id="path8033" | ||
2215 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4484);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2216 | sodipodi:type="arc" /> | ||
2217 | <path | ||
2218 | transform="translate(13.125009,8.1249913)" | ||
2219 | d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z" | ||
2220 | sodipodi:ry="3.2142856" | ||
2221 | sodipodi:rx="3.2142856" | ||
2222 | sodipodi:cy="401.82648" | ||
2223 | sodipodi:cx="413.66071" | ||
2224 | id="path8035" | ||
2225 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4486);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2226 | sodipodi:type="arc" /> | ||
2227 | <path | ||
2228 | transform="translate(32.946437,7.4999913)" | ||
2229 | d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z" | ||
2230 | sodipodi:ry="3.2142856" | ||
2231 | sodipodi:rx="3.2142856" | ||
2232 | sodipodi:cy="401.82648" | ||
2233 | sodipodi:cx="413.66071" | ||
2234 | id="path8037" | ||
2235 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4488);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2236 | sodipodi:type="arc" /> | ||
2237 | <path | ||
2238 | transform="translate(24.910723,-10.267866)" | ||
2239 | d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z" | ||
2240 | sodipodi:ry="3.2142856" | ||
2241 | sodipodi:rx="3.2142856" | ||
2242 | sodipodi:cy="401.82648" | ||
2243 | sodipodi:cx="413.66071" | ||
2244 | id="path8039" | ||
2245 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4490);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2246 | sodipodi:type="arc" /> | ||
2247 | <path | ||
2248 | transform="translate(47.589294,-0.6250087)" | ||
2249 | d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z" | ||
2250 | sodipodi:ry="3.2142856" | ||
2251 | sodipodi:rx="3.2142856" | ||
2252 | sodipodi:cy="401.82648" | ||
2253 | sodipodi:cx="413.66071" | ||
2254 | id="path8041" | ||
2255 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4492);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2256 | sodipodi:type="arc" /> | ||
2257 | </g> | ||
2258 | <path | ||
2259 | style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2260 | d="M 896.20301,482.92837 C 897.1881,487.27845 900.74008,489.10785 903.58974,490.82019 C 908.05042,493.33311 910.1099,492.3423 912.74425,490.06258 C 914.3462,488.14141 923.42736,485.36393 928.33848,482.99151 C 932.66809,481.5326 937.24178,477.63278 941.723,474.65775 C 945.11814,473.03051 947.06964,475.01239 949.55168,475.6679 C 952.4958,476.38451 953.96285,477.83965 955.6126,479.20344 C 958.00876,480.37863 954.6847,482.34657 958.8956,483.49658 C 960.08651,483.71452 961.31255,484.07303 962.17859,482.99151" | ||
2261 | id="path8043" | ||
2262 | sodipodi:nodetypes="ccccccccc" /> | ||
2263 | <path | ||
2264 | style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2265 | d="M 910.85021,475.35223 C 913.16515,475.32025 914.02799,475.99476 916.34292,474.53148 C 919.79856,471.45035 921.74546,471.38671 924.29787,470.11206 C 927.32444,468.79683 930.83357,478.26375 934.3994,479.96105 C 936.79449,479.13963 935.68854,481.75484 935.85149,482.6127 C 935.90862,485.25954 938.65843,486.29076 940.20777,488.04227 C 943.52381,490.29776 947.583,494.33773 951.31945,493.34557 C 957.7647,490.4145 961.59867,492.06411 967.60816,485.95883 C 968.31221,484.77749 967.02391,479.06423 970.70175,478.76149 C 973.22574,479.01487 974.86842,478.81164 976.76267,479.32971 C 982.20367,481.4469 984.50045,485.77971 991.47301,487.28466 C 997.65591,488.25105 999.08565,491.07892 1005.3626,492.33542" | ||
2266 | id="path8045" | ||
2267 | sodipodi:nodetypes="cccccccccccc" /> | ||
2268 | <path | ||
2269 | style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2270 | d="M 876.98133,483.52197 C 879.37991,482.72817 883.08746,487.71407 885.15446,490.56765 C 885.74727,493.24717 886.30823,496.0541 885.91207,502.68948 C 886.6972,505.10702 888.59256,505.72043 890.7103,505.97248 C 893.82775,505.4357 896.58699,504.64818 898.0339,502.94202 C 899.9055,501.00035 903.34643,505.33596 906.11512,506.98263 C 909.72521,508.89472 913.8889,508.96149 917.98442,509.25547 C 919.688,509.02483 920.35482,513.77062 921.26741,517.3367 C 921.65155,521.71476 920.38197,524.23239 919.49965,527.18568 C 919.20535,529.68223 922.48815,530.71542 925.8131,531.73137 C 928.99554,532.47261 932.35734,533.39321 934.90447,533.49914 C 940.04633,534.37405 942.99321,536.18966 947.0263,537.53975 C 949.26544,538.3563 950.28649,539.78191 951.57199,541.07528" | ||
2271 | id="path8047" | ||
2272 | sodipodi:nodetypes="ccccccccccccc" /> | ||
2273 | <path | ||
2274 | style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter8814);enable-background:new" | ||
2275 | d="M 332,187.69519 C 332,187.69519 389.5,162.19519 389.5,159.69519 C 389.5,157.19519 395,107.69519 395,107.69519 C 395,107.69519 486,59.195189 486.5,57.195189 C 487,55.195189 572.5,-4.8048114 572.5,-4.8048114 L 386.5,17.195189 L 311,123.19519 L 332,187.69519 z" | ||
2276 | id="path8049" | ||
2277 | clip-path="url(#clipPath8514)" | ||
2278 | transform="translate(276,136)" /> | ||
2279 | <path | ||
2280 | style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2281 | d="M 1697.2846,722.5514 C 1697.2846,722.5514 1581.3191,796.0905 1574.2481,800.33314 C 1567.177,804.57578 1343.7312,937.51186 1343.7312,937.51186 L 1347.9739,977.10984 L 1564.3486,876.70067 L 1681.7283,774.8773 L 1697.2846,722.5514 z" | ||
2282 | id="path8051" /> | ||
2283 | <path | ||
2284 | style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8810);enable-background:accumulate" | ||
2285 | d="M 528.91587,556.85291 C 523.25902,555.4387 347.89654,631.80623 347.89654,631.80623 L 313.95541,812.82557 L 365.05087,1006.7738 L 622.25397,1074.4551 C 622.25397,1074.4551 828.72915,1227.1901 834.386,1222.9475 C 840.04286,1218.7049 1002.6774,1029.2002 1002.6774,1029.2002 L 842.87128,845.35248 L 796.20224,667.16157 L 528.91587,556.85291 z" | ||
2286 | id="path8053" | ||
2287 | clip-path="url(#clipPath8610)" | ||
2288 | sodipodi:nodetypes="cccccscccc" | ||
2289 | transform="translate(276,136)" /> | ||
2290 | <path | ||
2291 | style="opacity:1;fill:#0c0c0c;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2292 | d="M 1097.6433,613.88997 C 1097.6433,613.88997 1120.2628,607.38316 1133.386,608.01724 C 1146.5093,608.65133 1164.0276,609.95586 1177.0949,620.20343 C 1190.1622,630.45099 1202.1626,647.3435 1211.2073,678.57308 C 1220.2519,709.80266 1212.9056,777.82509 1205.0312,821.92043 C 1197.1569,866.01577 1176.7661,928.03341 1160.0312,961.92043 C 1143.2964,995.80745 1110.2335,1039.4156 1099.4618,1051.7966 C 1088.0976,1064.8586 1043.2559,1088.2228 1020.0312,1094.0633 C 1025.3346,1083.4567 1068.931,1043.4744 1055.0312,1033.349 C 1041.0123,1023.1367 1009.2712,1079.3314 970.7381,1062.3822 C 992.12041,1049.2501 1012.5175,1011.1961 1004.7787,995.78772 C 996.93846,980.17733 974.07378,1044.5453 911.24317,1032.8006 C 941.29521,1005.2739 966.65023,961.89659 952.50587,949.8209 C 938.09071,937.51403 892.04412,1004.1141 892.04412,1004.1141 C 892.04412,1004.1141 889.22222,962.41287 905.81732,935.50673 C 922.45667,908.52886 985.47029,853.89146 1005.3704,823.80331 C 1025.2706,793.71517 1038.983,757.79429 1047.5059,731.28537 C 1056.0287,704.77645 1063.3068,654.18583 1063.3068,654.18583" | ||
2293 | id="path8055" | ||
2294 | sodipodi:nodetypes="czzzzzzczczczczzzc" /> | ||
2295 | <path | ||
2296 | style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8818);enable-background:accumulate" | ||
2297 | d="M 770.74639,609.17881 L 719.8347,706.75955 L 639.93163,817.77531 L 674.57987,889.19309 L 717.00628,968.38906 L 789.13117,923.13422 L 803.27331,730.80117 L 824.48651,592.20825 L 810.34437,502.05213 L 770.74639,609.17881 z" | ||
2298 | id="path8057" | ||
2299 | clip-path="url(#clipPath8622)" | ||
2300 | sodipodi:nodetypes="cccccccccc" | ||
2301 | transform="translate(276,136)" /> | ||
2302 | <path | ||
2303 | style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8810);enable-background:accumulate" | ||
2304 | d="M 295,846.19519 L 301.64488,777.27234 C 301.64488,777.27234 391.96439,866.27691 464,900.19519 C 536.03561,934.11347 772,962.19519 772,962.19519 L 926,936.19519 L 890,1098.1952 L 604,1124.1952 L 306,1035.1952 L 295,846.19519 z" | ||
2305 | id="path8059" | ||
2306 | clip-path="url(#clipPath8906)" | ||
2307 | sodipodi:nodetypes="cczcccccc" | ||
2308 | transform="translate(276,136)" /> | ||
2309 | <path | ||
2310 | transform="translate(450.03125,73.843964)" | ||
2311 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter3587);enable-background:new" | ||
2312 | d="M 405.79629,845.99023 L 480.74961,911.04406 L 483.24924,927.92446 L 502.6526,938.08337 L 509.14464,961.13446 L 540.85369,952.76336 L 555.70293,1000.8466 C 567.95945,1013.5745 645.49637,887.7369 611.56436,1039.0304 L 550.75318,1055.2939 L 461.55026,960.60104 L 398.72523,906.80141 L 405.79629,845.99023 z" | ||
2313 | id="path8061" | ||
2314 | sodipodi:nodetypes="cccccccccccc" | ||
2315 | clip-path="url(#clipPath3602)" /> | ||
2316 | <path | ||
2317 | style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" | ||
2318 | d="M 1159.317,918.349 C 1213.6027,916.92043 1285.352,903.29701 1329.317,891.56328 C 1373.3697,879.80614 1455.2033,855.21604 1504.674,833.70614 C 1554.0133,812.25342 1618.2778,774.42454 1658.9599,741.56329 C 1699.468,708.8426 1711.3498,685.74348 1719.6741,707.99186 C 1728.0432,730.35965 1703.2672,764.31748 1681.817,789.06329 C 1660.2128,813.98669 1629.0856,841.76862 1582.8883,878.349 C 1536.691,914.92938 1426.8058,979.93363 1370.0312,1006.9204 C 1312.9652,1034.0458 1241.8279,1065.1589 1197.8884,1079.4205 C 1153.9489,1093.6821 1066.4598,1110.4919 1066.4598,1110.4919 L 1159.317,918.349 z" | ||
2319 | id="path8063" | ||
2320 | sodipodi:nodetypes="czzzzzzzzcc" /> | ||
2321 | <path | ||
2322 | transform="translate(450.03125,73.843964)" | ||
2323 | style="opacity:0.5;fill:url(#linearGradient3666);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3779);enable-background:accumulate" | ||
2324 | d="M 1241.5965,652.95007 C 1241.5965,652.95007 1176.875,707.28713 1095.9326,751.94501 C 1013.9082,797.19985 811.67556,845.28311 811.67556,845.28311 C 811.67556,845.28311 796.57419,866.33507 856.93045,873.56739 C 917.28671,880.79971 1081.0124,820.2667 1135.5306,777.40085 C 1190.0488,734.535 1255.7387,665.67799 1255.7387,665.67799 L 1241.5965,652.95007 z" | ||
2325 | id="path8065" | ||
2326 | sodipodi:nodetypes="czczzcc" | ||
2327 | clip-path="url(#clipPath3992)" /> | ||
2328 | <g | ||
2329 | transform="translate(450.03125,73.843964)" | ||
2330 | style="opacity:1;display:inline;enable-background:new" | ||
2331 | id="g8067" | ||
2332 | clip-path="url(#clipPath3986)"> | ||
2333 | <g | ||
2334 | transform="translate(-174.03125,62.156036)" | ||
2335 | style="filter:url(#filter3677)" | ||
2336 | id="g8069"> | ||
2337 | <g | ||
2338 | style="filter:url(#filter3785)" | ||
2339 | id="g8071"> | ||
2340 | <path | ||
2341 | sodipodi:nodetypes="czzccccc" | ||
2342 | id="path8073" | ||
2343 | d="M 1094.2857,725.93361 C 1094.2857,725.93361 1093.9896,752.09452 1098.9285,763.79076 C 1103.8674,775.487 1118.9666,790.27741 1127.5,795.21933 C 1136.0334,800.16125 1146.4286,803.79075 1146.4286,803.79075 L 1264.2857,688.79075 L 1282.1429,613.07647 L 1185.7143,651.6479 L 1094.2857,725.93361 z" | ||
2344 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2345 | transform="translate(174.03125,-62.156036)" /> | ||
2346 | <rect | ||
2347 | y="486.14224" | ||
2348 | x="1197.8389" | ||
2349 | height="309.71277" | ||
2350 | width="333.75412" | ||
2351 | id="rect8075" | ||
2352 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2353 | </g> | ||
2354 | </g> | ||
2355 | <g | ||
2356 | transform="translate(-174.03125,62.156036)" | ||
2357 | style="opacity:0.18000004;display:inline;enable-background:new" | ||
2358 | id="g8077"> | ||
2359 | <g | ||
2360 | style="filter:url(#filter3785)" | ||
2361 | id="g8079"> | ||
2362 | <path | ||
2363 | sodipodi:nodetypes="czzccccc" | ||
2364 | id="path8081" | ||
2365 | d="M 1094.2857,725.93361 C 1094.2857,725.93361 1093.9896,752.09452 1098.9285,763.79076 C 1103.8674,775.487 1118.9666,790.27741 1127.5,795.21933 C 1136.0334,800.16125 1146.4286,803.79075 1146.4286,803.79075 L 1264.2857,688.79075 L 1282.1429,613.07647 L 1185.7143,651.6479 L 1094.2857,725.93361 z" | ||
2366 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2367 | transform="translate(174.03125,-62.156036)" /> | ||
2368 | <rect | ||
2369 | y="486.14224" | ||
2370 | x="1197.8389" | ||
2371 | height="309.71277" | ||
2372 | width="333.75412" | ||
2373 | id="rect8083" | ||
2374 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2375 | </g> | ||
2376 | </g> | ||
2377 | </g> | ||
2378 | <path | ||
2379 | transform="translate(450.03125,73.843964)" | ||
2380 | style="opacity:0.83300003;fill:#050505;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:15;stroke-linecap:butt;stroke-linejoin:miter;marker:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8225);enable-background:accumulate" | ||
2381 | d="M 1264.1875,605 C 1259.6964,605.73268 1256.0305,608.45509 1252.25,611.40625 C 1242.1687,619.27601 1224.0805,645.83149 1204.2188,661.875 C 1164.3514,694.07816 1100.2228,731.85201 1051.6562,752.96875 C 1003.0422,774.10613 921.11498,798.78676 877.34375,810.46875 C 833.94554,822.05121 762.29972,835.59982 709.09375,837 L 704.53125,837.125 L 702.53125,841.25 L 609.6875,1033.375 L 603.1875,1046.8438 L 617.84375,1044 C 617.84375,1044 705.11343,1027.3486 750.1875,1012.7188 C 794.9127,998.20213 865.97836,967.05197 923.21875,939.84375 C 980.82199,912.46306 1090.1551,847.86412 1137.5,810.375 C 1183.8608,773.66518 1215.3049,745.65818 1237.4375,720.125 C 1248.3386,707.549 1260.1823,692.59356 1268.4688,677.375 C 1276.7552,662.15644 1287.6285,633.15692 1282.1562,618.53125 C 1280.9385,615.27651 1279.6048,612.46995 1277.5625,610.03125 C 1275.5202,607.59255 1269.0878,608.45926 1269,605 C 1268.7902,596.73518 1265.6845,604.75577 1264.1875,605 z M 1266.3438,620.21875 C 1266.7586,620.80449 1267.3749,621.77641 1268.125,623.78125 C 1271.0218,631.52338 1266.6843,655.68 1259.3125,669.21875 C 1251.9407,682.7575 1236.6741,698.14269 1226.125,710.3125 C 1205.0496,734.62606 1174.2213,762.17406 1128.1875,798.625 C 1083.1379,834.29659 972.72717,899.71959 916.78125,926.3125 C 859.88952,953.35499 788.68509,984.4309 745.53125,998.4375 C 709.16634,1010.2406 649.68654,1022.2713 629.8125,1026.2188 L 714.09375,851.75 C 768.80066,849.7007 837.88634,836.53365 881.21875,824.96875 C 925.55297,813.1365 1007.2974,788.63242 1057.625,766.75 C 1107.737,744.96129 1170.1594,705.58184 1211.6562,672.0625 C 1232.3026,655.38529 1253.4011,629.51662 1261.4688,623.21875 C 1263.9058,621.31633 1265.5494,620.58295 1266.3438,620.21875 z" | ||
2382 | id="path8085" | ||
2383 | clip-path="url(#clipPath3722)" | ||
2384 | sodipodi:nodetypes="cssssccccccssssssssccssssssccssssc" /> | ||
2385 | <g | ||
2386 | style="opacity:1;display:inline;enable-background:new" | ||
2387 | id="g8087" | ||
2388 | mask="url(#mask7704)" | ||
2389 | transform="matrix(0.9934486,0.1142802,-0.1142802,0.9934486,-9.24324,588.09054)" | ||
2390 | inkscape:transform-center-x="-185.09603" | ||
2391 | inkscape:transform-center-y="-12.859654"> | ||
2392 | <path | ||
2393 | transform="translate(8.0045714e-2,-3.125e-2)" | ||
2394 | style="fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2395 | d="M 1111.4062,-285.9375 L 1107.4688,-284.0625 C 1107.4283,-284.05228 1107.3692,-284.04201 1107.3438,-284.03125 C 1106.925,-283.8184 1107.1791,-283.93067 1106.6875,-283.71875 C 1106.2014,-283.50919 1104.9499,-283.13456 1102.5938,-282.25 C 1099.2626,-280.99942 1096.7895,-280.10016 1095.5938,-279.1875 C 1094.0576,-279.16623 1091.8733,-278.95419 1089.9375,-278.46875 C 1086.956,-277.72108 1085.0823,-277.29474 1083.1875,-276.875 C 1081.2927,-276.45527 1081.512,-276.23281 1080.3125,-276 C 1079.0159,-275.74833 1078.5911,-276.00899 1074.875,-275.21875 C 1071.3851,-274.4766 1065.9802,-273.28768 1064.7188,-272.53125 C 1063.1348,-272.71203 1060.8513,-272.85303 1058.875,-272.5625 C 1055.8346,-272.11554 1053.9588,-271.88974 1052.0312,-271.65625 C 1051.3758,-271.57687 1050.9902,-271.45547 1050.6875,-271.375 C 1050.2613,-271.24334 1050.0017,-271.11498 1049.3125,-271.03125 C 1048.0009,-270.87188 1047.5503,-271.18808 1043.7812,-270.75 C 1040.2273,-270.33691 1034.7758,-269.47718 1033.5312,-268.8125 C 1031.9322,-269.10979 1029.6735,-269.34669 1027.6875,-269.15625 C 1024.6287,-268.86293 1022.7155,-268.67226 1020.7812,-268.5 C 1018.847,-268.32773 1019.0926,-268.07763 1017.875,-267.96875 C 1016.5588,-267.85105 1016.1152,-268.13238 1012.3438,-267.71875 C 1008.8017,-267.3303 1003.3359,-266.50948 1002.0625,-265.84375 C 1000.4636,-266.13844 998.1753,-266.35076 996.1875,-266.15625 C 993.12921,-265.857 991.2463,-265.67601 989.3125,-265.5 C 988.65501,-265.44015 988.27245,-265.32144 987.96875,-265.25 C 987.54105,-265.13104 987.28525,-265.03193 986.59375,-264.96875 C 985.27775,-264.84849 984.834,-265.16363 981.0625,-264.75 C 977.50631,-264.35998 972.0569,-263.51084 970.8125,-262.84375 C 969.21381,-263.13793 966.95265,-263.36747 964.96875,-263.15625 C 961.91305,-262.83092 959.9947,-262.63001 958.0625,-262.4375 C 956.13031,-262.24499 956.37275,-261.99662 955.15625,-261.875 C 953.84137,-261.74353 953.3932,-262.03954 949.625,-261.59375 C 946.08611,-261.17509 940.6473,-260.30158 939.375,-259.625 C 937.77741,-259.90604 935.51505,-260.04543 933.53125,-259.8125 C 930.47927,-259.45413 928.58625,-259.24464 926.65625,-259.03125 C 926.00007,-258.95869 925.6156,-258.85856 925.3125,-258.78125 C 924.88571,-258.65402 924.6276,-258.51405 923.9375,-258.4375 C 922.62411,-258.29181 922.17015,-258.61152 918.40625,-258.125 C 914.85737,-257.66624 909.4276,-256.70598 908.1875,-256 C 906.59441,-256.24424 904.3537,-256.38135 902.375,-256.125 C 899.32741,-255.73018 897.4243,-255.47655 895.5,-255.21875 C 893.57571,-254.96096 893.7739,-254.72522 892.5625,-254.5625 C 891.25301,-254.3866 890.8153,-254.66688 887.0625,-254.09375 C 883.53821,-253.55551 878.1393,-252.39458 876.875,-251.65625 C 875.28751,-251.85979 873.0295,-251.91098 871.0625,-251.5625 C 868.03631,-251.02638 866.1636,-250.70081 864.25,-250.375 C 863.59941,-250.26423 863.2363,-250.10406 862.9375,-250 C 862.51681,-249.83512 862.27405,-249.6687 861.59375,-249.53125 C 860.29905,-249.26966 859.86665,-249.53745 856.15625,-248.71875 C 852.65777,-247.9468 847.31035,-246.33582 846.09375,-245.5 C 844.53085,-245.57745 842.33625,-245.41472 840.40625,-244.90625 C 837.43387,-244.12312 835.58855,-243.67416 833.71875,-243.15625 C 831.84875,-242.63835 832.0521,-242.38897 830.875,-242.0625 C 829.60251,-241.7096 829.17795,-241.95541 825.53125,-240.875 C 822.10657,-239.86037 816.88185,-237.94183 815.65625,-237.03125 C 814.11747,-237.01851 811.93645,-236.75903 810.03125,-236.15625 C 807.10027,-235.22891 805.2809,-234.69783 803.4375,-234.09375 C 802.81071,-233.88837 802.44585,-233.70117 802.15625,-233.5625 C 801.74867,-233.34889 801.50295,-233.15375 800.84375,-232.9375 C 799.58925,-232.52596 799.1576,-232.74846 795.5625,-231.5 C 792.17261,-230.32283 786.96755,-228.2863 785.78125,-227.34375 C 784.25737,-227.28408 782.1312,-226.94888 780.25,-226.28125 C 777.35261,-225.25296 775.55095,-224.60577 773.71875,-223.96875 C 771.88655,-223.33174 772.0909,-223.12021 770.9375,-222.71875 C 769.69071,-222.28479 769.27395,-222.51903 765.71875,-221.15625 C 762.38005,-219.87645 757.23165,-217.6737 756.03125,-216.6875 C 754.52407,-216.57981 752.39555,-216.1887 750.53125,-215.46875 C 747.66307,-214.36115 745.90735,-213.68719 744.09375,-213 C 743.47705,-212.76637 743.0973,-212.55797 742.8125,-212.40625 C 742.81251,-212.40625 742.8125,-212.37673 742.8125,-212.375 L 734.8125,-209.1875 L 722.3366,-205.69561 L 730.26626,-186.41789 C 729.67463,-184.44432 742.8125,-191.15625 742.8125,-191.15625 C 743.03891,-191.30093 743.26145,-191.42886 743.53125,-191.53125 C 744.61177,-191.94123 745.70285,-191.74702 749.53125,-193.21875 C 753.35977,-194.69049 754.7553,-195.22373 755.4375,-195.625 C 756.11711,-196.02478 757.04925,-196.50437 757.65625,-197.15625 C 759.48317,-197.294 761.22705,-197.64948 762.59375,-198.15625 C 765.56175,-199.25677 767.4691,-199.96244 769.375,-200.625 C 771.28081,-201.28754 771.72915,-202.03987 772.78125,-202.40625 C 773.87287,-202.78636 774.97635,-202.57163 778.84375,-203.9375 C 782.71115,-205.30336 784.1269,-205.76458 784.8125,-206.15625 C 785.51361,-206.55677 786.5133,-207.08923 787.125,-207.75 C 789.09581,-207.80466 790.94195,-208.13463 792.40625,-208.625 C 795.40777,-209.63008 797.3324,-210.24671 799.25,-210.875 C 800.78861,-211.3791 801.42415,-211.92177 802.15625,-212.3125 C 802.38647,-212.44681 802.63215,-212.56623 802.90625,-212.65625 C 804.00457,-213.01673 805.0877,-212.73762 809,-213.96875 C 812.91231,-215.19988 814.366,-215.6417 815.0625,-216 C 815.75641,-216.35697 816.6926,-216.79261 817.3125,-217.40625 C 819.17771,-217.42891 820.94835,-217.67308 822.34375,-218.09375 C 825.37415,-219.00729 827.33615,-219.52385 829.28125,-220.0625 C 831.22637,-220.60114 831.70745,-221.32702 832.78125,-221.625 C 833.89527,-221.93415 835.00125,-221.61761 838.96875,-222.65625 C 842.93625,-223.69488 844.38625,-224.08898 845.09375,-224.40625 C 845.82855,-224.73584 846.90765,-225.15997 847.53125,-225.78125 C 849.52907,-225.66525 851.3887,-225.80134 852.875,-226.15625 C 855.95311,-226.89125 857.9584,-227.25719 859.9375,-227.65625 C 861.52541,-227.97643 862.1818,-228.4468 862.9375,-228.75 C 863.17501,-228.8568 863.4044,-228.94276 863.6875,-229 C 864.82091,-229.22919 865.99215,-228.79107 870.03125,-229.5 C 874.07067,-230.20893 875.5315,-230.42709 876.25,-230.6875 C 876.96581,-230.94694 877.95435,-231.25474 878.59375,-231.78125 C 880.51795,-231.54176 882.34165,-231.55672 883.78125,-231.78125 C 886.90767,-232.26887 888.9358,-232.48192 890.9375,-232.75 C 892.93921,-233.01807 893.42625,-233.69514 894.53125,-233.84375 C 895.67767,-233.99793 896.8071,-233.54218 900.875,-234.0625 C 904.94281,-234.58282 906.43525,-234.75823 907.15625,-235 C 907.89337,-235.24714 908.95435,-235.58623 909.59375,-236.125 C 911.64375,-235.78947 913.56745,-235.72704 915.09375,-235.90625 C 918.23595,-236.27521 920.27375,-236.46561 922.28125,-236.6875 C 923.89207,-236.86552 924.5459,-237.2957 925.3125,-237.53125 C 925.55341,-237.61677 925.80655,-237.68685 926.09375,-237.71875 C 927.24345,-237.84647 928.39505,-237.3721 932.46875,-237.84375 C 936.54245,-238.3154 938.0278,-238.45435 938.75,-238.6875 C 939.46941,-238.91977 940.45025,-239.16096 941.09375,-239.65625 C 943.03005,-239.32279 944.8638,-239.25201 946.3125,-239.40625 C 949.45851,-239.7412 951.49,-239.92484 953.5,-240.125 C 955.50991,-240.32514 955.98415,-240.95139 957.09375,-241.0625 C 958.24485,-241.17778 959.39025,-240.69744 963.46875,-241.125 C 967.54725,-241.55256 969.05765,-241.68709 969.78125,-241.90625 C 970.52047,-242.13011 971.57685,-242.4195 972.21875,-242.9375 C 974.27575,-242.53883 976.2206,-242.4441 977.75,-242.59375 C 980.89871,-242.90185 982.9258,-243.067 984.9375,-243.25 C 986.55151,-243.39682 987.20055,-243.81055 987.96875,-244.03125 C 988.21005,-244.11211 988.4623,-244.16116 988.75,-244.1875 C 989.90211,-244.29295 991.0429,-243.79475 995.125,-244.1875 C 999.20711,-244.58025 1000.7139,-244.71834 1001.4375,-244.9375 C 1002.1584,-245.15583 1003.1371,-245.3852 1003.7812,-245.875 C 1005.7193,-245.52501 1007.5501,-245.42062 1009,-245.5625 C 1012.1487,-245.8706 1014.1758,-246.03575 1016.1875,-246.21875 C 1018.1991,-246.40174 1018.7017,-247.05677 1019.8125,-247.15625 C 1020.9648,-247.25948 1022.1047,-246.77142 1026.1875,-247.15625 C 1030.2704,-247.54107 1031.7762,-247.65725 1032.5,-247.875 C 1033.2393,-248.09743 1034.2956,-248.38949 1034.9375,-248.90625 C 1036.9949,-248.50448 1038.9404,-248.40292 1040.4688,-248.5625 C 1043.6153,-248.89102 1045.6458,-249.0852 1047.6562,-249.28125 C 1049.2692,-249.43854 1049.9219,-249.91273 1050.6875,-250.15625 C 1050.9282,-250.24429 1051.1507,-250.27762 1051.4375,-250.3125 C 1052.5858,-250.4522 1053.7542,-249.97259 1057.8125,-250.5625 C 1061.8708,-251.15242 1063.3743,-251.33964 1064.0938,-251.59375 C 1064.8104,-251.84691 1065.7684,-252.15182 1066.4062,-252.6875 C 1068.3259,-252.47556 1070.1262,-252.53609 1071.5625,-252.78125 C 1074.6816,-253.31365 1076.6741,-253.70986 1078.6562,-254.09375 C 1080.6383,-254.47762 1081.1305,-255.1334 1082.2188,-255.375 C 1083.3475,-255.62566 1084.489,-255.25871 1088.4688,-256.25 C 1092.4483,-257.24127 1093.8983,-257.6693 1094.5938,-258.03125 C 1095.316,-258.40725 1096.3555,-258.90183 1096.9688,-259.5625 C 1098.9317,-259.57454 1100.7625,-259.85355 1102.1875,-260.40625 C 1105.1387,-261.55085 1107.0607,-262.27567 1108.875,-263.15625 C 1110.3307,-263.86277 1111.1941,-264.85828 1111.4062,-265.15625 C 1111.6185,-265.4542 1111.5051,-265.8848 1111.5312,-265.90625 C 1111.5742,-265.94148 1111.8716,-266.00028 1112.0312,-266.34375 C 1112.8902,-268.19082 1114.3544,-271.97139 1114.4688,-272.65625 C 1114.5825,-273.33839 1114.6368,-274.00902 1114.6875,-274.40625 C 1114.7169,-274.63575 1114.5404,-275.28515 1114.5625,-275.34375 C 1114.5934,-275.42579 1114.8508,-275.59432 1114.9062,-275.84375 C 1115.1725,-277.04206 1114.9953,-278.05111 1114.7812,-279.46875 C 1114.5673,-280.88638 1113.8096,-284.08338 1113.1562,-284.9375 C 1112.4973,-285.79922 1111.9314,-285.94801 1111.4062,-285.9375 z" | ||
2396 | id="path8089" | ||
2397 | sodipodi:nodetypes="ccssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssscccccssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssssssssc" /> | ||
2398 | <g | ||
2399 | clip-path="url(#clipPath7421)" | ||
2400 | id="g8091"> | ||
2401 | <path | ||
2402 | id="path8093" | ||
2403 | d="M 1107.409,-284.04961 C 1106.9903,-283.83678 1107.2534,-283.95572 1106.7618,-283.7438 C 1106.2757,-283.53426 1105.0384,-283.16941 1102.6822,-282.28485 C 1099.3511,-281.03428 1096.852,-280.13141 1095.6563,-279.21875 C 1094.1202,-279.19749 1091.9358,-278.98544 1090,-278.5 C 1087.0185,-277.75234 1085.1448,-277.32599 1083.25,-276.90625 C 1081.3552,-276.48653 1081.5745,-276.26406 1080.375,-276.03125 C 1079.0784,-275.77959 1078.6536,-276.04024 1074.9375,-275.25 C 1071.4476,-274.50786 1066.0427,-273.31893 1064.7813,-272.5625 C 1063.1974,-272.74329 1060.9138,-272.88428 1058.9375,-272.59375 C 1055.8971,-272.1468 1054.0213,-271.92099 1052.0938,-271.6875 C 1051.4384,-271.60813 1051.0527,-271.48672 1050.75,-271.40625 C 1050.3238,-271.2746 1050.0642,-271.14623 1049.375,-271.0625 C 1048.0634,-270.90314 1047.6128,-271.21933 1043.8438,-270.78125 C 1040.2899,-270.36817 1034.8384,-269.50843 1033.5938,-268.84375 C 1031.9948,-269.14105 1029.736,-269.37794 1027.75,-269.1875 C 1024.6912,-268.89419 1022.778,-268.70351 1020.8438,-268.53125 C 1018.9095,-268.35899 1019.1551,-268.10888 1017.9375,-268 C 1016.6213,-267.88231 1016.1777,-268.16363 1012.4063,-267.75 C 1008.8644,-267.36156 1003.3984,-266.54073 1002.125,-265.875 C 1000.5261,-266.1697 998.23783,-266.38201 996.25,-266.1875 C 993.19176,-265.88826 991.30887,-265.70726 989.375,-265.53125 C 988.71751,-265.47141 988.33496,-265.35269 988.03125,-265.28125 C 987.6036,-265.1623 987.34774,-265.06318 986.65625,-265 C 985.34029,-264.87975 984.89649,-265.19488 981.125,-264.78125 C 977.56886,-264.39124 972.11946,-263.54209 970.875,-262.875 C 969.27637,-263.16919 967.01516,-263.39872 965.03125,-263.1875 C 961.97565,-262.86218 960.05721,-262.66126 958.125,-262.46875 C 956.19279,-262.27625 956.43513,-262.02787 955.21875,-261.90625 C 953.90387,-261.77479 953.45577,-262.07079 949.6875,-261.625 C 946.14863,-261.20635 940.70982,-260.33283 939.4375,-259.65625 C 937.83995,-259.9373 935.57754,-260.07668 933.59375,-259.84375 C 930.54177,-259.48539 928.64867,-259.27589 926.71875,-259.0625 C 926.06255,-258.98995 925.67809,-258.88981 925.375,-258.8125 C 924.94823,-258.68528 924.69009,-258.5453 924,-258.46875 C 922.68667,-258.32307 922.23254,-258.64277 918.46875,-258.15625 C 914.91986,-257.6975 909.49012,-256.73723 908.25,-256.03125 C 906.65695,-256.27549 904.41619,-256.4126 902.4375,-256.15625 C 899.38991,-255.76144 897.48679,-255.5078 895.5625,-255.25 C 893.63822,-254.99221 893.83639,-254.75647 892.625,-254.59375 C 891.31554,-254.41785 890.87781,-254.69813 887.125,-254.125 C 883.60071,-253.58676 878.20185,-252.42583 876.9375,-251.6875 C 875.35,-251.89104 873.092,-251.94223 871.125,-251.59375 C 868.09883,-251.05763 866.22611,-250.73206 864.3125,-250.40625 C 863.66189,-250.29548 863.29879,-250.13531 863,-250.03125 C 862.57933,-249.86637 862.33655,-249.69995 861.65625,-249.5625 C 860.3616,-249.30091 859.92913,-249.5687 856.21875,-248.75 C 852.72022,-247.97805 847.3728,-246.36707 846.15625,-245.53125 C 844.59347,-245.6087 842.39867,-245.44597 840.46875,-244.9375 C 837.49631,-244.15437 835.65114,-243.70541 833.78125,-243.1875 C 831.91137,-242.6696 832.11465,-242.42022 830.9375,-242.09375 C 829.66504,-241.74085 829.24047,-241.98666 825.59375,-240.90625 C 822.16905,-239.89162 816.94431,-237.97308 815.71875,-237.0625 C 814.17992,-237.04976 811.99892,-236.79028 810.09375,-236.1875 C 807.16269,-235.26016 805.34344,-234.72908 803.5,-234.125 C 802.87324,-233.91962 802.50827,-233.73242 802.21875,-233.59375 C 801.81112,-233.38014 801.56541,-233.185 800.90625,-232.96875 C 799.65179,-232.55721 799.22014,-232.77971 795.625,-231.53125 C 792.23515,-230.35408 787.03002,-228.31755 785.84375,-227.375 C 784.31986,-227.31533 782.1937,-226.98013 780.3125,-226.3125 C 777.41511,-225.28421 775.61342,-224.63702 773.78125,-224 C 771.94908,-223.36299 772.1534,-223.15146 771,-222.75 C 769.75322,-222.31604 769.33639,-222.55028 765.78125,-221.1875 C 762.44258,-219.9077 757.2941,-217.70495 756.09375,-216.71875 C 754.58657,-216.61106 752.45806,-216.21995 750.59375,-215.5 C 747.72557,-214.3924 745.96995,-213.71844 744.15625,-213.03125 C 743.53959,-212.79762 743.15984,-212.58922 742.875,-212.4375 C 742.875,-212.4375 742.875,-211.34375 742.875,-211.34375 C 742.98678,-211.56611 743.26099,-212.16118 743.78125,-212.4375 C 744.47922,-212.80822 748.59488,-214.43087 750.59375,-215.15625 C 752.25061,-215.7575 754.74764,-216.48493 756.5625,-216.46875 C 756.86412,-216.46606 757.15012,-216.41785 757.40625,-216.375 C 759.24874,-216.06675 764.875,-214.8125 764.875,-214.8125 C 764.87499,-214.8125 758.64151,-216.45925 757.84375,-216.65625 C 757.65296,-216.70336 757.30803,-216.72497 756.875,-216.71875 C 758.02046,-217.58846 761.636,-219.11226 764.21875,-220.15625 C 767.05697,-221.30352 767.33556,-221.40807 769.28125,-221.8125 C 771.28955,-222.22994 772.4375,-222.3125 772.4375,-222.3125 C 772.4375,-222.31249 772.35514,-222.91364 773.40625,-223.4375 C 774.11135,-223.78891 778.29327,-225.3299 780.3125,-226 C 782.2644,-226.64773 785.3699,-227.3585 787.1875,-227 C 789.05073,-226.6325 794.71875,-225.1875 794.71875,-225.1875 C 794.71876,-225.1875 788.43175,-227.05861 787.625,-227.28125 C 787.43208,-227.3345 787.09416,-227.36729 786.65625,-227.375 C 787.81459,-228.20788 791.45069,-229.57032 794.0625,-230.53125 C 796.93266,-231.58726 797.22984,-231.69305 799.1875,-232.0625 C 801.04099,-232.41229 802.04634,-232.48798 802.21875,-232.5 C 802.33235,-232.71724 802.5962,-233.31002 803.125,-233.5625 C 803.83444,-233.90124 808.05107,-235.27525 810.09375,-235.875 C 811.78692,-236.37211 814.33452,-236.91177 816.1875,-236.78125 C 816.49545,-236.75957 816.80099,-236.68399 817.0625,-236.625 C 818.94368,-236.20068 824.65625,-234.59375 824.65625,-234.59375 C 824.65626,-234.59375 818.31451,-236.659 817.5,-236.90625 C 817.30521,-236.96539 816.94212,-237.01019 816.5,-237.03125 C 817.66949,-237.8288 821.36302,-239.08747 824,-239.96875 C 826.89781,-240.93722 827.23301,-240.97207 829.21875,-241.25 C 831.2684,-241.53689 832.40625,-241.5625 832.40625,-241.5625 C 832.40623,-241.5625 832.3335,-242.16947 833.40625,-242.625 C 834.12585,-242.93057 838.39723,-244.12575 840.46875,-244.625 C 842.47119,-245.10758 845.66724,-245.55329 847.53125,-245.03125 C 849.44203,-244.4961 855.25,-242.53125 855.25,-242.53125 C 855.25,-242.53125 848.82734,-244.95476 848,-245.25 C 847.80216,-245.32061 847.41784,-245.39039 846.96875,-245.4375 C 848.15665,-246.16615 851.88402,-247.21158 854.5625,-247.9375 C 857.50592,-248.73525 857.85458,-248.70833 859.875,-248.84375 C 861.78789,-248.97198 862.82205,-248.91484 863,-248.90625 C 863.11728,-249.10991 863.39176,-249.68573 863.9375,-249.875 C 864.66969,-250.12894 869.01602,-250.92289 871.125,-251.25 C 872.87313,-251.52111 875.52588,-251.7347 877.4375,-251.34375 C 877.75516,-251.27879 878.04272,-251.15824 878.3125,-251.0625 C 880.25324,-250.37377 886.15625,-247.96875 886.15625,-247.96875 C 886.15626,-247.96875 879.62154,-250.91952 878.78125,-251.28125 C 878.58028,-251.36776 878.20612,-251.44804 877.75,-251.53125 C 878.9565,-252.16443 882.77956,-252.92685 885.5,-253.4375 C 888.48953,-253.99869 888.80023,-253.96704 890.84375,-253.96875 C 892.95301,-253.97052 894.15625,-253.84375 894.15625,-253.84375 C 894.15625,-253.84374 894.08354,-254.47494 895.1875,-254.78125 C 895.92802,-254.98672 900.31362,-255.61512 902.4375,-255.84375 C 904.49052,-256.06474 907.75613,-256.09597 909.65625,-255.375 C 911.60404,-254.63593 917.5,-252 917.5,-252 C 917.50002,-252 910.93712,-255.17897 910.09375,-255.5625 C 909.89207,-255.65423 909.55154,-255.74871 909.09375,-255.84375 C 910.30467,-256.44563 914.07817,-257.09259 916.8125,-257.5 C 919.8173,-257.94772 920.13801,-257.9517 922.1875,-257.90625 C 924.12795,-257.86323 925.19449,-257.71202 925.375,-257.6875 C 925.49392,-257.88066 925.7589,-258.45333 926.3125,-258.59375 C 927.05521,-258.78213 931.46679,-259.32803 933.59375,-259.53125 C 935.35678,-259.69967 938.01384,-259.76554 939.9375,-259.28125 C 940.25718,-259.20077 940.54101,-259.07766 940.8125,-258.96875 C 942.76543,-258.18526 948.71875,-255.5 948.71875,-255.5 C 948.71873,-255.5 942.12684,-258.75348 941.28125,-259.15625 C 941.07903,-259.25257 940.70899,-259.36328 940.25,-259.46875 C 941.46414,-260.04302 945.29366,-260.59094 948.03125,-260.96875 C 951.03963,-261.38395 951.35432,-261.41138 953.40625,-261.34375 C 955.52423,-261.27394 956.71875,-261.09375 956.71875,-261.09375 C 956.71873,-261.09375 956.6415,-261.73116 957.75,-262 C 958.49362,-262.18035 962.90176,-262.66355 965.03125,-262.84375 C 967.08972,-263.01792 970.37449,-262.96807 972.28125,-262.1875 C 974.23584,-261.38734 980.15625,-258.65625 980.15625,-258.65625 C 980.15623,-258.65625 973.59632,-261.96501 972.75,-262.375 C 972.54763,-262.47305 972.17814,-262.5781 971.71875,-262.6875 C 972.93392,-263.2514 976.72883,-263.8018 979.46875,-264.15625 C 982.47966,-264.54577 982.79006,-264.5539 984.84375,-264.46875 C 986.78814,-264.38815 987.85038,-264.21551 988.03125,-264.1875 C 988.15041,-264.37836 988.41402,-264.93281 988.96875,-265.0625 C 989.71301,-265.2365 994.11868,-265.71297 996.25,-265.875 C 998.01662,-266.00927 1000.6997,-266.00071 1002.625,-265.5 C 1002.945,-265.41679 1003.2283,-265.29873 1003.5,-265.1875 C 1005.4546,-264.38734 1011.4063,-261.625 1011.4063,-261.625 C 1011.4062,-261.625 1004.8151,-264.96501 1003.9688,-265.375 C 1003.7664,-265.47305 1003.3969,-265.57811 1002.9375,-265.6875 C 1004.1526,-266.2514 1007.9788,-266.77056 1010.7188,-267.125 C 1013.7297,-267.51453 1014.0713,-267.5539 1016.125,-267.46875 C 1018.2447,-267.38087 1019.4375,-267.15625 1019.4375,-267.15625 C 1019.4375,-267.15625 1019.3591,-267.80527 1020.4688,-268.0625 C 1021.2131,-268.23506 1025.6183,-268.68586 1027.75,-268.84375 C 1029.8106,-268.99635 1033.0929,-268.94052 1035,-268.15625 C 1036.955,-267.3523 1042.875,-264.65625 1042.875,-264.65625 C 1042.875,-264.65625 1036.3152,-267.93212 1035.4688,-268.34375 C 1035.2663,-268.44219 1034.897,-268.54597 1034.4375,-268.65625 C 1035.6529,-269.21779 1039.4494,-269.78403 1042.1875,-270.15625 C 1045.1965,-270.5653 1045.5102,-270.57183 1047.5625,-270.5 C 1049.5056,-270.43201 1050.5697,-270.33515 1050.75,-270.3125 C 1050.8688,-270.5069 1051.1346,-271.04131 1051.6875,-271.1875 C 1052.4293,-271.38362 1056.8186,-272.01628 1058.9375,-272.28125 C 1060.6939,-272.50086 1063.3428,-272.61356 1065.25,-272.25 C 1065.5669,-272.18959 1065.8558,-272.06062 1066.125,-271.96875 C 1068.0612,-271.30783 1073.9688,-269.03125 1073.9688,-269.03125 C 1073.9687,-269.03125 1067.4321,-271.8378 1066.5938,-272.1875 C 1066.3933,-272.27113 1066.0176,-272.36083 1065.5625,-272.4375 C 1066.7662,-273.08796 1070.5816,-273.80945 1073.2813,-274.4375 C 1076.248,-275.1277 1076.5702,-275.19257 1078.5938,-275.3125 C 1080.6824,-275.4363 1081.875,-275.34375 1081.875,-275.34375 C 1081.875,-275.34374 1081.788,-275.9758 1082.875,-276.375 C 1083.6042,-276.6428 1087.9222,-277.71297 1090,-278.1875 C 1092.0085,-278.64619 1095.1679,-279.2168 1097,-278.8125 C 1098.8781,-278.39804 1110.5782,-275.79687 1110.5782,-275.79687 C 1110.5782,-275.79687 1098.2507,-278.81953 1097.4375,-279.0625 C 1097.243,-279.12062 1096.8789,-279.16876 1096.4375,-279.1875 C 1097.6051,-279.99119 1099.9517,-280.8748 1102.5469,-281.89062 C 1104.2283,-282.5488 1103.4706,-282.26721 1105.3228,-282.89422 C 1107.0764,-283.48788 1107.8082,-283.90493 1107.9532,-284.00721 C 1108.2993,-284.21372 1107.5972,-284.12909 1107.409,-284.04961 z" | ||
2404 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7001);enable-background:new" | ||
2405 | sodipodi:nodetypes="czscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssccsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscc" /> | ||
2406 | <path | ||
2407 | id="path8095" | ||
2408 | d="M 1082.625,-275.125 C 1084.498,-274.73152 1087.1211,-273.97945 1088.6563,-273.15625 C 1090.1915,-272.33306 1091.4785,-272.10025 1094.0313,-270.65625 C 1096.5579,-269.22699 1098.8271,-268.64929 1101,-268.125 C 1103.3476,-267.55858 1106.4354,-267.40977 1109.8438,-266.9375 C 1108.7549,-267.77725 1103.2364,-268.10995 1101.4375,-268.5 C 1099.6386,-268.89006 1097.5434,-269.51616 1094.8438,-270.8125 C 1092.1441,-272.10884 1091.3494,-272.61146 1089.0313,-273.5 C 1086.7131,-274.38854 1085.0269,-274.88314 1082.625,-275.125 z" | ||
2409 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6949);enable-background:new" /> | ||
2410 | <path | ||
2411 | id="path8097" | ||
2412 | d="M 1051.4688,-270 C 1053.3741,-269.42241 1055.9969,-268.38428 1057.5625,-267.40625 C 1059.1281,-266.42823 1060.4427,-266.04644 1063.0625,-264.28125 C 1065.6555,-262.53409 1068.0484,-261.57198 1070.3125,-260.6875 C 1072.7586,-259.73193 1075.9951,-259.03037 1079.7188,-257.625 C 1078.5292,-258.76284 1072.6557,-260.31175 1070.7813,-261 C 1068.9068,-261.68825 1066.6995,-262.5662 1063.9063,-264.28125 C 1061.113,-265.99629 1060.3327,-266.56515 1057.9688,-267.6875 C 1055.6047,-268.80984 1053.9121,-269.52205 1051.4688,-270 z" | ||
2413 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6961);enable-background:new" /> | ||
2414 | <path | ||
2415 | id="path8099" | ||
2416 | d="M 1020.2188,-266.84375 C 1022.1307,-266.20564 1024.8,-265.08839 1026.375,-264.03125 C 1027.9501,-262.9741 1029.2706,-262.52258 1031.9063,-260.625 C 1034.5149,-258.74679 1036.9347,-257.59497 1039.2188,-256.5625 C 1041.6865,-255.44705 1044.9833,-254.3892 1048.75,-252.71875 C 1047.5467,-253.94128 1041.5472,-256.03298 1039.6563,-256.84375 C 1037.7653,-257.65452 1035.5914,-258.73754 1032.7813,-260.59375 C 1029.9711,-262.44995 1029.1595,-263.07068 1026.7813,-264.3125 C 1024.403,-265.5543 1022.6706,-266.28819 1020.2188,-266.84375 z" | ||
2417 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6957);enable-background:new" /> | ||
2418 | <path | ||
2419 | id="path8101" | ||
2420 | d="M 1110.1719,-266.89063 C 1110.3227,-266.84207 1110.8599,-266.25963 1110.2813,-265.40625 C 1109.4712,-264.21166 1104.5764,-262.08196 1101.7188,-261.28125 C 1098.8739,-260.48413 1095.4287,-260.30351 1091.1563,-261.65625 C 1086.8547,-263.0182 1085.6866,-264.12497 1080.5,-265.96875 C 1085.164,-263.85358 1086.6953,-262.01642 1090.625,-260.625 C 1092.2457,-260.05113 1093.9921,-259.6854 1095.6875,-259.59375 C 1095.2424,-259.26812 1094.1572,-258.61045 1092.125,-258 C 1089.3295,-257.16031 1085.4759,-256.46622 1083.875,-256.375 C 1082.3604,-256.28868 1080.733,-256.88749 1080.4375,-257 C 1080.6042,-256.89692 1080.8107,-256.62266 1080.1875,-255.96875 C 1079.2882,-255.02512 1074.0401,-254.04575 1071.0625,-253.71875 C 1068.0982,-253.3932 1064.5409,-253.73471 1060.1563,-255.625 C 1056.1783,-257.33997 1054.8173,-258.54036 1050.75,-260.375 C 1050.75,-260.375 1050.75,-260.21875 1050.75,-260.21875 C 1054.3931,-258.12346 1056.034,-256.33548 1059.625,-254.65625 C 1061.3552,-253.84716 1063.2167,-253.24749 1065.0313,-252.9375 C 1064.4964,-252.65074 1063.4735,-252.22599 1061.5938,-251.90625 C 1058.7248,-251.41829 1054.7848,-251.09011 1053.1563,-251.15625 C 1052.3056,-251.19079 1051.4277,-251.34062 1050.75,-251.5625 C 1050.0652,-251.77738 1049.5603,-252.00717 1049.4375,-252.0625 C 1049.6069,-251.95529 1049.8686,-251.65962 1049.2188,-251.03125 C 1048.3091,-250.15163 1042.9727,-249.69487 1039.9688,-249.5625 C 1036.9783,-249.43071 1033.3799,-250.01313 1028.9688,-252.125 C 1024.5276,-254.25126 1023.3273,-255.5266 1018.0625,-257.90625 C 1022.7968,-255.30921 1024.349,-253.27715 1028.4063,-251.1875 C 1030.0796,-250.32565 1031.8915,-249.69325 1033.6563,-249.25 C 1033.193,-249.01668 1032.0669,-248.56186 1029.9688,-248.3125 C 1027.0825,-247.96952 1023.1342,-247.81962 1021.5,-247.9375 C 1019.9538,-248.049 1018.2688,-248.79446 1017.9688,-248.9375 C 1018.1379,-248.81721 1018.3826,-248.52702 1017.75,-247.9375 C 1016.8372,-247.08677 1011.5059,-246.67538 1008.5,-246.5625 C 1005.5075,-246.45013 1001.9103,-247.05293 997.5,-249.15625 C 993.49875,-251.06448 992.11197,-252.29408 988.03125,-254.25 C 988.03122,-254.25 988.03125,-254.09375 988.03125,-254.09375 C 991.68631,-251.88983 993.32546,-250.0412 996.9375,-248.1875 C 998.67779,-247.29435 1000.5745,-246.65923 1002.4063,-246.21875 C 1001.8663,-245.97045 1000.8282,-245.60342 998.9375,-245.375 C 996.05182,-245.02642 992.07145,-244.85405 990.4375,-244.96875 C 989.58405,-245.02865 988.71119,-245.22666 988.03125,-245.46875 C 987.34415,-245.70405 986.8419,-245.94101 986.71875,-246 C 986.88873,-245.88773 987.18323,-245.57775 986.53125,-244.96875 C 985.6186,-244.11625 980.25592,-243.67538 977.25,-243.5625 C 974.25754,-243.45013 970.65654,-244.09055 966.25,-246.15625 C 961.81347,-248.23603 960.60312,-249.48796 955.34375,-251.8125 C 960.07313,-249.26501 961.63449,-247.2347 965.6875,-245.1875 C 967.35905,-244.34317 969.17304,-243.72107 970.9375,-243.28125 C 970.47427,-243.04703 969.3478,-242.59718 967.25,-242.34375 C 964.36431,-241.99517 960.4138,-241.77423 958.78125,-241.875 C 957.23669,-241.97032 955.58094,-242.70385 955.28125,-242.84375 C 955.45024,-242.72522 955.66317,-242.4399 955.03125,-241.84375 C 954.11939,-240.98347 948.7846,-240.5135 945.78125,-240.375 C 942.7913,-240.2371 939.2138,-240.82568 934.8125,-242.84375 C 930.81942,-244.67464 929.44739,-245.87295 925.375,-247.75 C 925.37498,-247.75 925.375,-247.59375 925.375,-247.59375 C 929.02261,-245.46048 930.64533,-243.65888 934.25,-241.875 C 935.98675,-241.01549 937.85727,-240.42486 939.6875,-240 C 939.14803,-239.7471 938.13687,-239.35871 936.25,-239.09375 C 933.37022,-238.68939 929.41187,-238.44813 927.78125,-238.53125 C 926.92953,-238.57466 926.05355,-238.7398 925.375,-238.96875 C 924.68931,-239.19076 924.1854,-239.41214 924.0625,-239.46875 C 924.23209,-239.35976 924.4944,-239.0591 923.84375,-238.4375 C 922.93296,-237.56736 917.59354,-237.04598 914.59375,-236.875 C 911.60742,-236.70479 908.01994,-237.19077 903.625,-239.15625 C 899.20011,-241.13513 898.01904,-242.38444 892.78125,-244.53125 C 897.49122,-242.14358 899.05142,-240.14252 903.09375,-238.1875 C 904.7609,-237.38119 906.55418,-236.79092 908.3125,-236.40625 C 907.85087,-236.15755 906.7155,-235.694 904.625,-235.375 C 901.7494,-234.93624 897.8446,-234.6419 896.21875,-234.6875 C 894.68052,-234.73062 892.98595,-235.43272 892.6875,-235.5625 C 892.85583,-235.44968 893.09807,-235.14875 892.46875,-234.53125 C 891.56063,-233.64015 886.2658,-233.003 883.28125,-232.71875 C 880.31007,-232.43577 876.70783,-232.89455 872.34375,-234.65625 C 868.38441,-236.25456 867.0146,-237.45112 863,-238.96875 C 863.00003,-238.96875 863,-238.8125 863,-238.8125 C 866.5959,-237.00115 868.23831,-235.23017 871.8125,-233.65625 C 873.53457,-232.8979 875.39998,-232.3673 877.21875,-232.03125 C 876.68266,-231.75217 875.65217,-231.34362 873.78125,-230.96875 C 870.92586,-230.39665 866.99183,-229.94936 865.375,-229.9375 C 864.53049,-229.93129 863.66892,-230.01844 863,-230.1875 C 862.32409,-230.34901 861.83991,-230.51673 861.71875,-230.5625 C 861.88597,-230.46848 862.14142,-230.17902 861.5,-229.5 C 860.60213,-228.54948 855.31352,-227.58292 852.375,-227.0625 C 849.44966,-226.54441 845.94285,-226.68826 841.65625,-228.09375 C 837.34045,-229.50882 836.18348,-230.62369 831.09375,-232.0625 C 835.6706,-230.31149 837.1823,-228.50244 841.125,-227.0625 C 842.75108,-226.46861 844.49385,-226.10685 846.21875,-225.90625 C 845.7659,-225.60923 844.66397,-225.02286 842.625,-224.4375 C 839.82028,-223.63233 835.98614,-222.86167 834.40625,-222.6875 C 832.9115,-222.5227 831.29002,-223.00431 831,-223.09375 C 831.16356,-223.00368 831.39278,-222.73382 830.78125,-222.03125 C 829.89878,-221.0174 824.73673,-219.6596 821.84375,-218.96875 C 818.96373,-218.28097 815.50815,-218.20873 811.28125,-219.40625 C 807.4464,-220.4927 806.10867,-221.47862 802.21875,-222.53125 C 802.21874,-222.53125 802.21875,-222.375 802.21875,-222.375 C 805.70293,-220.98015 807.28816,-219.4556 810.75,-218.34375 C 812.41793,-217.80803 814.20578,-217.55701 815.96875,-217.46875 C 815.44911,-217.11663 814.46836,-216.55423 812.65625,-215.9375 C 809.89059,-214.99625 806.06601,-214.00213 804.5,-213.78125 C 803.68206,-213.66586 802.8669,-213.65842 802.21875,-213.75 C 801.56379,-213.83321 801.08615,-213.96827 800.96875,-214 C 801.13079,-213.92536 801.40274,-213.65956 800.78125,-212.90625 C 799.91125,-211.85172 794.77162,-210.247 791.90625,-209.46875 C 789.05372,-208.69399 785.64713,-208.51055 781.46875,-209.5625 C 777.26192,-210.62163 776.11206,-211.60416 771.125,-212.71875 C 775.60954,-211.25929 777.09435,-209.58352 780.9375,-208.46875 C 782.52254,-208.00898 784.22429,-207.8305 785.90625,-207.78125 C 785.46468,-207.44449 784.39374,-206.75352 782.40625,-206 C 779.67232,-204.96351 775.95427,-203.83731 774.40625,-203.5625 C 772.94163,-203.30248 771.34667,-203.67904 771.0625,-203.75 C 771.22275,-203.67035 771.44294,-203.42902 770.84375,-202.6875 C 769.97909,-201.61744 764.92723,-199.86935 762.09375,-199 C 759.27295,-198.13453 755.88625,-197.84369 751.75,-198.78125 C 747.99741,-199.63186 746.70215,-200.49772 742.875,-201.375 C 742.875,-201.375 742.875,-201.21875 742.875,-201.21875 C 746.30296,-199.98096 747.86241,-198.58645 751.25,-197.6875 C 752.88216,-197.25436 754.61704,-197.10449 756.34375,-197.125 C 755.83482,-196.74083 754.867,-196.10318 753.09375,-195.375 C 750.38741,-194.26366 746.65742,-193.06719 745.125,-192.75 C 744.3246,-192.58431 743.51269,-192.53138 742.875,-192.59375 C 742.875,-192.59375 742.875,-192.07823 742.875,-191.67146 C 742.875,-191.40639 742.875,-191.1875 742.875,-191.1875 C 743.10145,-191.33218 743.32391,-191.46011 743.59375,-191.5625 C 744.67427,-191.97248 745.76536,-191.77827 749.59375,-193.25 C 753.42218,-194.72174 754.81787,-195.25498 755.5,-195.65625 C 756.1796,-196.05603 757.11165,-196.53562 757.71875,-197.1875 C 759.5456,-197.32525 761.2895,-197.68073 762.65625,-198.1875 C 765.62437,-199.28802 767.53162,-199.99369 769.4375,-200.65625 C 771.34336,-201.31879 771.79159,-202.07112 772.84375,-202.4375 C 773.9353,-202.81761 775.03886,-202.60288 778.90625,-203.96875 C 782.7737,-205.33461 784.18941,-205.79583 784.875,-206.1875 C 785.57609,-206.58802 786.57581,-207.12048 787.1875,-207.78125 C 789.1583,-207.83591 791.00435,-208.16588 792.46875,-208.65625 C 795.47023,-209.66133 797.3949,-210.27796 799.3125,-210.90625 C 800.8511,-211.41035 801.48652,-211.95302 802.21875,-212.34375 C 802.44891,-212.47806 802.69449,-212.59748 802.96875,-212.6875 C 804.06698,-213.04798 805.1502,-212.76887 809.0625,-214 C 812.97483,-215.23113 814.42855,-215.67295 815.125,-216.03125 C 815.81888,-216.38822 816.75515,-216.82386 817.375,-217.4375 C 819.24021,-217.46016 821.01081,-217.70433 822.40625,-218.125 C 825.43668,-219.03854 827.39863,-219.5551 829.34375,-220.09375 C 831.28886,-220.63239 831.76993,-221.35827 832.84375,-221.65625 C 833.95776,-221.9654 835.06369,-221.64886 839.03125,-222.6875 C 842.99886,-223.72613 844.44883,-224.12023 845.15625,-224.4375 C 845.89112,-224.76709 846.97008,-225.19122 847.59375,-225.8125 C 849.59149,-225.6965 851.45118,-225.83259 852.9375,-226.1875 C 856.01561,-226.9225 858.02094,-227.28844 860,-227.6875 C 861.58792,-228.00768 862.24429,-228.47805 863,-228.78125 C 863.23757,-228.88805 863.46695,-228.97401 863.75,-229.03125 C 864.88347,-229.26044 866.05448,-228.82232 870.09375,-229.53125 C 874.13308,-230.24018 875.594,-230.45834 876.3125,-230.71875 C 877.02836,-230.97819 878.01678,-231.28599 878.65625,-231.8125 C 880.58052,-231.57301 882.40413,-231.58797 883.84375,-231.8125 C 886.97008,-232.30012 888.9983,-232.51317 891,-232.78125 C 893.00171,-233.04932 893.48869,-233.72639 894.59375,-233.875 C 895.74014,-234.02918 896.86967,-233.57343 900.9375,-234.09375 C 905.00534,-234.61407 906.49763,-234.78948 907.21875,-235.03125 C 907.95585,-235.27839 909.01684,-235.61748 909.65625,-236.15625 C 911.70632,-235.82072 913.63003,-235.75829 915.15625,-235.9375 C 918.29856,-236.30646 920.33619,-236.49686 922.34375,-236.71875 C 923.95451,-236.89677 924.60842,-237.32695 925.375,-237.5625 C 925.61594,-237.64802 925.86912,-237.7181 926.15625,-237.75 C 927.30603,-237.87772 928.45754,-237.40335 932.53125,-237.875 C 936.60499,-238.34665 938.09034,-238.4856 938.8125,-238.71875 C 939.53196,-238.95102 940.51274,-239.19221 941.15625,-239.6875 C 943.09262,-239.35404 944.92631,-239.28326 946.375,-239.4375 C 949.52102,-239.77245 951.55256,-239.95609 953.5625,-240.15625 C 955.57246,-240.35639 956.04664,-240.98264 957.15625,-241.09375 C 958.30739,-241.20903 959.45268,-240.72869 963.53125,-241.15625 C 967.60986,-241.58381 969.12011,-241.71834 969.84375,-241.9375 C 970.5829,-242.16136 971.63947,-242.45075 972.28125,-242.96875 C 974.33835,-242.57008 976.28312,-242.47535 977.8125,-242.625 C 980.96123,-242.9331 982.98834,-243.09825 985,-243.28125 C 986.61407,-243.42807 987.2631,-243.8418 988.03125,-244.0625 C 988.27267,-244.14336 988.52478,-244.19241 988.8125,-244.21875 C 989.96461,-244.3242 991.10546,-243.826 995.1875,-244.21875 C 999.26958,-244.6115 1000.7764,-244.74959 1001.5,-244.96875 C 1002.2209,-245.18708 1003.1997,-245.41645 1003.8438,-245.90625 C 1005.7818,-245.55626 1007.6126,-245.45187 1009.0625,-245.59375 C 1012.2112,-245.90185 1014.2383,-246.067 1016.25,-246.25 C 1018.2616,-246.43299 1018.7642,-247.08802 1019.875,-247.1875 C 1021.0273,-247.29073 1022.1672,-246.80267 1026.25,-247.1875 C 1030.3329,-247.57232 1031.8387,-247.6885 1032.5625,-247.90625 C 1033.3018,-248.12868 1034.3581,-248.42074 1035,-248.9375 C 1037.0574,-248.53573 1039.0029,-248.43417 1040.5313,-248.59375 C 1043.6779,-248.92227 1045.7084,-249.11645 1047.7188,-249.3125 C 1049.3318,-249.46979 1049.9844,-249.94398 1050.75,-250.1875 C 1050.9907,-250.27554 1051.2132,-250.30887 1051.5,-250.34375 C 1052.6483,-250.48345 1053.8167,-250.00384 1057.875,-250.59375 C 1061.9333,-251.18367 1063.4368,-251.37089 1064.1563,-251.625 C 1064.873,-251.87816 1065.8308,-252.18307 1066.4688,-252.71875 C 1068.3885,-252.50681 1070.1887,-252.56734 1071.625,-252.8125 C 1074.7441,-253.3449 1076.7366,-253.74111 1078.7188,-254.125 C 1080.7009,-254.50887 1081.1931,-255.16465 1082.2813,-255.40625 C 1083.4101,-255.65691 1084.5516,-255.28996 1088.5313,-256.28125 C 1092.5109,-257.27253 1093.9609,-257.70055 1094.6563,-258.0625 C 1095.3786,-258.43851 1096.4182,-258.93308 1097.0313,-259.59375 C 1098.9943,-259.6058 1100.825,-259.8848 1102.25,-260.4375 C 1105.2012,-261.58211 1107.1232,-262.30692 1108.9375,-263.1875 C 1110.3932,-263.89403 1111.2723,-264.87391 1111.4844,-265.17188 C 1111.6966,-265.46984 1111.5962,-265.91718 1111.6223,-265.93863 C 1111.6652,-265.97387 1111.9416,-266.0236 1112.1013,-266.36707 C 1112.9602,-268.21415 1114.4223,-272.01166 1114.5365,-272.69652 C 1114.6502,-273.37868 1114.7003,-274.04426 1114.751,-274.44149 C 1114.7804,-274.67101 1114.6043,-275.30693 1114.6264,-275.36553 C 1114.6573,-275.44759 1114.9309,-275.63081 1114.9863,-275.88024 C 1115.2526,-277.07857 1115.0752,-278.07153 1114.8612,-279.48917 C 1114.6472,-280.90681 1113.8775,-284.11131 1113.2243,-284.96543 C 1112.5654,-285.82715 1112.0014,-285.9766 1111.4764,-285.96609 C 1111.2678,-285.69633 1111.6132,-285.703 1111.639,-285.65348 C 1112.3196,-285.60269 1112.573,-285.28484 1113.0582,-284.75686 C 1113.5434,-284.22888 1114.501,-280.8173 1114.6376,-279.36691 C 1114.7742,-277.91652 1114.8276,-276.50671 1114.5496,-275.89827 C 1114.2715,-275.28982 1113.6054,-275.46963 1113.313,-275.40375 C 1113.844,-275.21786 1114.2038,-275.19053 1114.2654,-274.34607 C 1114.3247,-273.53269 1114.1322,-272.70638 1113.7456,-271.54045 C 1113.3544,-270.36044 1111.9004,-267.19047 1111.4599,-266.94168 C 1111.0076,-266.68617 1110.5075,-266.75969 1110.1719,-266.89063 z" | ||
2421 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6997);enable-background:new" | ||
2422 | sodipodi:nodetypes="cssscscsscsssccscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssccscsscscssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsszsszssszzcczzzczzzc" /> | ||
2423 | <path | ||
2424 | id="path8103" | ||
2425 | d="M 988.75,-263.84375 C 990.66161,-263.20935 993.30027,-262.08534 994.875,-261.03125 C 996.44977,-259.97716 997.7711,-259.54873 1000.4063,-257.65625 C 1003.0145,-255.78311 1005.4332,-254.64103 1007.7188,-253.59375 C 1010.1881,-252.46228 1013.4709,-251.43901 1017.25,-249.65625 C 1016.0428,-250.91465 1010.111,-253.0207 1008.2188,-253.84375 C 1006.3266,-254.66679 1004.0908,-255.77424 1001.2813,-257.625 C 998.47169,-259.47575 997.65906,-260.10654 995.28125,-261.34375 C 992.90343,-262.58094 991.20137,-263.29295 988.75,-263.84375 z" | ||
2426 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6953);enable-background:new" /> | ||
2427 | <path | ||
2428 | id="path8105" | ||
2429 | d="M 957.5,-260.78125 C 959.41,-260.16315 962.08288,-259.07191 963.65625,-258.03125 C 965.22964,-256.99059 966.55233,-256.54873 969.1875,-254.65625 C 971.79573,-252.7831 974.21442,-251.64104 976.5,-250.59375 C 978.96931,-249.46228 982.25213,-248.439 986.03125,-246.65625 C 984.82397,-247.91465 978.82971,-250.05195 976.9375,-250.875 C 975.04533,-251.69804 972.84084,-252.8055 970.03125,-254.65625 C 967.22167,-256.507 966.4383,-257.09557 964.0625,-258.3125 C 961.68672,-259.52941 959.94929,-260.25135 957.5,-260.78125 z" | ||
2430 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6993);enable-background:new" /> | ||
2431 | <path | ||
2432 | id="path8107" | ||
2433 | d="M 926.09375,-257.375 C 928.00147,-256.77755 930.64723,-255.71116 932.21875,-254.6875 C 933.79025,-253.66385 935.08897,-253.24779 937.71875,-251.40625 C 940.32166,-249.58352 942.74762,-248.43405 945.03125,-247.40625 C 947.49845,-246.29584 950.7866,-245.31302 954.5625,-243.5625 C 953.35627,-244.8106 947.3906,-246.88059 945.5,-247.6875 C 943.60942,-248.4944 941.39758,-249.57854 938.59375,-251.375 C 935.7899,-253.17144 934.96671,-253.77751 932.59375,-254.96875 C 930.22078,-256.15999 928.54013,-256.87158 926.09375,-257.375 z" | ||
2434 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6989);enable-background:new" /> | ||
2435 | <path | ||
2436 | id="path8109" | ||
2437 | d="M 894.90625,-253.5625 C 896.80838,-253.00895 899.49326,-251.97363 901.0625,-250.96875 C 902.63173,-249.96388 903.93651,-249.56011 906.5625,-247.75 C 909.16162,-245.95836 911.56284,-244.87811 913.84375,-243.875 C 916.30803,-242.79126 919.60359,-241.83471 923.375,-240.125 C 922.1702,-241.36007 916.20084,-243.36978 914.3125,-244.15625 C 912.42418,-244.94272 910.2373,-245.98705 907.4375,-247.75 C 904.63773,-249.51294 903.83831,-250.11836 901.46875,-251.28125 C 899.09918,-252.44413 897.3455,-253.11537 894.90625,-253.5625 z" | ||
2438 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6985);enable-background:new" /> | ||
2439 | <path | ||
2440 | id="path8111" | ||
2441 | d="M 863.71875,-248.65625 C 865.59937,-248.22716 868.22302,-247.27587 869.78125,-246.34375 C 871.33948,-245.41164 872.63358,-245.08599 875.25,-243.34375 C 877.83971,-241.61931 880.23067,-240.63573 882.5,-239.71875 C 884.95176,-238.72806 888.23959,-237.84168 892,-236.21875 C 890.79869,-237.42609 884.84751,-239.28484 882.96875,-240 C 881.09,-240.71517 878.88335,-241.68442 876.09375,-243.375 C 873.30412,-245.06557 872.50914,-245.60322 870.15625,-246.65625 C 867.80333,-247.70926 866.13041,-248.36873 863.71875,-248.65625 z" | ||
2442 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6965);enable-background:new" /> | ||
2443 | <path | ||
2444 | id="path8113" | ||
2445 | d="M 833.15625,-241.375 C 835.00461,-241.07856 837.6257,-240.39868 839.15625,-239.59375 C 840.68683,-238.78882 841.96999,-238.53802 844.53125,-237.0625 C 847.06629,-235.60204 849.42193,-234.73741 851.65625,-234 C 854.07024,-233.20332 857.31336,-232.53311 861.03125,-231.15625 C 859.84354,-232.28498 853.94353,-233.746 852.09375,-234.3125 C 850.24398,-234.879 848.09033,-235.68642 845.34375,-237.15625 C 842.59718,-238.62608 841.84239,-239.07653 839.53125,-239.9375 C 837.2201,-240.79845 835.52654,-241.25759 833.15625,-241.375 z" | ||
2446 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6981);enable-background:new" /> | ||
2447 | <path | ||
2448 | id="path8115" | ||
2449 | d="M 802.90625,-232.3125 C 804.72845,-232.10123 807.27201,-231.51193 808.78125,-230.78125 C 810.2905,-230.05059 811.53693,-229.85127 814.0625,-228.5 C 816.56226,-227.16254 818.89404,-226.45157 821.09375,-225.84375 C 823.47028,-225.18708 826.65839,-224.77087 830.3125,-223.65625 C 829.14515,-224.70121 823.38362,-225.75954 821.5625,-226.21875 C 819.74139,-226.67796 817.61025,-227.34571 814.90625,-228.65625 C 812.20222,-229.96677 811.43519,-230.37615 809.15625,-231.125 C 806.8773,-231.87383 805.243,-232.30431 802.90625,-232.3125 z" | ||
2450 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6977);enable-background:new" /> | ||
2451 | <path | ||
2452 | id="path8117" | ||
2453 | d="M 773.1875,-222.1875 C 774.99859,-222.0088 777.50809,-221.52244 779,-220.84375 C 780.49194,-220.16506 781.7534,-220.04553 784.25,-218.78125 C 786.72107,-217.52987 789.04005,-216.88511 791.21875,-216.34375 C 793.57262,-215.75887 796.71009,-215.44623 800.3125,-214.5 C 799.16166,-215.49116 793.45999,-216.2833 791.65625,-216.6875 C 789.85253,-217.0917 787.74072,-217.70866 785.0625,-218.9375 C 782.38432,-220.16634 781.65905,-220.54839 779.40625,-221.21875 C 777.15346,-221.88909 775.50998,-222.22107 773.1875,-222.1875 z" | ||
2454 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6973);enable-background:new" /> | ||
2455 | <path | ||
2456 | id="path8119" | ||
2457 | d="M 743.5625,-211.1875 C 745.35531,-211.05839 747.83563,-210.63785 749.3125,-210 C 750.7894,-209.36215 752.0286,-209.25844 754.5,-208.0625 C 756.94618,-206.87878 759.22054,-206.31584 761.375,-205.84375 C 763.70267,-205.33372 766.7946,-205.16311 770.375,-204.28125 C 769.23121,-205.25185 763.62741,-205.8719 761.84375,-206.21875 C 760.06008,-206.56559 757.9609,-207.10631 755.3125,-208.25 C 752.66409,-209.39368 751.91755,-209.76631 749.6875,-210.375 C 747.45742,-210.98368 745.86156,-211.28466 743.5625,-211.1875 z" | ||
2458 | style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6969);enable-background:new" /> | ||
2459 | <g | ||
2460 | id="g8121" | ||
2461 | style="fill:#ffffff;fill-opacity:1;filter:url(#filter7345)"> | ||
2462 | <path | ||
2463 | sodipodi:nodetypes="czzzczzc" | ||
2464 | id="path8123" | ||
2465 | d="M 744.9375,-212.11731 C 744.9375,-212.11731 752.15979,-215.34049 754,-215.61731 C 755.84021,-215.89413 757.35225,-215.62054 760,-215.05481 C 762.64775,-214.48908 768.7357,-212.83963 771.1875,-211.67981 C 773.6393,-210.51999 776.5,-208.11731 776.5,-208.11731 C 776.5,-208.11731 769.35356,-210.8975 766.3125,-211.67981 C 763.27144,-212.46212 758.66789,-213.76355 755.9375,-213.99231 C 753.20711,-214.22107 744.9375,-212.11731 744.9375,-212.11731 z" | ||
2466 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" /> | ||
2467 | <path | ||
2468 | sodipodi:nodetypes="czzzczzc" | ||
2469 | id="path8125" | ||
2470 | d="M 735.46875,-206.95416 C 735.46875,-206.95416 739.12854,-209.17734 740.96875,-209.45416 C 742.80896,-209.73098 744.6335,-209.20739 747.28125,-208.64166 C 749.929,-208.07593 756.01695,-206.42648 758.46875,-205.26666 C 760.92055,-204.10684 765.03125,-203.14166 765.03125,-203.14166 C 765.03125,-203.14166 756.63481,-204.48435 753.59375,-205.26666 C 750.55269,-206.04897 745.63664,-207.6004 742.90625,-207.82916 C 740.17586,-208.05792 735.46875,-206.95416 735.46875,-206.95416 z" | ||
2471 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2472 | <path | ||
2473 | sodipodi:nodetypes="czzzczzc" | ||
2474 | id="path8127" | ||
2475 | d="M 759.85042,-217.61116 C 759.85042,-217.61116 768.39412,-220.90973 770.2482,-221.06902 C 772.10229,-221.22832 773.88986,-220.58982 776.4963,-219.85694 C 779.10274,-219.12406 785.07354,-217.091 787.44666,-215.77769 C 789.81978,-214.46438 793.86083,-213.23987 793.86083,-213.23987 C 793.86083,-213.23987 785.5667,-215.11352 782.58152,-216.08754 C 779.59633,-217.06156 774.78883,-218.92232 772.0785,-219.32416 C 769.36817,-219.726 759.85042,-217.61116 759.85042,-217.61116 z" | ||
2476 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2477 | <path | ||
2478 | sodipodi:nodetypes="czzzczzc" | ||
2479 | id="path8129" | ||
2480 | d="M 775.19813,-223.2266 C 775.19813,-223.2266 782.96946,-226.00904 784.82644,-226.13009 C 786.68341,-226.25113 788.45744,-225.57592 791.04822,-224.78947 C 793.63899,-224.00302 799.56662,-221.8473 801.91216,-220.48535 C 804.25771,-219.1234 808.27265,-217.81585 808.27265,-217.81585 C 808.27265,-217.81585 800.01892,-219.86008 797.05444,-220.89543 C 794.08997,-221.93078 789.32185,-223.89024 786.62038,-224.34786 C 783.91891,-224.80549 775.19813,-223.2266 775.19813,-223.2266 z" | ||
2481 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2482 | <path | ||
2483 | inkscape:transform-center-y="-4.3190906" | ||
2484 | inkscape:transform-center-x="13.852145" | ||
2485 | sodipodi:nodetypes="czzzczzc" | ||
2486 | id="path8131" | ||
2487 | d="M 789.64298,-227.95417 C 789.64298,-227.95417 798.32554,-231.47448 800.18452,-231.55952 C 802.04349,-231.64455 803.8041,-230.9351 806.37915,-230.09859 C 808.9542,-229.2621 814.83894,-226.99193 817.15766,-225.58479 C 819.47638,-224.17764 823.46523,-222.79255 823.46523,-222.79255 C 823.46523,-222.79255 815.25266,-224.99632 812.3088,-226.08891 C 809.36494,-227.1815 804.63568,-229.23299 801.94358,-229.74288 C 799.25149,-230.25276 789.64298,-227.95417 789.64298,-227.95417 z" | ||
2488 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2489 | <path | ||
2490 | inkscape:transform-center-y="-4.3190906" | ||
2491 | inkscape:transform-center-x="13.852145" | ||
2492 | sodipodi:nodetypes="czzzczzc" | ||
2493 | id="path8133" | ||
2494 | d="M 804.49513,-233.32948 C 804.49513,-233.32948 812.30269,-235.91229 814.16167,-235.99733 C 816.02064,-236.08236 817.78125,-235.37291 820.3563,-234.5364 C 822.93135,-233.69991 828.81609,-231.42974 831.13481,-230.0226 C 833.45353,-228.61545 837.44238,-227.23036 837.44238,-227.23036 C 837.44238,-227.23036 829.22981,-229.43413 826.28595,-230.52672 C 823.34209,-231.61931 818.61283,-233.6708 815.92073,-234.18069 C 813.22864,-234.69057 804.49513,-233.32948 804.49513,-233.32948 z" | ||
2495 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2496 | <path | ||
2497 | inkscape:transform-center-y="-4.3190906" | ||
2498 | inkscape:transform-center-x="13.852145" | ||
2499 | sodipodi:nodetypes="czzzczzc" | ||
2500 | id="path8135" | ||
2501 | d="M 819.55763,-237.57948 C 819.55763,-237.57948 828.11519,-240.16229 829.97417,-240.24733 C 831.83314,-240.33236 833.59375,-239.62291 836.1688,-238.7864 C 838.74385,-237.94991 844.62859,-235.67974 846.94731,-234.2726 C 849.26603,-232.86545 853.25488,-231.48036 853.25488,-231.48036 C 853.25488,-231.48036 845.04231,-233.68413 842.09845,-234.77672 C 839.15459,-235.86931 834.42533,-237.9208 831.73323,-238.43069 C 829.04114,-238.94057 819.55763,-237.57948 819.55763,-237.57948 z" | ||
2502 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2503 | <path | ||
2504 | inkscape:transform-center-y="-4.9269042" | ||
2505 | inkscape:transform-center-x="13.64141" | ||
2506 | sodipodi:nodetypes="czzzczzc" | ||
2507 | id="path8137" | ||
2508 | d="M 836.23395,-242.60125 C 836.23395,-242.60125 843.20097,-244.58848 845.06179,-244.56882 C 846.9226,-244.54915 848.64052,-243.7418 851.16444,-242.76177 C 853.68837,-241.78177 859.4361,-239.18419 861.672,-237.64886 C 863.9079,-236.11351 867.81253,-234.50625 867.81253,-234.50625 C 867.81253,-234.50625 859.73692,-237.16847 856.85917,-238.42491 C 853.98143,-239.68136 849.37505,-241.99561 846.71589,-242.65612 C 844.05674,-243.31661 836.23395,-242.60125 836.23395,-242.60125 z" | ||
2509 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2510 | <path | ||
2511 | inkscape:transform-center-y="-5.1542119" | ||
2512 | inkscape:transform-center-x="13.55068" | ||
2513 | sodipodi:nodetypes="czzzczzc" | ||
2514 | id="path8139" | ||
2515 | d="M 850.73028,-246.00461 C 850.73028,-246.00461 858.41812,-248.03229 860.2781,-247.97315 C 862.13807,-247.914 863.83848,-247.07036 866.34103,-246.03699 C 868.84358,-245.00365 874.5349,-242.28467 876.73771,-240.70224 C 878.94053,-239.11979 882.81016,-237.43004 882.81016,-237.43004 C 882.81016,-237.43004 874.79287,-240.26302 871.94244,-241.58026 C 869.09201,-242.89749 864.53578,-245.30898 861.89124,-246.02576 C 859.2467,-246.74254 850.73028,-246.00461 850.73028,-246.00461 z" | ||
2516 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2517 | <path | ||
2518 | inkscape:transform-center-y="-5.4740887" | ||
2519 | inkscape:transform-center-x="13.41151" | ||
2520 | sodipodi:nodetypes="czzzczzc" | ||
2521 | id="path8141" | ||
2522 | d="M 864.82496,-249.21081 C 864.82496,-249.21081 872.99448,-251.17987 874.85184,-251.06477 C 876.70919,-250.94965 878.38342,-250.05521 880.85374,-248.94698 C 883.32405,-247.83877 888.93094,-244.94971 891.08512,-243.30167 C 893.2393,-241.65363 897.05632,-239.84815 897.05632,-239.84815 C 897.05632,-239.84815 889.12793,-242.92121 886.31845,-244.32365 C 883.50896,-245.72609 879.02739,-248.27364 876.40562,-249.06971 C 873.78386,-249.86577 864.82496,-249.21081 864.82496,-249.21081 z" | ||
2523 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2524 | <path | ||
2525 | inkscape:transform-center-y="-5.79376" | ||
2526 | inkscape:transform-center-x="13.258805" | ||
2527 | sodipodi:nodetypes="czzzczzc" | ||
2528 | id="path8143" | ||
2529 | d="M 881.38485,-251.60282 C 881.38485,-251.60282 889.47021,-253.51091 891.32322,-253.33946 C 893.17622,-253.16799 894.82252,-252.22313 897.25804,-251.04038 C 899.69357,-249.85767 905.21013,-246.79968 907.31327,-245.08699 C 909.41641,-243.37429 913.17684,-241.45373 913.17684,-241.45373 C 913.17684,-241.45373 905.34544,-244.76613 902.57984,-246.25323 C 899.81423,-247.74035 895.41209,-250.42282 892.8157,-251.29814 C 890.21933,-252.17345 881.38485,-251.60282 881.38485,-251.60282 z" | ||
2530 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2531 | <path | ||
2532 | inkscape:transform-center-y="-5.7433893" | ||
2533 | inkscape:transform-center-x="13.28378" | ||
2534 | sodipodi:nodetypes="czzzczzc" | ||
2535 | id="path8145" | ||
2536 | d="M 896.58415,-254.34724 C 896.58415,-254.34724 904.22581,-255.77494 906.07962,-255.61239 C 907.93342,-255.44983 909.58424,-254.51289 912.02541,-253.34186 C 914.46659,-252.17086 919.99779,-249.1394 922.10913,-247.43684 C 924.22047,-245.73426 927.99009,-243.83179 927.99009,-243.83179 C 927.99009,-243.83179 920.14286,-247.10653 917.37014,-248.58034 C 914.59743,-250.05414 910.18245,-252.71543 907.58189,-253.57827 C 904.98134,-254.44109 896.58415,-254.34724 896.58415,-254.34724 z" | ||
2537 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2538 | <path | ||
2539 | inkscape:transform-center-y="-5.7433893" | ||
2540 | inkscape:transform-center-x="13.28378" | ||
2541 | sodipodi:nodetypes="czzzczzc" | ||
2542 | id="path8147" | ||
2543 | d="M 911.45328,-255.98544 C 911.45328,-255.98544 920.09494,-257.53814 921.94875,-257.37559 C 923.80255,-257.21303 925.45337,-256.27609 927.89454,-255.10506 C 930.33572,-253.93406 935.86692,-250.9026 937.97826,-249.20004 C 940.0896,-247.49746 943.85922,-245.59499 943.85922,-245.59499 C 943.85922,-245.59499 936.01199,-248.86973 933.23927,-250.34354 C 930.46656,-251.81734 926.05158,-254.47863 923.45102,-255.34147 C 920.85047,-256.20429 911.45328,-255.98544 911.45328,-255.98544 z" | ||
2544 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2545 | <path | ||
2546 | inkscape:transform-center-y="-5.7433893" | ||
2547 | inkscape:transform-center-x="13.28378" | ||
2548 | sodipodi:nodetypes="czzzczzc" | ||
2549 | id="path8149" | ||
2550 | d="M 927.70328,-258.29794 C 927.70328,-258.29794 935.34494,-259.16314 937.19875,-259.00059 C 939.05255,-258.83803 940.70337,-257.90109 943.14454,-256.73006 C 945.58572,-255.55906 951.11692,-252.5276 953.22826,-250.82504 C 955.3396,-249.12246 959.10922,-247.21999 959.10922,-247.21999 C 959.10922,-247.21999 951.26199,-250.49473 948.48927,-251.96854 C 945.71656,-253.44234 941.30158,-256.10363 938.70102,-256.96647 C 936.10047,-257.82929 927.70328,-258.29794 927.70328,-258.29794 z" | ||
2551 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2552 | <path | ||
2553 | inkscape:transform-center-y="-5.7433893" | ||
2554 | inkscape:transform-center-x="13.28378" | ||
2555 | sodipodi:nodetypes="czzzczzc" | ||
2556 | id="path8151" | ||
2557 | d="M 942.82828,-259.48544 C 942.82828,-259.48544 951.40744,-260.97564 953.26125,-260.81309 C 955.11505,-260.65053 956.76587,-259.71359 959.20704,-258.54256 C 961.64822,-257.37156 967.17942,-254.3401 969.29076,-252.63754 C 971.4021,-250.93496 975.17172,-249.03249 975.17172,-249.03249 C 975.17172,-249.03249 967.32449,-252.30723 964.55177,-253.78104 C 961.77906,-255.25484 957.36408,-257.91613 954.76352,-258.77897 C 952.16297,-259.64179 942.82828,-259.48544 942.82828,-259.48544 z" | ||
2558 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2559 | <path | ||
2560 | inkscape:transform-center-y="-5.7433893" | ||
2561 | inkscape:transform-center-x="13.28378" | ||
2562 | sodipodi:nodetypes="czzzczzc" | ||
2563 | id="path8153" | ||
2564 | d="M 959.07828,-261.54794 C 959.07828,-261.54794 966.90744,-262.35064 968.76125,-262.18809 C 970.61505,-262.02553 972.26587,-261.08859 974.70704,-259.91756 C 977.14822,-258.74656 982.67942,-255.7151 984.79076,-254.01254 C 986.9021,-252.30996 990.67172,-250.40749 990.67172,-250.40749 C 990.67172,-250.40749 982.82449,-253.68223 980.05177,-255.15604 C 977.27906,-256.62984 972.86408,-259.29113 970.26352,-260.15397 C 967.66297,-261.01679 959.07828,-261.54794 959.07828,-261.54794 z" | ||
2565 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2566 | <path | ||
2567 | inkscape:transform-center-y="-5.7433893" | ||
2568 | inkscape:transform-center-x="13.28378" | ||
2569 | sodipodi:nodetypes="czzzczzc" | ||
2570 | id="path8155" | ||
2571 | d="M 974.45328,-262.79794 C 974.45328,-262.79794 982.84494,-263.97564 984.69875,-263.81309 C 986.55255,-263.65053 988.20337,-262.71359 990.64454,-261.54256 C 993.08572,-260.37156 998.61692,-257.3401 1000.7283,-255.63754 C 1002.8396,-253.93496 1006.6092,-252.03249 1006.6092,-252.03249 C 1006.6092,-252.03249 998.76199,-255.30723 995.98927,-256.78104 C 993.21656,-258.25484 988.80158,-260.91613 986.20102,-261.77897 C 983.60047,-262.64179 974.45328,-262.79794 974.45328,-262.79794 z" | ||
2572 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2573 | <path | ||
2574 | inkscape:transform-center-y="-5.7433893" | ||
2575 | inkscape:transform-center-x="13.28378" | ||
2576 | sodipodi:nodetypes="czzzczzc" | ||
2577 | id="path8157" | ||
2578 | d="M 990.64078,-264.86044 C 990.64078,-264.86044 997.53244,-265.85064 999.38625,-265.68809 C 1001.2401,-265.52553 1002.8909,-264.58859 1005.332,-263.41756 C 1007.7732,-262.24656 1013.3044,-259.2151 1015.4158,-257.51254 C 1017.5271,-255.80996 1021.2967,-253.90749 1021.2967,-253.90749 C 1021.2967,-253.90749 1013.4495,-257.18223 1010.6768,-258.65604 C 1007.9041,-260.12984 1003.4891,-262.79113 1000.8885,-263.65397 C 998.28797,-264.51679 990.64078,-264.86044 990.64078,-264.86044 z" | ||
2579 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2580 | <path | ||
2581 | inkscape:transform-center-y="-5.7433893" | ||
2582 | inkscape:transform-center-x="13.28378" | ||
2583 | sodipodi:nodetypes="czzzczzc" | ||
2584 | id="path8159" | ||
2585 | d="M 1007.7658,-265.79794 C 1007.7658,-265.79794 1014.5949,-266.97564 1016.4488,-266.81309 C 1018.3026,-266.65053 1019.9534,-265.71359 1022.3945,-264.54256 C 1024.8357,-263.37156 1030.3669,-260.3401 1032.4783,-258.63754 C 1034.5896,-256.93496 1038.3592,-255.03249 1038.3592,-255.03249 C 1038.3592,-255.03249 1030.512,-258.30723 1027.7393,-259.78104 C 1024.9666,-261.25484 1020.5516,-263.91613 1017.951,-264.77897 C 1015.3505,-265.64179 1007.7658,-265.79794 1007.7658,-265.79794 z" | ||
2586 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2587 | <path | ||
2588 | inkscape:transform-center-y="-5.7433893" | ||
2589 | inkscape:transform-center-x="13.28378" | ||
2590 | sodipodi:nodetypes="czzzczzc" | ||
2591 | id="path8161" | ||
2592 | d="M 1023.8908,-267.79794 C 1023.8908,-267.79794 1029.9699,-268.22564 1031.8238,-268.06309 C 1033.6776,-267.90053 1035.3284,-266.96359 1037.7695,-265.79256 C 1040.2107,-264.62156 1045.7419,-261.5901 1047.8533,-259.88754 C 1049.9646,-258.18496 1053.7342,-256.28249 1053.7342,-256.28249 C 1053.7342,-256.28249 1045.887,-259.55723 1043.1143,-261.03104 C 1040.3416,-262.50484 1035.9266,-265.16613 1033.326,-266.02897 C 1030.7255,-266.89179 1023.8908,-267.79794 1023.8908,-267.79794 z" | ||
2593 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2594 | <path | ||
2595 | inkscape:transform-center-y="-5.7433893" | ||
2596 | inkscape:transform-center-x="13.28378" | ||
2597 | sodipodi:nodetypes="czzzczzc" | ||
2598 | id="path8163" | ||
2599 | d="M 1039.7033,-269.17294 C 1039.7033,-269.17294 1046.1574,-269.85064 1048.0113,-269.68809 C 1049.8651,-269.52553 1051.5159,-268.58859 1053.957,-267.41756 C 1056.3982,-266.24656 1061.9294,-263.2151 1064.0408,-261.51254 C 1066.1521,-259.80996 1069.9217,-257.90749 1069.9217,-257.90749 C 1069.9217,-257.90749 1062.0745,-261.18223 1059.3018,-262.65604 C 1056.5291,-264.12984 1052.1141,-266.79113 1049.5135,-267.65397 C 1046.913,-268.51679 1039.7033,-269.17294 1039.7033,-269.17294 z" | ||
2600 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2601 | <path | ||
2602 | inkscape:transform-center-y="-5.1360724" | ||
2603 | inkscape:transform-center-x="13.55813" | ||
2604 | sodipodi:nodetypes="czzzczzc" | ||
2605 | id="path8165" | ||
2606 | d="M 1055.2718,-271.03319 C 1055.2718,-271.03319 1060.7694,-271.94264 1062.6296,-271.88667 C 1064.4897,-271.83067 1066.1915,-270.98993 1068.6957,-269.96081 C 1071.2001,-268.93171 1076.896,-266.22241 1079.1015,-264.64372 C 1081.307,-263.06501 1085.1795,-261.38182 1085.1795,-261.38182 C 1085.1795,-261.38182 1077.1575,-264.20121 1074.3047,-265.5136 C 1071.4521,-266.82598 1066.8918,-269.22973 1064.246,-269.94203 C 1061.6003,-270.65431 1055.2718,-271.03319 1055.2718,-271.03319 z" | ||
2607 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2608 | <path | ||
2609 | inkscape:transform-center-y="-4.6370147" | ||
2610 | inkscape:transform-center-x="13.74758" | ||
2611 | sodipodi:nodetypes="czzzczzc" | ||
2612 | id="path8167" | ||
2613 | d="M 1072.7007,-273.48537 C 1072.7007,-273.48537 1077.2479,-274.64118 1079.1087,-274.67158 C 1080.9694,-274.70196 1082.7083,-273.94109 1085.2576,-273.02927 C 1087.807,-272.1175 1093.6225,-269.67541 1095.899,-268.20077 C 1098.1753,-266.72609 1102.1217,-265.22441 1102.1217,-265.22441 C 1102.1217,-265.22441 1093.9775,-267.66852 1091.067,-268.84713 C 1088.1565,-270.02573 1083.4896,-272.21528 1080.8136,-272.80404 C 1078.1377,-273.39279 1072.7007,-273.48537 1072.7007,-273.48537 z" | ||
2614 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2615 | <path | ||
2616 | inkscape:transform-center-y="-4.4842392" | ||
2617 | inkscape:transform-center-x="13.79933" | ||
2618 | sodipodi:nodetypes="czzzczzc" | ||
2619 | id="path8169" | ||
2620 | d="M 1087.1585,-276.5244 C 1087.1585,-276.5244 1093.1185,-278.29795 1094.9787,-278.35464 C 1096.8387,-278.41131 1098.5883,-277.67509 1101.1502,-276.79939 C 1103.7122,-275.92373 1103.6728,-275.94226 1106.4837,-275.30924 C 1109.2806,-274.67938 1113.5604,-273.79611 1113.5604,-273.79611 C 1113.5604,-273.79611 1109.9449,-273.81239 1106.7681,-274.26225 C 1103.6526,-274.70344 1099.3938,-275.9605 1096.7097,-276.51138 C 1094.0258,-277.06226 1087.1585,-276.5244 1087.1585,-276.5244 z" | ||
2621 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" /> | ||
2622 | <path | ||
2623 | sodipodi:nodetypes="czczc" | ||
2624 | id="path8171" | ||
2625 | d="M 1099.25,-279.92981 C 1099.4112,-279.66119 1110.4581,-284.53027 1111.4375,-284.61731 C 1112.4169,-284.70435 1113.4375,-281.49231 1113.4375,-281.49231 C 1113.4375,-281.49231 1112.6624,-282.99665 1110.5625,-282.55481 C 1108.4626,-282.11297 1099.2616,-279.8834 1099.25,-279.92981 z" | ||
2626 | style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" /> | ||
2627 | </g> | ||
2628 | <path | ||
2629 | id="path8173" | ||
2630 | d="M 1107.4532,-284.0938 C 1107.0345,-283.88097 1107.2976,-283.99991 1106.806,-283.78799 C 1106.3199,-283.57845 1105.0826,-283.2136 1102.7264,-282.32904 C 1099.3953,-281.07847 1096.8962,-280.1756 1095.7005,-279.26294 C 1094.1644,-279.24168 1091.98,-279.02963 1090.0442,-278.54419 C 1087.0627,-277.79653 1085.189,-277.37018 1083.2942,-276.95044 C 1081.3994,-276.53072 1081.6187,-276.30825 1080.4192,-276.07544 C 1079.1226,-275.82378 1078.6978,-276.08443 1074.9817,-275.29419 C 1071.4918,-274.55205 1066.0869,-273.36312 1064.8255,-272.60669 C 1063.2416,-272.78748 1060.958,-272.92847 1058.9817,-272.63794 C 1055.9413,-272.19099 1054.0655,-271.96518 1052.138,-271.73169 C 1051.4826,-271.65232 1051.0969,-271.53091 1050.7942,-271.45044 C 1050.368,-271.31879 1050.1084,-271.19042 1049.4192,-271.10669 C 1048.1076,-270.94733 1047.657,-271.26352 1043.888,-270.82544 C 1040.3341,-270.41236 1034.8826,-269.55262 1033.638,-268.88794 C 1032.039,-269.18524 1029.7802,-269.42213 1027.7942,-269.23169 C 1024.7354,-268.93838 1022.8222,-268.7477 1020.888,-268.57544 C 1018.9537,-268.40318 1019.1993,-268.15307 1017.9817,-268.04419 C 1016.6655,-267.9265 1016.2219,-268.20782 1012.4505,-267.79419 C 1008.9086,-267.40575 1003.4426,-266.58492 1002.1692,-265.91919 C 1000.5703,-266.21389 998.28202,-266.4262 996.29419,-266.23169 C 993.23595,-265.93245 991.35306,-265.75145 989.41919,-265.57544 C 988.7617,-265.5156 988.37915,-265.39688 988.07544,-265.32544 C 987.64779,-265.20649 987.39193,-265.10737 986.70044,-265.04419 C 985.38448,-264.92394 984.94068,-265.23907 981.16919,-264.82544 C 977.61305,-264.43543 972.16365,-263.58628 970.91919,-262.91919 C 969.32056,-263.21338 967.05935,-263.44291 965.07544,-263.23169 C 962.01984,-262.90637 960.1014,-262.70545 958.16919,-262.51294 C 956.23698,-262.32044 956.47932,-262.07206 955.26294,-261.95044 C 953.94806,-261.81898 953.49996,-262.11498 949.73169,-261.66919 C 946.19282,-261.25054 940.75401,-260.37702 939.48169,-259.70044 C 937.88414,-259.98149 935.62173,-260.12087 933.63794,-259.88794 C 930.58596,-259.52958 928.69286,-259.32008 926.76294,-259.10669 C 926.10674,-259.03414 925.72228,-258.934 925.41919,-258.85669 C 924.99242,-258.72947 924.73428,-258.58949 924.04419,-258.51294 C 922.73086,-258.36726 922.27673,-258.68696 918.51294,-258.20044 C 914.96405,-257.74169 909.53431,-256.78142 908.29419,-256.07544 C 906.70114,-256.31968 904.46038,-256.45679 902.48169,-256.20044 C 899.4341,-255.80563 897.53098,-255.55199 895.60669,-255.29419 C 893.68241,-255.0364 893.88058,-254.80066 892.66919,-254.63794 C 891.35973,-254.46204 890.922,-254.74232 887.16919,-254.16919 C 883.6449,-253.63095 878.24604,-252.47002 876.98169,-251.73169 C 875.39419,-251.93523 873.13619,-251.98642 871.16919,-251.63794 C 868.14302,-251.10182 866.2703,-250.77625 864.35669,-250.45044 C 863.70608,-250.33967 863.34298,-250.1795 863.04419,-250.07544 C 862.62352,-249.91056 862.38074,-249.74414 861.70044,-249.60669 C 860.40579,-249.3451 859.97332,-249.61289 856.26294,-248.79419 C 852.76441,-248.02224 847.41699,-246.41126 846.20044,-245.57544 C 844.63766,-245.65289 842.44286,-245.49016 840.51294,-244.98169 C 837.5405,-244.19856 835.69533,-243.7496 833.82544,-243.23169 C 831.95556,-242.71379 832.15884,-242.46441 830.98169,-242.13794 C 829.70923,-241.78504 829.28466,-242.03085 825.63794,-240.95044 C 822.21324,-239.93581 816.9885,-238.01727 815.76294,-237.10669 C 814.22411,-237.09395 812.04311,-236.83447 810.13794,-236.23169 C 807.20688,-235.30435 805.38763,-234.77327 803.54419,-234.16919 C 802.91743,-233.96381 802.55246,-233.77661 802.26294,-233.63794 C 801.85531,-233.42433 801.6096,-233.22919 800.95044,-233.01294 C 799.69598,-232.6014 799.26433,-232.8239 795.66919,-231.57544 C 792.27934,-230.39827 787.07421,-228.36174 785.88794,-227.41919 C 784.36405,-227.35952 782.23789,-227.02432 780.35669,-226.35669 C 777.4593,-225.3284 775.65761,-224.68121 773.82544,-224.04419 C 771.99327,-223.40718 772.19759,-223.19565 771.04419,-222.79419 C 769.79741,-222.36023 769.38058,-222.59447 765.82544,-221.23169 C 762.48677,-219.95189 757.33829,-217.74914 756.13794,-216.76294 C 754.63076,-216.65525 752.50225,-216.26414 750.63794,-215.54419 C 747.76976,-214.43659 746.01414,-213.76263 744.20044,-213.07544 C 743.58378,-212.84181 743.20403,-212.63341 742.91919,-212.48169 C 742.91919,-212.48169 742.91919,-211.38794 742.91919,-211.38794 C 743.03097,-211.6103 743.30518,-212.20537 743.82544,-212.48169 C 744.52341,-212.85241 748.63907,-214.47506 750.63794,-215.20044 C 752.2948,-215.80169 754.79183,-216.52912 756.60669,-216.51294 C 756.90831,-216.51025 757.19431,-216.46204 757.45044,-216.41919 C 759.29293,-216.11094 764.91919,-214.85669 764.91919,-214.85669 C 764.91918,-214.85669 758.6857,-216.50344 757.88794,-216.70044 C 757.69715,-216.74755 757.35222,-216.76916 756.91919,-216.76294 C 758.06465,-217.63265 761.68019,-219.15645 764.26294,-220.20044 C 767.10116,-221.34771 767.37975,-221.45226 769.32544,-221.85669 C 771.33374,-222.27413 772.48169,-222.35669 772.48169,-222.35669 C 772.48169,-222.35668 772.39933,-222.95783 773.45044,-223.48169 C 774.15554,-223.8331 778.33746,-225.37409 780.35669,-226.04419 C 782.30859,-226.69192 785.41409,-227.40269 787.23169,-227.04419 C 789.09492,-226.67669 794.76294,-225.23169 794.76294,-225.23169 C 794.76295,-225.23169 788.47594,-227.1028 787.66919,-227.32544 C 787.47627,-227.37869 787.13835,-227.41148 786.70044,-227.41919 C 787.85878,-228.25207 791.49488,-229.61451 794.10669,-230.57544 C 796.97685,-231.63145 797.27403,-231.73724 799.23169,-232.10669 C 801.08518,-232.45648 802.09053,-232.53217 802.26294,-232.54419 C 802.37654,-232.76143 802.64039,-233.35421 803.16919,-233.60669 C 803.87863,-233.94543 808.09526,-235.31944 810.13794,-235.91919 C 811.83111,-236.4163 814.37871,-236.95596 816.23169,-236.82544 C 816.53964,-236.80376 816.84518,-236.72818 817.10669,-236.66919 C 818.98787,-236.24487 824.70044,-234.63794 824.70044,-234.63794 C 824.70045,-234.63794 818.3587,-236.70319 817.54419,-236.95044 C 817.3494,-237.00958 816.98631,-237.05438 816.54419,-237.07544 C 817.71368,-237.87299 821.40721,-239.13166 824.04419,-240.01294 C 826.942,-240.98141 827.2772,-241.01626 829.26294,-241.29419 C 831.31259,-241.58108 832.45044,-241.60669 832.45044,-241.60669 C 832.45042,-241.60669 832.37769,-242.21366 833.45044,-242.66919 C 834.17004,-242.97476 838.44142,-244.16994 840.51294,-244.66919 C 842.51538,-245.15177 845.71143,-245.59748 847.57544,-245.07544 C 849.48622,-244.54029 855.29419,-242.57544 855.29419,-242.57544 C 855.29419,-242.57544 848.87153,-244.99895 848.04419,-245.29419 C 847.84635,-245.3648 847.46203,-245.43458 847.01294,-245.48169 C 848.20084,-246.21034 851.92821,-247.25577 854.60669,-247.98169 C 857.55011,-248.77944 857.89877,-248.75252 859.91919,-248.88794 C 861.83208,-249.01617 862.86624,-248.95903 863.04419,-248.95044 C 863.16147,-249.1541 863.43595,-249.72992 863.98169,-249.91919 C 864.71388,-250.17313 869.06021,-250.96708 871.16919,-251.29419 C 872.91732,-251.5653 875.57007,-251.77889 877.48169,-251.38794 C 877.79935,-251.32298 878.08691,-251.20243 878.35669,-251.10669 C 880.29743,-250.41796 886.20044,-248.01294 886.20044,-248.01294 C 886.20045,-248.01294 879.66573,-250.96371 878.82544,-251.32544 C 878.62447,-251.41195 878.25031,-251.49223 877.79419,-251.57544 C 879.00069,-252.20862 882.82375,-252.97104 885.54419,-253.48169 C 888.53372,-254.04288 888.84442,-254.01123 890.88794,-254.01294 C 892.9972,-254.01471 894.20044,-253.88794 894.20044,-253.88794 C 894.20044,-253.88793 894.12773,-254.51913 895.23169,-254.82544 C 895.97221,-255.03091 900.35781,-255.65931 902.48169,-255.88794 C 904.53471,-256.10893 907.80032,-256.14016 909.70044,-255.41919 C 911.64823,-254.68012 917.54419,-252.04419 917.54419,-252.04419 C 917.54421,-252.04419 910.98131,-255.22316 910.13794,-255.60669 C 909.93626,-255.69842 909.59573,-255.7929 909.13794,-255.88794 C 910.34886,-256.48982 914.12236,-257.13678 916.85669,-257.54419 C 919.86149,-257.99191 920.1822,-257.99589 922.23169,-257.95044 C 924.17214,-257.90742 925.23868,-257.75621 925.41919,-257.73169 C 925.53811,-257.92485 925.80309,-258.49752 926.35669,-258.63794 C 927.0994,-258.82632 931.51098,-259.37222 933.63794,-259.57544 C 935.40097,-259.74386 938.05803,-259.80973 939.98169,-259.32544 C 940.30137,-259.24496 940.5852,-259.12185 940.85669,-259.01294 C 942.80962,-258.22945 948.76294,-255.54419 948.76294,-255.54419 C 948.76292,-255.54419 942.17103,-258.79767 941.32544,-259.20044 C 941.12322,-259.29676 940.75318,-259.40747 940.29419,-259.51294 C 941.50833,-260.08721 945.33785,-260.63513 948.07544,-261.01294 C 951.08382,-261.42814 951.39851,-261.45557 953.45044,-261.38794 C 955.56842,-261.31813 956.76294,-261.13794 956.76294,-261.13794 C 956.76292,-261.13794 956.68569,-261.77535 957.79419,-262.04419 C 958.53781,-262.22454 962.94595,-262.70774 965.07544,-262.88794 C 967.13391,-263.06211 970.41868,-263.01226 972.32544,-262.23169 C 974.28003,-261.43153 980.20044,-258.70044 980.20044,-258.70044 C 980.20042,-258.70044 973.64051,-262.0092 972.79419,-262.41919 C 972.59182,-262.51724 972.22233,-262.62229 971.76294,-262.73169 C 972.97811,-263.29559 976.77302,-263.84599 979.51294,-264.20044 C 982.52385,-264.58996 982.83425,-264.59809 984.88794,-264.51294 C 986.83233,-264.43234 987.89457,-264.2597 988.07544,-264.23169 C 988.1946,-264.42255 988.45821,-264.977 989.01294,-265.10669 C 989.7572,-265.28069 994.16287,-265.75716 996.29419,-265.91919 C 998.06081,-266.05346 1000.7439,-266.0449 1002.6692,-265.54419 C 1002.9892,-265.46098 1003.2725,-265.34292 1003.5442,-265.23169 C 1005.4988,-264.43153 1011.4505,-261.66919 1011.4505,-261.66919 C 1011.4504,-261.66919 1004.8593,-265.0092 1004.013,-265.41919 C 1003.8106,-265.51724 1003.4411,-265.6223 1002.9817,-265.73169 C 1004.1968,-266.29559 1008.023,-266.81475 1010.763,-267.16919 C 1013.7739,-267.55872 1014.1155,-267.59809 1016.1692,-267.51294 C 1018.2889,-267.42506 1019.4817,-267.20044 1019.4817,-267.20044 C 1019.4817,-267.20044 1019.4033,-267.84946 1020.513,-268.10669 C 1021.2573,-268.27925 1025.6625,-268.73005 1027.7942,-268.88794 C 1029.8548,-269.04054 1033.1371,-268.98471 1035.0442,-268.20044 C 1036.9992,-267.39649 1042.9192,-264.70044 1042.9192,-264.70044 C 1042.9192,-264.70044 1036.3594,-267.97631 1035.513,-268.38794 C 1035.3105,-268.48638 1034.9412,-268.59016 1034.4817,-268.70044 C 1035.6971,-269.26198 1039.4936,-269.82822 1042.2317,-270.20044 C 1045.2407,-270.60949 1045.5544,-270.61602 1047.6067,-270.54419 C 1049.5498,-270.4762 1050.6139,-270.37934 1050.7942,-270.35669 C 1050.913,-270.55109 1051.1788,-271.0855 1051.7317,-271.23169 C 1052.4735,-271.42781 1056.8628,-272.06047 1058.9817,-272.32544 C 1060.7381,-272.54505 1063.387,-272.65775 1065.2942,-272.29419 C 1065.6111,-272.23378 1065.9,-272.10481 1066.1692,-272.01294 C 1068.1054,-271.35202 1074.013,-269.07544 1074.013,-269.07544 C 1074.0129,-269.07544 1067.4763,-271.88199 1066.638,-272.23169 C 1066.4375,-272.31532 1066.0618,-272.40502 1065.6067,-272.48169 C 1066.8104,-273.13215 1070.6258,-273.85364 1073.3255,-274.48169 C 1076.2922,-275.17189 1076.6144,-275.23676 1078.638,-275.35669 C 1080.7266,-275.48049 1081.9192,-275.38794 1081.9192,-275.38794 C 1081.9192,-275.38793 1081.8322,-276.01999 1082.9192,-276.41919 C 1083.6484,-276.68699 1087.9664,-277.75716 1090.0442,-278.23169 C 1092.0527,-278.69038 1095.2121,-279.26099 1097.0442,-278.85669 C 1098.9223,-278.44223 1110.6224,-275.84106 1110.6224,-275.84106 C 1110.6224,-275.84106 1098.2949,-278.86372 1097.4817,-279.10669 C 1097.2872,-279.16481 1096.9231,-279.21295 1096.4817,-279.23169 C 1097.6493,-280.03538 1099.9959,-280.91899 1102.5911,-281.93481 C 1104.2725,-282.59299 1103.5148,-282.3114 1105.367,-282.93841 C 1107.1206,-283.53207 1107.8524,-283.94912 1107.9974,-284.0514 C 1108.3435,-284.25791 1107.6414,-284.17328 1107.4532,-284.0938 z" | ||
2631 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7333);enable-background:new" | ||
2632 | sodipodi:nodetypes="czscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssccsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscc" /> | ||
2633 | <path | ||
2634 | id="path8175" | ||
2635 | d="M 1082.625,-275.125 C 1084.498,-274.73152 1087.1211,-273.97945 1088.6563,-273.15625 C 1090.1915,-272.33306 1091.4785,-272.10025 1094.0313,-270.65625 C 1096.5579,-269.22699 1098.8271,-268.64929 1101,-268.125 C 1103.3476,-267.55858 1106.4354,-267.40977 1109.8438,-266.9375 C 1108.7549,-267.77725 1103.2364,-268.10995 1101.4375,-268.5 C 1099.6386,-268.89006 1097.5434,-269.51616 1094.8438,-270.8125 C 1092.1441,-272.10884 1091.3494,-272.61146 1089.0313,-273.5 C 1086.7131,-274.38854 1085.0269,-274.88314 1082.625,-275.125 z" | ||
2636 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7285);enable-background:new" /> | ||
2637 | <path | ||
2638 | id="path8177" | ||
2639 | d="M 1051.4688,-270 C 1053.3741,-269.42241 1055.9969,-268.38428 1057.5625,-267.40625 C 1059.1281,-266.42823 1060.4427,-266.04644 1063.0625,-264.28125 C 1065.6555,-262.53409 1068.0484,-261.57198 1070.3125,-260.6875 C 1072.7586,-259.73193 1075.9951,-259.03037 1079.7188,-257.625 C 1078.5292,-258.76284 1072.6557,-260.31175 1070.7813,-261 C 1068.9068,-261.68825 1066.6995,-262.5662 1063.9063,-264.28125 C 1061.113,-265.99629 1060.3327,-266.56515 1057.9688,-267.6875 C 1055.6047,-268.80984 1053.9121,-269.52205 1051.4688,-270 z" | ||
2640 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7289);enable-background:new" /> | ||
2641 | <path | ||
2642 | id="path8179" | ||
2643 | d="M 1020.2188,-266.84375 C 1022.1307,-266.20564 1024.8,-265.08839 1026.375,-264.03125 C 1027.9501,-262.9741 1029.2706,-262.52258 1031.9063,-260.625 C 1034.5149,-258.74679 1036.9347,-257.59497 1039.2188,-256.5625 C 1041.6865,-255.44705 1044.9833,-254.3892 1048.75,-252.71875 C 1047.5467,-253.94128 1041.5472,-256.03298 1039.6563,-256.84375 C 1037.7653,-257.65452 1035.5914,-258.73754 1032.7813,-260.59375 C 1029.9711,-262.44995 1029.1595,-263.07068 1026.7813,-264.3125 C 1024.403,-265.5543 1022.6706,-266.28819 1020.2188,-266.84375 z" | ||
2644 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7293);enable-background:new" /> | ||
2645 | <path | ||
2646 | id="path8181" | ||
2647 | d="M 1110.1719,-266.89063 C 1110.3227,-266.84207 1110.8599,-266.25963 1110.2813,-265.40625 C 1109.4712,-264.21166 1104.5764,-262.08196 1101.7188,-261.28125 C 1098.8739,-260.48413 1095.4287,-260.30351 1091.1563,-261.65625 C 1086.8547,-263.0182 1085.6866,-264.12497 1080.5,-265.96875 C 1085.164,-263.85358 1086.6953,-262.01642 1090.625,-260.625 C 1092.2457,-260.05113 1093.9921,-259.6854 1095.6875,-259.59375 C 1095.2424,-259.26812 1094.1572,-258.61045 1092.125,-258 C 1089.3295,-257.16031 1085.4759,-256.46622 1083.875,-256.375 C 1082.3604,-256.28868 1080.733,-256.88749 1080.4375,-257 C 1080.6042,-256.89692 1080.8107,-256.62266 1080.1875,-255.96875 C 1079.2882,-255.02512 1074.0401,-254.04575 1071.0625,-253.71875 C 1068.0982,-253.3932 1064.5409,-253.73471 1060.1563,-255.625 C 1056.1783,-257.33997 1054.8173,-258.54036 1050.75,-260.375 C 1050.75,-260.375 1050.75,-260.21875 1050.75,-260.21875 C 1054.3931,-258.12346 1056.034,-256.33548 1059.625,-254.65625 C 1061.3552,-253.84716 1063.2167,-253.24749 1065.0313,-252.9375 C 1064.4964,-252.65074 1063.4735,-252.22599 1061.5938,-251.90625 C 1058.7248,-251.41829 1054.7848,-251.09011 1053.1563,-251.15625 C 1052.3056,-251.19079 1051.4277,-251.34062 1050.75,-251.5625 C 1050.0652,-251.77738 1049.5603,-252.00717 1049.4375,-252.0625 C 1049.6069,-251.95529 1049.8686,-251.65962 1049.2188,-251.03125 C 1048.3091,-250.15163 1042.9727,-249.69487 1039.9688,-249.5625 C 1036.9783,-249.43071 1033.3799,-250.01313 1028.9688,-252.125 C 1024.5276,-254.25126 1023.3273,-255.5266 1018.0625,-257.90625 C 1022.7968,-255.30921 1024.349,-253.27715 1028.4063,-251.1875 C 1030.0796,-250.32565 1031.8915,-249.69325 1033.6563,-249.25 C 1033.193,-249.01668 1032.0669,-248.56186 1029.9688,-248.3125 C 1027.0825,-247.96952 1023.1342,-247.81962 1021.5,-247.9375 C 1019.9538,-248.049 1018.2688,-248.79446 1017.9688,-248.9375 C 1018.1379,-248.81721 1018.3826,-248.52702 1017.75,-247.9375 C 1016.8372,-247.08677 1011.5059,-246.67538 1008.5,-246.5625 C 1005.5075,-246.45013 1001.9103,-247.05293 997.5,-249.15625 C 993.49875,-251.06448 992.11197,-252.29408 988.03125,-254.25 C 988.03122,-254.25 988.03125,-254.09375 988.03125,-254.09375 C 991.68631,-251.88983 993.32546,-250.0412 996.9375,-248.1875 C 998.67779,-247.29435 1000.5745,-246.65923 1002.4063,-246.21875 C 1001.8663,-245.97045 1000.8282,-245.60342 998.9375,-245.375 C 996.05182,-245.02642 992.07145,-244.85405 990.4375,-244.96875 C 989.58405,-245.02865 988.71119,-245.22666 988.03125,-245.46875 C 987.34415,-245.70405 986.8419,-245.94101 986.71875,-246 C 986.88873,-245.88773 987.18323,-245.57775 986.53125,-244.96875 C 985.6186,-244.11625 980.25592,-243.67538 977.25,-243.5625 C 974.25754,-243.45013 970.65654,-244.09055 966.25,-246.15625 C 961.81347,-248.23603 960.60312,-249.48796 955.34375,-251.8125 C 960.07313,-249.26501 961.63449,-247.2347 965.6875,-245.1875 C 967.35905,-244.34317 969.17304,-243.72107 970.9375,-243.28125 C 970.47427,-243.04703 969.3478,-242.59718 967.25,-242.34375 C 964.36431,-241.99517 960.4138,-241.77423 958.78125,-241.875 C 957.23669,-241.97032 955.58094,-242.70385 955.28125,-242.84375 C 955.45024,-242.72522 955.66317,-242.4399 955.03125,-241.84375 C 954.11939,-240.98347 948.7846,-240.5135 945.78125,-240.375 C 942.7913,-240.2371 939.2138,-240.82568 934.8125,-242.84375 C 930.81942,-244.67464 929.44739,-245.87295 925.375,-247.75 C 925.37498,-247.75 925.375,-247.59375 925.375,-247.59375 C 929.02261,-245.46048 930.64533,-243.65888 934.25,-241.875 C 935.98675,-241.01549 937.85727,-240.42486 939.6875,-240 C 939.14803,-239.7471 938.13687,-239.35871 936.25,-239.09375 C 933.37022,-238.68939 929.41187,-238.44813 927.78125,-238.53125 C 926.92953,-238.57466 926.05355,-238.7398 925.375,-238.96875 C 924.68931,-239.19076 924.1854,-239.41214 924.0625,-239.46875 C 924.23209,-239.35976 924.4944,-239.0591 923.84375,-238.4375 C 922.93296,-237.56736 917.59354,-237.04598 914.59375,-236.875 C 911.60742,-236.70479 908.01994,-237.19077 903.625,-239.15625 C 899.20011,-241.13513 898.01904,-242.38444 892.78125,-244.53125 C 897.49122,-242.14358 899.05142,-240.14252 903.09375,-238.1875 C 904.7609,-237.38119 906.55418,-236.79092 908.3125,-236.40625 C 907.85087,-236.15755 906.7155,-235.694 904.625,-235.375 C 901.7494,-234.93624 897.8446,-234.6419 896.21875,-234.6875 C 894.68052,-234.73062 892.98595,-235.43272 892.6875,-235.5625 C 892.85583,-235.44968 893.09807,-235.14875 892.46875,-234.53125 C 891.56063,-233.64015 886.2658,-233.003 883.28125,-232.71875 C 880.31007,-232.43577 876.70783,-232.89455 872.34375,-234.65625 C 868.38441,-236.25456 867.0146,-237.45112 863,-238.96875 C 863.00003,-238.96875 863,-238.8125 863,-238.8125 C 866.5959,-237.00115 868.23831,-235.23017 871.8125,-233.65625 C 873.53457,-232.8979 875.39998,-232.3673 877.21875,-232.03125 C 876.68266,-231.75217 875.65217,-231.34362 873.78125,-230.96875 C 870.92586,-230.39665 866.99183,-229.94936 865.375,-229.9375 C 864.53049,-229.93129 863.66892,-230.01844 863,-230.1875 C 862.32409,-230.34901 861.83991,-230.51673 861.71875,-230.5625 C 861.88597,-230.46848 862.14142,-230.17902 861.5,-229.5 C 860.60213,-228.54948 855.31352,-227.58292 852.375,-227.0625 C 849.44966,-226.54441 845.94285,-226.68826 841.65625,-228.09375 C 837.34045,-229.50882 836.18348,-230.62369 831.09375,-232.0625 C 835.6706,-230.31149 837.1823,-228.50244 841.125,-227.0625 C 842.75108,-226.46861 844.49385,-226.10685 846.21875,-225.90625 C 845.7659,-225.60923 844.66397,-225.02286 842.625,-224.4375 C 839.82028,-223.63233 835.98614,-222.86167 834.40625,-222.6875 C 832.9115,-222.5227 831.29002,-223.00431 831,-223.09375 C 831.16356,-223.00368 831.39278,-222.73382 830.78125,-222.03125 C 829.89878,-221.0174 824.73673,-219.6596 821.84375,-218.96875 C 818.96373,-218.28097 815.50815,-218.20873 811.28125,-219.40625 C 807.4464,-220.4927 806.10867,-221.47862 802.21875,-222.53125 C 802.21874,-222.53125 802.21875,-222.375 802.21875,-222.375 C 805.70293,-220.98015 807.28816,-219.4556 810.75,-218.34375 C 812.41793,-217.80803 814.20578,-217.55701 815.96875,-217.46875 C 815.44911,-217.11663 814.46836,-216.55423 812.65625,-215.9375 C 809.89059,-214.99625 806.06601,-214.00213 804.5,-213.78125 C 803.68206,-213.66586 802.8669,-213.65842 802.21875,-213.75 C 801.56379,-213.83321 801.08615,-213.96827 800.96875,-214 C 801.13079,-213.92536 801.40274,-213.65956 800.78125,-212.90625 C 799.91125,-211.85172 794.77162,-210.247 791.90625,-209.46875 C 789.05372,-208.69399 785.64713,-208.51055 781.46875,-209.5625 C 777.26192,-210.62163 776.11206,-211.60416 771.125,-212.71875 C 775.60954,-211.25929 777.09435,-209.58352 780.9375,-208.46875 C 782.52254,-208.00898 784.22429,-207.8305 785.90625,-207.78125 C 785.46468,-207.44449 784.39374,-206.75352 782.40625,-206 C 779.67232,-204.96351 775.95427,-203.83731 774.40625,-203.5625 C 772.94163,-203.30248 771.34667,-203.67904 771.0625,-203.75 C 771.22275,-203.67035 771.44294,-203.42902 770.84375,-202.6875 C 769.97909,-201.61744 764.92723,-199.86935 762.09375,-199 C 759.27295,-198.13453 755.88625,-197.84369 751.75,-198.78125 C 747.99741,-199.63186 746.70215,-200.49772 742.875,-201.375 C 742.875,-201.375 742.875,-201.21875 742.875,-201.21875 C 746.30296,-199.98096 747.86241,-198.58645 751.25,-197.6875 C 752.88216,-197.25436 754.61704,-197.10449 756.34375,-197.125 C 755.83482,-196.74083 754.867,-196.10318 753.09375,-195.375 C 750.38741,-194.26366 746.65742,-193.06719 745.125,-192.75 C 744.3246,-192.58431 743.51269,-192.53138 742.875,-192.59375 C 742.875,-192.59375 742.875,-192.07823 742.875,-191.67146 C 742.875,-191.40639 742.875,-191.1875 742.875,-191.1875 C 743.10145,-191.33218 743.32391,-191.46011 743.59375,-191.5625 C 744.67427,-191.97248 745.76536,-191.77827 749.59375,-193.25 C 753.42218,-194.72174 754.81787,-195.25498 755.5,-195.65625 C 756.1796,-196.05603 757.11165,-196.53562 757.71875,-197.1875 C 759.5456,-197.32525 761.2895,-197.68073 762.65625,-198.1875 C 765.62437,-199.28802 767.53162,-199.99369 769.4375,-200.65625 C 771.34336,-201.31879 771.79159,-202.07112 772.84375,-202.4375 C 773.9353,-202.81761 775.03886,-202.60288 778.90625,-203.96875 C 782.7737,-205.33461 784.18941,-205.79583 784.875,-206.1875 C 785.57609,-206.58802 786.57581,-207.12048 787.1875,-207.78125 C 789.1583,-207.83591 791.00435,-208.16588 792.46875,-208.65625 C 795.47023,-209.66133 797.3949,-210.27796 799.3125,-210.90625 C 800.8511,-211.41035 801.48652,-211.95302 802.21875,-212.34375 C 802.44891,-212.47806 802.69449,-212.59748 802.96875,-212.6875 C 804.06698,-213.04798 805.1502,-212.76887 809.0625,-214 C 812.97483,-215.23113 814.42855,-215.67295 815.125,-216.03125 C 815.81888,-216.38822 816.75515,-216.82386 817.375,-217.4375 C 819.24021,-217.46016 821.01081,-217.70433 822.40625,-218.125 C 825.43668,-219.03854 827.39863,-219.5551 829.34375,-220.09375 C 831.28886,-220.63239 831.76993,-221.35827 832.84375,-221.65625 C 833.95776,-221.9654 835.06369,-221.64886 839.03125,-222.6875 C 842.99886,-223.72613 844.44883,-224.12023 845.15625,-224.4375 C 845.89112,-224.76709 846.97008,-225.19122 847.59375,-225.8125 C 849.59149,-225.6965 851.45118,-225.83259 852.9375,-226.1875 C 856.01561,-226.9225 858.02094,-227.28844 860,-227.6875 C 861.58792,-228.00768 862.24429,-228.47805 863,-228.78125 C 863.23757,-228.88805 863.46695,-228.97401 863.75,-229.03125 C 864.88347,-229.26044 866.05448,-228.82232 870.09375,-229.53125 C 874.13308,-230.24018 875.594,-230.45834 876.3125,-230.71875 C 877.02836,-230.97819 878.01678,-231.28599 878.65625,-231.8125 C 880.58052,-231.57301 882.40413,-231.58797 883.84375,-231.8125 C 886.97008,-232.30012 888.9983,-232.51317 891,-232.78125 C 893.00171,-233.04932 893.48869,-233.72639 894.59375,-233.875 C 895.74014,-234.02918 896.86967,-233.57343 900.9375,-234.09375 C 905.00534,-234.61407 906.49763,-234.78948 907.21875,-235.03125 C 907.95585,-235.27839 909.01684,-235.61748 909.65625,-236.15625 C 911.70632,-235.82072 913.63003,-235.75829 915.15625,-235.9375 C 918.29856,-236.30646 920.33619,-236.49686 922.34375,-236.71875 C 923.95451,-236.89677 924.60842,-237.32695 925.375,-237.5625 C 925.61594,-237.64802 925.86912,-237.7181 926.15625,-237.75 C 927.30603,-237.87772 928.45754,-237.40335 932.53125,-237.875 C 936.60499,-238.34665 938.09034,-238.4856 938.8125,-238.71875 C 939.53196,-238.95102 940.51274,-239.19221 941.15625,-239.6875 C 943.09262,-239.35404 944.92631,-239.28326 946.375,-239.4375 C 949.52102,-239.77245 951.55256,-239.95609 953.5625,-240.15625 C 955.57246,-240.35639 956.04664,-240.98264 957.15625,-241.09375 C 958.30739,-241.20903 959.45268,-240.72869 963.53125,-241.15625 C 967.60986,-241.58381 969.12011,-241.71834 969.84375,-241.9375 C 970.5829,-242.16136 971.63947,-242.45075 972.28125,-242.96875 C 974.33835,-242.57008 976.28312,-242.47535 977.8125,-242.625 C 980.96123,-242.9331 982.98834,-243.09825 985,-243.28125 C 986.61407,-243.42807 987.2631,-243.8418 988.03125,-244.0625 C 988.27267,-244.14336 988.52478,-244.19241 988.8125,-244.21875 C 989.96461,-244.3242 991.10546,-243.826 995.1875,-244.21875 C 999.26958,-244.6115 1000.7764,-244.74959 1001.5,-244.96875 C 1002.2209,-245.18708 1003.1997,-245.41645 1003.8438,-245.90625 C 1005.7818,-245.55626 1007.6126,-245.45187 1009.0625,-245.59375 C 1012.2112,-245.90185 1014.2383,-246.067 1016.25,-246.25 C 1018.2616,-246.43299 1018.7642,-247.08802 1019.875,-247.1875 C 1021.0273,-247.29073 1022.1672,-246.80267 1026.25,-247.1875 C 1030.3329,-247.57232 1031.8387,-247.6885 1032.5625,-247.90625 C 1033.3018,-248.12868 1034.3581,-248.42074 1035,-248.9375 C 1037.0574,-248.53573 1039.0029,-248.43417 1040.5313,-248.59375 C 1043.6779,-248.92227 1045.7084,-249.11645 1047.7188,-249.3125 C 1049.3318,-249.46979 1049.9844,-249.94398 1050.75,-250.1875 C 1050.9907,-250.27554 1051.2132,-250.30887 1051.5,-250.34375 C 1052.6483,-250.48345 1053.8167,-250.00384 1057.875,-250.59375 C 1061.9333,-251.18367 1063.4368,-251.37089 1064.1563,-251.625 C 1064.873,-251.87816 1065.8308,-252.18307 1066.4688,-252.71875 C 1068.3885,-252.50681 1070.1887,-252.56734 1071.625,-252.8125 C 1074.7441,-253.3449 1076.7366,-253.74111 1078.7188,-254.125 C 1080.7009,-254.50887 1081.1931,-255.16465 1082.2813,-255.40625 C 1083.4101,-255.65691 1084.5516,-255.28996 1088.5313,-256.28125 C 1092.5109,-257.27253 1093.9609,-257.70055 1094.6563,-258.0625 C 1095.3786,-258.43851 1096.4182,-258.93308 1097.0313,-259.59375 C 1098.9943,-259.6058 1100.825,-259.8848 1102.25,-260.4375 C 1105.2012,-261.58211 1107.1232,-262.30692 1108.9375,-263.1875 C 1110.3932,-263.89403 1111.2723,-264.87391 1111.4844,-265.17188 C 1111.6966,-265.46984 1111.5962,-265.91718 1111.6223,-265.93863 C 1111.6652,-265.97387 1111.9416,-266.0236 1112.1013,-266.36707 C 1112.9602,-268.21415 1114.4223,-272.01166 1114.5365,-272.69652 C 1114.6502,-273.37868 1114.7003,-274.04426 1114.751,-274.44149 C 1114.7804,-274.67101 1114.6043,-275.30693 1114.6264,-275.36553 C 1114.6573,-275.44759 1114.9309,-275.63081 1114.9863,-275.88024 C 1115.2526,-277.07857 1115.0752,-278.07153 1114.8612,-279.48917 C 1114.6472,-280.90681 1113.8775,-284.11131 1113.2243,-284.96543 C 1112.5654,-285.82715 1112.0014,-285.9766 1111.4764,-285.96609 C 1111.2678,-285.69633 1111.6132,-285.703 1111.639,-285.65348 C 1112.3196,-285.60269 1112.573,-285.28484 1113.0582,-284.75686 C 1113.5434,-284.22888 1114.28,-280.90569 1114.4166,-279.4553 C 1114.5532,-278.00491 1114.6066,-276.5951 1114.3286,-275.98666 C 1114.0505,-275.37821 1113.6054,-275.46963 1113.313,-275.40375 C 1113.844,-275.21786 1113.9828,-275.27892 1114.0444,-274.43446 C 1114.1037,-273.62108 1113.9112,-272.79477 1113.5246,-271.62884 C 1113.1334,-270.44883 1111.6794,-267.27886 1111.2389,-267.03007 C 1110.7866,-266.77456 1110.5075,-266.75969 1110.1719,-266.89063 z" | ||
2648 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7337);enable-background:new" | ||
2649 | sodipodi:nodetypes="cssscscsscsssccscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssccscsscscssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsszsszssszzcczzzczzzc" /> | ||
2650 | <path | ||
2651 | id="path8183" | ||
2652 | d="M 988.75,-263.84375 C 990.66161,-263.20935 993.30027,-262.08534 994.875,-261.03125 C 996.44977,-259.97716 997.7711,-259.54873 1000.4063,-257.65625 C 1003.0145,-255.78311 1005.4332,-254.64103 1007.7188,-253.59375 C 1010.1881,-252.46228 1013.4709,-251.43901 1017.25,-249.65625 C 1016.0428,-250.91465 1010.111,-253.0207 1008.2188,-253.84375 C 1006.3266,-254.66679 1004.0908,-255.77424 1001.2813,-257.625 C 998.47169,-259.47575 997.65906,-260.10654 995.28125,-261.34375 C 992.90343,-262.58094 991.20137,-263.29295 988.75,-263.84375 z" | ||
2653 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7297);enable-background:new" /> | ||
2654 | <path | ||
2655 | id="path8185" | ||
2656 | d="M 957.5,-260.78125 C 959.41,-260.16315 962.08288,-259.07191 963.65625,-258.03125 C 965.22964,-256.99059 966.55233,-256.54873 969.1875,-254.65625 C 971.79573,-252.7831 974.21442,-251.64104 976.5,-250.59375 C 978.96931,-249.46228 982.25213,-248.439 986.03125,-246.65625 C 984.82397,-247.91465 978.82971,-250.05195 976.9375,-250.875 C 975.04533,-251.69804 972.84084,-252.8055 970.03125,-254.65625 C 967.22167,-256.507 966.4383,-257.09557 964.0625,-258.3125 C 961.68672,-259.52941 959.94929,-260.25135 957.5,-260.78125 z" | ||
2657 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7301);enable-background:new" /> | ||
2658 | <path | ||
2659 | id="path8187" | ||
2660 | d="M 926.09375,-257.375 C 928.00147,-256.77755 930.64723,-255.71116 932.21875,-254.6875 C 933.79025,-253.66385 935.08897,-253.24779 937.71875,-251.40625 C 940.32166,-249.58352 942.74762,-248.43405 945.03125,-247.40625 C 947.49845,-246.29584 950.7866,-245.31302 954.5625,-243.5625 C 953.35627,-244.8106 947.3906,-246.88059 945.5,-247.6875 C 943.60942,-248.4944 941.39758,-249.57854 938.59375,-251.375 C 935.7899,-253.17144 934.96671,-253.77751 932.59375,-254.96875 C 930.22078,-256.15999 928.54013,-256.87158 926.09375,-257.375 z" | ||
2661 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7305);enable-background:new" /> | ||
2662 | <path | ||
2663 | id="path8189" | ||
2664 | d="M 894.90625,-253.5625 C 896.80838,-253.00895 899.49326,-251.97363 901.0625,-250.96875 C 902.63173,-249.96388 903.93651,-249.56011 906.5625,-247.75 C 909.16162,-245.95836 911.56284,-244.87811 913.84375,-243.875 C 916.30803,-242.79126 919.60359,-241.83471 923.375,-240.125 C 922.1702,-241.36007 916.20084,-243.36978 914.3125,-244.15625 C 912.42418,-244.94272 910.2373,-245.98705 907.4375,-247.75 C 904.63773,-249.51294 903.83831,-250.11836 901.46875,-251.28125 C 899.09918,-252.44413 897.3455,-253.11537 894.90625,-253.5625 z" | ||
2665 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7309);enable-background:new" /> | ||
2666 | <path | ||
2667 | id="path8191" | ||
2668 | d="M 863.71875,-248.65625 C 865.59937,-248.22716 868.22302,-247.27587 869.78125,-246.34375 C 871.33948,-245.41164 872.63358,-245.08599 875.25,-243.34375 C 877.83971,-241.61931 880.23067,-240.63573 882.5,-239.71875 C 884.95176,-238.72806 888.23959,-237.84168 892,-236.21875 C 890.79869,-237.42609 884.84751,-239.28484 882.96875,-240 C 881.09,-240.71517 878.88335,-241.68442 876.09375,-243.375 C 873.30412,-245.06557 872.50914,-245.60322 870.15625,-246.65625 C 867.80333,-247.70926 866.13041,-248.36873 863.71875,-248.65625 z" | ||
2669 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7313);enable-background:new" /> | ||
2670 | <path | ||
2671 | id="path8193" | ||
2672 | d="M 833.15625,-241.375 C 835.00461,-241.07856 837.6257,-240.39868 839.15625,-239.59375 C 840.68683,-238.78882 841.96999,-238.53802 844.53125,-237.0625 C 847.06629,-235.60204 849.42193,-234.73741 851.65625,-234 C 854.07024,-233.20332 857.31336,-232.53311 861.03125,-231.15625 C 859.84354,-232.28498 853.94353,-233.746 852.09375,-234.3125 C 850.24398,-234.879 848.09033,-235.68642 845.34375,-237.15625 C 842.59718,-238.62608 841.84239,-239.07653 839.53125,-239.9375 C 837.2201,-240.79845 835.52654,-241.25759 833.15625,-241.375 z" | ||
2673 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7317);enable-background:new" /> | ||
2674 | <path | ||
2675 | id="path8195" | ||
2676 | d="M 802.90625,-232.3125 C 804.72845,-232.10123 807.27201,-231.51193 808.78125,-230.78125 C 810.2905,-230.05059 811.53693,-229.85127 814.0625,-228.5 C 816.56226,-227.16254 818.89404,-226.45157 821.09375,-225.84375 C 823.47028,-225.18708 826.65839,-224.77087 830.3125,-223.65625 C 829.14515,-224.70121 823.38362,-225.75954 821.5625,-226.21875 C 819.74139,-226.67796 817.61025,-227.34571 814.90625,-228.65625 C 812.20222,-229.96677 811.43519,-230.37615 809.15625,-231.125 C 806.8773,-231.87383 805.243,-232.30431 802.90625,-232.3125 z" | ||
2677 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7321);enable-background:new" /> | ||
2678 | <path | ||
2679 | id="path8197" | ||
2680 | d="M 773.1875,-222.1875 C 774.99859,-222.0088 777.50809,-221.52244 779,-220.84375 C 780.49194,-220.16506 781.7534,-220.04553 784.25,-218.78125 C 786.72107,-217.52987 789.04005,-216.88511 791.21875,-216.34375 C 793.57262,-215.75887 796.71009,-215.44623 800.3125,-214.5 C 799.16166,-215.49116 793.45999,-216.2833 791.65625,-216.6875 C 789.85253,-217.0917 787.74072,-217.70866 785.0625,-218.9375 C 782.38432,-220.16634 781.65905,-220.54839 779.40625,-221.21875 C 777.15346,-221.88909 775.50998,-222.22107 773.1875,-222.1875 z" | ||
2681 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7329);enable-background:new" /> | ||
2682 | <path | ||
2683 | id="path8199" | ||
2684 | d="M 743.5625,-211.1875 C 745.35531,-211.05839 747.83563,-210.63785 749.3125,-210 C 750.7894,-209.36215 752.0286,-209.25844 754.5,-208.0625 C 756.94618,-206.87878 759.22054,-206.31584 761.375,-205.84375 C 763.70267,-205.33372 766.7946,-205.16311 770.375,-204.28125 C 769.23121,-205.25185 763.62741,-205.8719 761.84375,-206.21875 C 760.06008,-206.56559 757.9609,-207.10631 755.3125,-208.25 C 752.66409,-209.39368 751.91755,-209.76631 749.6875,-210.375 C 747.45742,-210.98368 745.86156,-211.28466 743.5625,-211.1875 z" | ||
2685 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7325);enable-background:new" /> | ||
2686 | </g> | ||
2687 | </g> | ||
2688 | <path | ||
2689 | style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2690 | d="M 863.87812,475.6679 C 865.52024,472.4499 867.39593,469.93261 868.73948,465.81892 C 869.5382,462.16103 872.05152,463.78819 875.99995,457.42202 C 877.40188,455.18252 881.47648,457.81338 884.96505,455.02291 C 886.23577,454.21972 887.84993,454.6186 889.44761,454.95978 C 893.213,456.27874 895.27337,458.66333 897.78137,460.76815 C 903.92043,466.73838 918.31551,468.71142 921.26741,467.08161 C 922.70146,464.17687 929.14869,461.67273 933.64178,455.96993 C 934.38989,454.84726 945.37114,447.22547 948.28899,449.40394" | ||
2691 | id="path8201" | ||
2692 | sodipodi:nodetypes="ccccccccc" /> | ||
2693 | <path | ||
2694 | style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2695 | d="M 888.50059,465.25071 C 895.864,462.01774 902.31149,456.34231 909.20872,451.86619 C 912.51929,449.89665 916.07855,455.0822 920.00472,455.46485 C 922.30245,455.24672 923.71762,456.66744 925.68683,457.10635 C 930.84319,458.42414 928.08476,460.97123 935.66209,463.54607 C 941.8177,465.26647 944.56949,456.7476 950.56184,456.22247 C 955.43923,455.71948 958.66076,455.90644 962.17859,455.96993 C 966.10555,456.10882 966.25714,452.47233 968.23951,450.66663 C 971.22007,447.86141 975.39512,448.81691 978.38436,445.92573 C 979.4019,444.54105 980.33894,442.91488 981.11895,440.81764 C 982.00096,438.8173 984.15901,441.12362 985.91718,442.08033" | ||
2696 | id="path8203" | ||
2697 | sodipodi:nodetypes="ccccccccccc" /> | ||
2698 | </g> | ||
2699 | <g | ||
2700 | inkscape:groupmode="layer" | ||
2701 | id="layer15" | ||
2702 | inkscape:label="Feet" | ||
2703 | style="display:inline"> | ||
2704 | <path | ||
2705 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9048);enable-background:accumulate" | ||
2706 | d="M 403.27922,1056.3058 L 459.84776,1013.8794 L 531.97265,1028.0215 L 485.30361,1080.3474 L 431.56349,1087.4185 L 403.27922,1056.3058 z" | ||
2707 | id="path8994" /> | ||
2708 | <path | ||
2709 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2710 | d="M 542.27183,1060.5719 C 540.86456,1079.3731 541.12693,1093.3229 544.35357,1109.8752 C 547.58023,1126.4275 560.75966,1155.7825 564.68798,1173.0589 C 568.61419,1190.326 567.38211,1211.3686 552.22854,1224.2072 C 536.91093,1237.1846 510.17726,1245.8061 484.39623,1239.9409 C 458.61518,1234.0757 414.84716,1190.7175 395.80604,1169.7126 C 376.6939,1148.6293 332.04518,1075.862 317.86751,1045.4368 C 303.68984,1015.0117 305.2079,1008.7182 309.74779,999.90708 C 300.38107,975.38658 297.33408,949.84027 276.03534,924.33044 C 306.36081,927.44488 319.91562,951.28677 336.16102,971.47019 C 330.63113,923.39416 318.10631,907.05369 307.78707,880.74589 C 337.78137,886.82754 358.36643,912.61828 371.76686,953.45839 C 381.32101,949.54048 390.00462,944.08545 401.95427,944.39719 C 390.65677,902.70139 384.00481,874.48135 365.26702,843.32725 C 418.70898,848.99758 448.92404,923.96657 444.23844,931.28805 C 454.21641,929.04406 463.24409,924.75767 474.67497,925.63638 C 463.426,887.28936 453.62716,848.76848 471.01526,806.98819 C 471.01526,806.98819 519.30204,872.42507 525.40492,892.79397 C 531.50779,913.16287 526.92373,931.49448 526.92373,931.49448 C 526.92373,931.49448 543.8833,962.57978 547.21765,982.58862 C 550.59075,1002.83 543.68496,1041.6919 542.27183,1060.5719 z" | ||
2711 | id="path4189" | ||
2712 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
2713 | <path | ||
2714 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3587);enable-background:accumulate" | ||
2715 | d="M 719.5,738.69519 L 737.81177,754.12715 L 782.2228,738.73894 L 805.5,713.19519 L 816.96397,732.41584 L 847.63558,745.19938 L 872.73295,750.92775 L 892,723.19519 L 908.02309,747.02126 L 947,752.19519 L 957.24541,745.99667 L 964.00012,754.69487 L 989.5,765.69519 L 991.5,725.19519 L 955.94866,710.6576 L 923.45591,689.1305 L 883.0038,677.66492 L 861.69668,662.13148 L 840,685.19519 L 755.02878,638.61208 L 722,676.69519 L 719.5,738.69519 z" | ||
2716 | id="path4191" | ||
2717 | sodipodi:nodetypes="cccccccccccccccccccccc" | ||
2718 | clip-path="url(#clipPath3631)" | ||
2719 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" /> | ||
2720 | <path | ||
2721 | style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter3898);enable-background:new" | ||
2722 | d="M 584,696.5 L 577.4375,713.65625 C 577.4375,713.65625 569.62598,734.02113 561.75,757.3125 C 557.81201,768.95818 553.86698,781.35395 550.8125,792.4375 C 547.75802,803.52105 545.47664,812.81736 545.3125,820.71875 C 544.91443,839.88071 551.05903,855.60705 554.25,862.46875 C 553.47847,866.02398 552.25863,871.92307 550.90625,880.5625 C 548.98583,892.83071 547.18798,907.71691 548.53125,920.4375 C 549.91334,933.52585 555.34347,948.62515 561.125,963.46875 C 566.90653,978.31235 573.17935,992.69586 576.34375,1001.5 C 582.97581,1019.9519 586.33671,1033.0763 587.65625,1050 C 588.2376,1057.4561 587.41398,1070.336 586.40625,1083.375 C 585.39852,1096.414 584.21964,1109.6764 584.71875,1120.3438 C 585.70786,1141.4836 594.04673,1167.1785 618.09375,1178.2812 C 640.86858,1188.7966 673.42057,1189.9834 701.53125,1174.8438 C 717.69117,1166.1404 731.60759,1147.7462 744.90625,1127.9375 C 758.20491,1108.1288 769.87542,1086.8841 776.84375,1073.0312 C 792.19667,1042.51 816.23728,957.56702 822.46875,920.3125 C 825.48734,902.26597 826.39041,891.24695 825.09375,882.28125 C 824.11522,875.51521 821.26556,870.13385 818.21875,866.0625 C 820.26149,838.55459 817.48668,814.69372 830.1875,786.65625 L 840.75,763.375 L 816.9375,772.6875 C 799.44775,779.52503 788.03586,791.73286 780.34375,804.75 C 780.02124,805.29577 779.78061,805.85776 779.46875,806.40625 C 779.69078,783.89104 783.87659,768.76866 786.0625,747.71875 L 788.03125,728.71875 L 771,737.375 C 740.40551,752.93071 725.30511,785.56821 721.28125,827.59375 C 717.03593,826.96828 712.44985,826.5741 707.46875,826.75 C 707.17726,787.56964 707.07246,759.71315 716.0625,727.375 L 721.65625,707.25 L 702.21875,714.90625 C 671.30938,727.11019 654.35921,756.83698 645.59375,783.28125 C 641.21102,796.50339 638.84793,809.08246 638,819.21875 C 637.76797,821.99248 637.68894,824.53007 637.6875,826.9375 C 634.44563,826.90109 631.26698,827.07339 627.625,827.4375 C 627.66662,788.43277 624.14076,747.68335 595.34375,710.9375 L 584,696.5 z M 589.8125,740.3125 C 606.61941,770.95633 607.28701,804.27978 606.75,840.0625 L 606.53125,855.125 L 618.56618,848.58579 C 627.22823,845.45277 638.12676,848.35827 650.5,847.75 L 665.17465,857.1066 L 658.84375,831.3125 C 658.7541,831.08253 658.62329,830.89581 658.59375,830.59375 C 658.39424,828.55389 658.37143,825.12068 658.71875,820.96875 C 659.41339,812.66489 661.50832,801.38351 665.34375,789.8125 C 670.49907,774.25956 678.83176,758.62002 690.46875,747.28125 C 685.78494,775.91923 687.25316,807.54059 687.45711,843.08639 L 684.69118,856.34803 L 700.1875,848.75 C 709.2169,845.99229 717.37647,848.40004 729.46875,849.84375 L 742.71507,859.28798 L 741.09375,840 C 742.54168,809.02823 749.31524,786.32192 761.8125,771.125 C 758.82562,790.90384 756.38207,812.9098 762.125,849.46875 L 763.19052,855.84193 L 760.25237,867.35878 L 770.86948,859.1906 L 780.05921,869.41258 L 778.51093,858.94898 L 781.9375,852 C 787.63852,838.78851 792.11032,825.78663 798.28125,815.34375 C 799.24111,813.71941 800.31278,812.27939 801.34375,810.78125 C 797.66309,831.9366 798.91659,850.9894 797.25,868.5625 L 792.56986,876.36948 L 799.96875,876.59375 C 803.1888,880.07736 803.83625,880.44443 804.53125,885.25 C 805.22625,890.05557 804.84987,899.65035 801.96875,916.875 C 796.40076,950.16292 770.12313,994.71481 758.22835,1018.3614 C 751.62344,1031.4918 739.70002,1075.8473 727.105,1094.6079 C 714.50998,1113.3684 698.57363,1134.3752 689.93296,1139.0288 C 668.44244,1150.603 645.37702,1164.5347 629.31407,1157.1183 C 614.93921,1150.4813 606.27438,1135.9256 605.5,1119.375 C 605.11689,1111.187 606.11279,1098.0658 607.125,1084.9688 C 608.13721,1071.8717 618.41391,1062.398 622.54839,1048.4062 C 627.92068,1030.2254 621.10152,1011.8118 610.04839,994.46875 C 603.56184,984.29097 586.07159,970.21085 580.5,955.90625 C 574.92841,941.60165 570.13249,926.9031 569.21875,918.25 C 568.29254,909.47887 569.64125,895.22498 571.4375,883.75 C 573.23375,872.27503 575.28125,863.46875 575.28125,863.46875 L 584.70403,859.85355 L 574.21875,855.96875 C 574.21875,855.96875 565.71986,840.65865 566.125,821.15625 C 566.19611,817.73309 567.96126,808.4282 570.84375,797.96875 C 573.72624,787.5093 577.60841,775.41604 581.46875,764 C 584.51314,754.99692 587.24938,747.39655 589.8125,740.3125 z" | ||
2723 | id="path4193" | ||
2724 | clip-path="url(#clipPath3677)" | ||
2725 | sodipodi:nodetypes="ccssscsssssssssssssccccscccccccccsscccccccccccssscccccccccccccccsccccssssssssssssscccsssc" | ||
2726 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,822.28931,10.93589)" /> | ||
2727 | <g | ||
2728 | id="g3617" | ||
2729 | clip-path="url(#clipPath3622)" | ||
2730 | transform="translate(276,136)"> | ||
2731 | <path | ||
2732 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,-52.200498,74.09707)" | ||
2733 | id="path4195" | ||
2734 | d="M -15.66751,843.48852 L -65.16499,827.93217 L -92.03504,880.25807 L -51.02285,925.51291 L -1.52538,887.32914 L -15.66751,843.48852 z" | ||
2735 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9024);enable-background:accumulate" /> | ||
2736 | <path | ||
2737 | sodipodi:nodetypes="ccccccccccccc" | ||
2738 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,-46.92842,75.511284)" | ||
2739 | id="path4197" | ||
2740 | d="M 118.70648,859.93048 L 63.552152,813.26144 L 19.711532,850.03099 L 53.652662,903.7711 L 40.055848,989.23313 L 0.61048221,1017.5253 L -40.401718,1028.839 L -43.230138,1075.508 L 13.338402,1100.9639 L 32.282389,1031.3139 L 55.738939,972.45727 L 102.08648,899.84236 L 118.70648,859.93048 z" | ||
2741 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9020);enable-background:accumulate" /> | ||
2742 | </g> | ||
2743 | <path | ||
2744 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9044);enable-background:accumulate" | ||
2745 | d="M -70.82184,932.58397 L -10.01066,905.71392 L 90.3985,936.82662 L 26.75889,967.93931 L -55.26549,950.96875 L -70.82184,932.58397 z" | ||
2746 | id="path4199" | ||
2747 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,229.07158,211.51128)" /> | ||
2748 | <path | ||
2749 | style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4105);enable-background:new" | ||
2750 | d="M 583.0625,715.75 C 570.95641,750.19974 556.348,784.28333 551.3125,820.59375 C 550.48042,835.52242 555.90165,849.75318 560.15625,863.65625 C 554.24001,890.85751 550.01944,920.5562 561.3125,946.78125 C 574.82967,984.9421 596.31397,1022.4634 593.73529,1064.2495 C 592.78699,1093.5437 584.72085,1125.2436 599.125,1152.5312 C 609.32364,1171.866 632.26456,1179.8429 653.09285,1180.1988 C 680.95504,1181.3729 709.55546,1168.5772 725.09375,1144.9375 C 747.68924,1115.5658 766.89426,1083.4402 780.3324,1048.8777 C 797.22293,1003.3717 810.0042,956.31698 818.26642,908.4788 C 820.09082,895.53774 821.3675,881.00895 813.6875,869.65625 C 810.25635,862.31993 813.72957,854.09611 813.00293,846.34648 C 813.67693,821.35182 817.01525,795.68272 829.65625,773.75 C 811.92312,780.1946 794.58357,790.30971 785.65318,807.61425 C 781.7181,814.3238 778.04836,821.18838 774.28125,828 C 770.73126,797.98592 778.00088,768.35172 781.0625,738.71875 C 760.89646,747.77338 744.18578,764.37397 736.88755,785.40075 C 730.58292,800.98078 728.08533,817.71793 726.625,834.4375 C 718.37166,832.91825 709.94053,832.33595 701.5625,832.9375 C 700.59942,794.23963 701.09554,753.53035 712.53125,717.03125 C 693.85012,723.24901 677.36504,735.76676 666.90322,752.41848 C 653.05068,773.29827 645.64182,798.17243 643.84375,823.03125 C 644.42909,827.35579 643.78249,834.87134 637.5,832.90625 C 632.16882,832.9238 626.87092,833.58508 621.5625,834 C 622.71034,794.61852 618.22106,752.3718 594.5,719.78125 C 591.43929,716.14408 588.86315,712.09687 585.875,708.4375 C 584.9375,710.875 584,713.3125 583.0625,715.75 z M 590.8125,729.59375 C 609.37777,758.89004 613.295,794.41387 612.9375,828.46875 C 613.14159,833.64401 612.42094,840.29795 613.0625,844.53125 C 625.38106,838.4285 639.80162,842.09135 652.84375,842.34375 C 655.16087,843.567 656.03585,843.99618 654.75,840.9375 C 650.58545,826.98465 652.90172,812.3245 656.55504,798.52986 C 662.92191,772.23922 677.18332,747.44188 699.375,731.5 C 690.75791,768.73706 693.65842,808.06161 693.28125,845.46875 C 705.53469,838.55885 720.56004,842.02262 733.3125,845.21875 C 736.70472,848.75355 735.60185,844.48927 735.5,841.40625 C 735.01691,820.03567 739.63133,798.33662 749.1875,779.25 C 755.15016,768.56273 763.43088,759.44621 771.625,750.375 C 763.75344,784.2131 762.4221,819.71093 768.90625,853.875 C 770.6311,852.46382 773.51306,853.42086 774.5625,853.5 C 784.24619,832.26318 790.91362,808.11938 809.45266,792.75815 C 811.32595,792.38693 808.00448,801.2831 807.96875,804.65625 C 804.43387,826.50206 800.79359,848.79859 799.18454,870.87536 C 790.40075,873.21707 802.03289,873.1989 802.65329,874.93786 C 810.5764,885.50366 807.31628,899.34258 806.28494,911.2912 C 799.22089,956.32475 784.14263,998.65314 770.33139,1041.971 C 758.25663,1074.9203 742.95719,1100.8235 722.44331,1129.1725 C 711.49074,1142.7239 699.19859,1157.0238 681.59956,1161.6725 C 661.44355,1167.9138 637.3928,1172.5494 619,1161.7188 C 601.71034,1149.3774 597.97607,1126.0099 599.73774,1106.0324 C 599.78653,1090.2062 604.6766,1077.5203 604.14834,1062.5406 C 603.6101,1047.2777 601.85699,1031.9759 597.60573,1015.6743 C 593.35447,999.37268 588.56248,990.75636 581.48667,974.10092 C 574.24556,957.05636 566.41652,937.35229 563.28125,917.8125 C 561.53177,899.18536 566.17296,880.68988 569.0625,862.5625 C 572.35873,859.72554 567.46451,857.36591 566.75,854.375 C 559.14887,837.35992 558.34253,817.6001 564.00766,799.81502 C 571.13786,774.74272 579.76853,750.18261 588.6875,725.6875 C 589.39583,726.98958 590.10417,728.29167 590.8125,729.59375 z" | ||
2751 | id="path4201" | ||
2752 | sodipodi:nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccczzzcccccc" | ||
2753 | clip-path="url(#clipPath4177)" | ||
2754 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,822.28931,10.93589)" /> | ||
2755 | <path | ||
2756 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4130);enable-background:accumulate" | ||
2757 | d="M 735.05635,733.03834 L 737.81177,754.12715 L 782.2228,738.73894 L 787.07343,716.34919 L 783.13726,694.29697 L 760.68563,657.70396 L 752.40559,688.0089 L 735.05635,733.03834 z" | ||
2758 | id="path4203" | ||
2759 | sodipodi:nodetypes="cccccccc" | ||
2760 | clip-path="url(#clipPath3631)" | ||
2761 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" /> | ||
2762 | <path | ||
2763 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4141);enable-background:accumulate" | ||
2764 | d="M 831.81321,730.29452 L 847.63558,745.19938 L 868.49031,748.09932 L 866.90002,708.17334 L 875.22563,677.66492 L 868.06064,671.32386 L 846.36395,692.26626 L 831.81321,730.29452 z" | ||
2765 | id="path4205" | ||
2766 | sodipodi:nodetypes="cccccccc" | ||
2767 | clip-path="url(#clipPath3631)" | ||
2768 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" /> | ||
2769 | <g | ||
2770 | id="g8317" | ||
2771 | style="filter:url(#filter8333)" | ||
2772 | clip-path="url(#clipPath8338)" | ||
2773 | transform="translate(276,136)"> | ||
2774 | <path | ||
2775 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,719.28646,-112.46507)" | ||
2776 | clip-path="none" | ||
2777 | sodipodi:nodetypes="ccccc" | ||
2778 | id="path4209" | ||
2779 | d="M 964.00012,754.69487 L 982.42893,762.15966 L 991.5,725.19519 L 976.62969,730.03405 L 964.00012,754.69487 z" | ||
2780 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2781 | <rect | ||
2782 | y="757.19519" | ||
2783 | x="-55" | ||
2784 | height="177" | ||
2785 | width="182" | ||
2786 | id="rect8315" | ||
2787 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2788 | </g> | ||
2789 | <g | ||
2790 | id="g8346" | ||
2791 | style="filter:url(#filter8354)" | ||
2792 | clip-path="url(#clipPath8359)" | ||
2793 | transform="translate(276,136)"> | ||
2794 | <path | ||
2795 | transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,719.28646,-112.46507)" | ||
2796 | clip-path="none" | ||
2797 | sodipodi:nodetypes="ccccccc" | ||
2798 | id="path4207" | ||
2799 | d="M 910.14441,746.31415 L 942.75736,751.48808 L 942.39617,727.61189 L 949.5847,697.92968 L 941.13358,692.66603 L 919.31164,719.1768 L 910.14441,746.31415 z" | ||
2800 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2801 | <rect | ||
2802 | y="696.19519" | ||
2803 | x="-22" | ||
2804 | height="176" | ||
2805 | width="165" | ||
2806 | id="rect8344" | ||
2807 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2808 | </g> | ||
2809 | </g> | ||
2810 | <g | ||
2811 | inkscape:groupmode="layer" | ||
2812 | id="layer16" | ||
2813 | inkscape:label="Left Foot" | ||
2814 | style="display:inline"> | ||
2815 | <path | ||
2816 | style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" | ||
2817 | d="M 1036.164,1071.8338 C 1042.9581,1090.7366 1046.6577,1105.1335 1048.0543,1123.0457 C 1049.4509,1140.958 1044.2716,1174.8465 1045.1538,1193.7018 C 1046.0356,1212.547 1053.2875,1233.8008 1072.4984,1242.6707 C 1091.9173,1251.6365 1121.8177,1252.882 1146.6183,1239.5251 C 1171.4189,1226.1681 1204.0193,1169.1996 1217.5925,1142.2164 C 1231.2164,1115.1325 1256.3536,1027.719 1262.2533,992.44781 C 1268.1531,957.1766 1264.8039,951.14704 1257.6359,943.39232 C 1260.2762,915.55217 1256.1361,888.45689 1270.7455,856.20614 C 1240.4965,868.03184 1233.3632,896.36684 1222.4266,921.71122 C 1214.4257,870.77829 1222.6358,850.43803 1225.7455,820.49186 C 1196.6808,835.26977 1182.884,867.60588 1180.7455,913.349 C 1169.8216,912.0448 1159.3541,908.91477 1147.1741,912.63471 C 1146.9101,866.61137 1145.7106,835.7453 1156.0847,798.42822 C 1102.8293,819.45508 1093.1375,905.02232 1100.0312,911.20614 C 1089.1484,911.74114 1078.6602,909.90884 1067.1741,914.06329 C 1067.813,871.49194 1066.9136,829.15468 1037.1741,791.20614 C 1037.1741,791.20614 1006.2161,872.12848 1005.7455,894.77757 C 1005.275,917.42666 1015.1971,934.94345 1015.1971,934.94345 C 1015.1971,934.94345 1006.6291,971.68396 1008.8985,993.17568 C 1011.1944,1014.9171 1029.3414,1052.8519 1036.164,1071.8338 z" | ||
2818 | id="path8848" | ||
2819 | sodipodi:nodetypes="czzzzzzcccccccccczczz" /> | ||
2820 | <path | ||
2821 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3587);enable-background:accumulate" | ||
2822 | d="M 719.5,738.69519 L 737.81177,754.12715 L 782.2228,738.73894 L 805.5,713.19519 L 816.96397,732.41584 L 847.63558,745.19938 L 872.73295,750.92775 L 892,723.19519 L 908.02309,747.02126 L 947,752.19519 L 957.24541,745.99667 L 964.00012,754.69487 L 989.5,765.69519 L 991.5,725.19519 L 955.94866,710.6576 L 923.45591,689.1305 L 883.0038,677.66492 L 861.69668,662.13148 L 840,685.19519 L 755.02878,638.61208 L 722,676.69519 L 719.5,738.69519 z" | ||
2823 | id="path3635" | ||
2824 | sodipodi:nodetypes="cccccccccccccccccccccc" | ||
2825 | clip-path="url(#clipPath3631)" | ||
2826 | transform="translate(276,136)" /> | ||
2827 | <path | ||
2828 | transform="translate(450.03125,73.843964)" | ||
2829 | style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter3898);enable-background:new" | ||
2830 | d="M 584,696.5 L 577.4375,713.65625 C 577.4375,713.65625 569.62598,734.02113 561.75,757.3125 C 557.81201,768.95818 553.86698,781.35395 550.8125,792.4375 C 547.75802,803.52105 545.47664,812.81736 545.3125,820.71875 C 544.91443,839.88071 551.05903,855.60705 554.25,862.46875 C 553.47847,866.02398 552.25863,871.92307 550.90625,880.5625 C 548.98583,892.83071 547.18798,907.71691 548.53125,920.4375 C 549.91334,933.52585 555.34347,948.62515 561.125,963.46875 C 566.90653,978.31235 573.17935,992.69586 576.34375,1001.5 C 582.97581,1019.9519 586.33671,1033.0763 587.65625,1050 C 588.2376,1057.4561 587.41398,1070.336 586.40625,1083.375 C 585.39852,1096.414 584.21964,1109.6764 584.71875,1120.3438 C 585.70786,1141.4836 594.04673,1167.1785 618.09375,1178.2812 C 640.86858,1188.7966 673.42057,1189.9834 701.53125,1174.8438 C 717.69117,1166.1404 731.60759,1147.7462 744.90625,1127.9375 C 758.20491,1108.1288 769.87542,1086.8841 776.84375,1073.0312 C 792.19667,1042.51 816.23728,957.56702 822.46875,920.3125 C 825.48734,902.26597 826.39041,891.24695 825.09375,882.28125 C 824.11522,875.51521 821.26556,870.13385 818.21875,866.0625 C 820.26149,838.55459 817.48668,814.69372 830.1875,786.65625 L 840.75,763.375 L 816.9375,772.6875 C 799.44775,779.52503 788.03586,791.73286 780.34375,804.75 C 780.02124,805.29577 779.78061,805.85776 779.46875,806.40625 C 779.69078,783.89104 783.87659,768.76866 786.0625,747.71875 L 788.03125,728.71875 L 771,737.375 C 740.40551,752.93071 725.30511,785.56821 721.28125,827.59375 C 717.03593,826.96828 712.44985,826.5741 707.46875,826.75 C 707.17726,787.56964 707.07246,759.71315 716.0625,727.375 L 721.65625,707.25 L 702.21875,714.90625 C 671.30938,727.11019 654.35921,756.83698 645.59375,783.28125 C 641.21102,796.50339 638.84793,809.08246 638,819.21875 C 637.76797,821.99248 637.68894,824.53007 637.6875,826.9375 C 634.44563,826.90109 631.26698,827.07339 627.625,827.4375 C 627.66662,788.43277 624.14076,747.68335 595.34375,710.9375 L 584,696.5 z M 589.8125,740.3125 C 606.61941,770.95633 607.28701,804.27978 606.75,840.0625 L 606.53125,855.125 L 618.56618,848.58579 C 627.22823,845.45277 638.12676,848.35827 650.5,847.75 L 665.17465,857.1066 L 658.84375,831.3125 C 658.7541,831.08253 658.62329,830.89581 658.59375,830.59375 C 658.39424,828.55389 658.37143,825.12068 658.71875,820.96875 C 659.41339,812.66489 661.50832,801.38351 665.34375,789.8125 C 670.49907,774.25956 678.83176,758.62002 690.46875,747.28125 C 685.78494,775.91923 687.25316,807.54059 687.45711,843.08639 L 684.69118,856.34803 L 700.1875,848.75 C 709.2169,845.99229 717.37647,848.40004 729.46875,849.84375 L 742.71507,859.28798 L 741.09375,840 C 742.54168,809.02823 749.31524,786.32192 761.8125,771.125 C 758.82562,790.90384 756.38207,812.9098 762.125,849.46875 L 763.19052,855.84193 L 760.25237,867.35878 L 770.86948,859.1906 L 780.05921,869.41258 L 778.51093,858.94898 L 781.9375,852 C 787.63852,838.78851 792.11032,825.78663 798.28125,815.34375 C 799.24111,813.71941 800.31278,812.27939 801.34375,810.78125 C 797.66309,831.9366 798.91659,850.9894 797.25,868.5625 L 792.56986,876.36948 L 799.96875,876.59375 C 803.1888,880.07736 803.83625,880.44443 804.53125,885.25 C 805.22625,890.05557 804.84987,899.65035 801.96875,916.875 C 796.40076,950.16292 770.17603,1040.0409 758.28125,1063.6875 C 751.67634,1076.8179 740.25127,1097.5832 727.65625,1116.3438 C 715.06123,1135.1043 700.29692,1151.8776 691.65625,1156.5312 C 670.16573,1168.1054 642.87545,1166.7914 626.8125,1159.375 C 612.43764,1152.738 606.27438,1135.9256 605.5,1119.375 C 605.11689,1111.187 606.11279,1098.0658 607.125,1084.9688 C 608.13721,1071.8717 618.41391,1062.398 622.54839,1048.4062 C 627.92068,1030.2254 621.10152,1011.8118 610.04839,994.46875 C 603.56184,984.29097 586.07159,970.21085 580.5,955.90625 C 574.92841,941.60165 570.13249,926.9031 569.21875,918.25 C 568.29254,909.47887 569.64125,895.22498 571.4375,883.75 C 573.23375,872.27503 575.28125,863.46875 575.28125,863.46875 L 584.70403,859.85355 L 574.21875,855.96875 C 574.21875,855.96875 565.71986,840.65865 566.125,821.15625 C 566.19611,817.73309 567.96126,808.4282 570.84375,797.96875 C 573.72624,787.5093 577.60841,775.41604 581.46875,764 C 584.51314,754.99692 587.24938,747.39655 589.8125,740.3125 z" | ||
2831 | id="path3669" | ||
2832 | clip-path="url(#clipPath3677)" | ||
2833 | sodipodi:nodetypes="ccssscsssssssssssssccccscccccccccsscccccccccccssscccccccccccccccsccccssssssssssssscccsssc" /> | ||
2834 | <g | ||
2835 | id="g3628" | ||
2836 | clip-path="url(#clipPath3636)" | ||
2837 | transform="translate(276,136)"> | ||
2838 | <path | ||
2839 | id="path8988" | ||
2840 | d="M 824.48651,818.48242 L 774.98903,802.92607 L 748.11898,855.25197 L 789.13117,900.50681 L 838.62864,862.32304 L 824.48651,818.48242 z" | ||
2841 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9024);enable-background:accumulate" /> | ||
2842 | <path | ||
2843 | id="path8990" | ||
2844 | d="M 964.49365,855.25197 L 909.33932,808.58293 L 865.4987,845.35248 L 899.43983,899.09259 L 906.51089,965.56063 L 855.59921,1000.916 L 814.58701,1012.2297 L 811.75859,1058.8987 L 868.32713,1084.3546 L 931.96674,1007.987 L 956.00837,913.23473 L 964.49365,855.25197 z" | ||
2845 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9020);enable-background:accumulate" /> | ||
2846 | </g> | ||
2847 | <path | ||
2848 | style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9044);enable-background:accumulate" | ||
2849 | d="M 1045.3322,1043.5779 L 1106.1434,1016.7078 L 1206.5525,1047.8205 L 1142.9129,1078.9332 L 1060.8885,1061.9626 L 1045.3322,1043.5779 z" | ||
2850 | id="path8992" /> | ||
2851 | <path | ||
2852 | transform="translate(450.03125,73.843964)" | ||
2853 | style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4185);enable-background:new" | ||
2854 | d="M 583.0625,715.75 C 570.95641,750.19974 556.348,784.28333 551.3125,820.59375 C 550.48042,835.52242 555.90165,849.75318 560.15625,863.65625 C 554.24001,890.85751 550.01944,920.5562 561.3125,946.78125 C 574.82967,984.9421 596.31397,1022.4634 593.73529,1064.2495 C 592.78699,1093.5437 584.72085,1125.2436 599.125,1152.5312 C 609.32364,1171.866 632.26456,1179.8429 653.09285,1180.1988 C 680.95504,1181.3729 709.55546,1168.5772 725.09375,1144.9375 C 747.68924,1115.5658 766.89426,1083.4402 780.3324,1048.8777 C 797.22293,1003.3717 810.0042,956.31698 818.26642,908.4788 C 820.09082,895.53774 821.3675,881.00895 813.6875,869.65625 C 810.25635,862.31993 813.72957,854.09611 813.00293,846.34648 C 813.67693,821.35182 817.01525,795.68272 829.65625,773.75 C 811.92312,780.1946 794.58357,790.30971 785.65318,807.61425 C 781.7181,814.3238 778.04836,821.18838 774.28125,828 C 770.73126,797.98592 778.00088,768.35172 781.0625,738.71875 C 760.89646,747.77338 744.18578,764.37397 736.88755,785.40075 C 730.58292,800.98078 728.08533,817.71793 726.625,834.4375 C 718.37166,832.91825 709.94053,832.33595 701.5625,832.9375 C 700.59942,794.23963 701.09554,753.53035 712.53125,717.03125 C 693.85012,723.24901 677.36504,735.76676 666.90322,752.41848 C 653.05068,773.29827 645.64182,798.17243 643.84375,823.03125 C 644.42909,827.35579 643.78249,834.87134 637.5,832.90625 C 632.16882,832.9238 626.87092,833.58508 621.5625,834 C 622.71034,794.61852 618.22106,752.3718 594.5,719.78125 C 591.43929,716.14408 588.86315,712.09687 585.875,708.4375 C 584.9375,710.875 584,713.3125 583.0625,715.75 z M 590.8125,729.59375 C 609.37777,758.89004 613.295,794.41387 612.9375,828.46875 C 613.14159,833.64401 612.42094,840.29795 613.0625,844.53125 C 625.38106,838.4285 639.80162,842.09135 652.84375,842.34375 C 655.16087,843.567 656.03585,843.99618 654.75,840.9375 C 650.58545,826.98465 652.90172,812.3245 656.55504,798.52986 C 662.92191,772.23922 677.18332,747.44188 699.375,731.5 C 690.75791,768.73706 693.65842,808.06161 693.28125,845.46875 C 705.53469,838.55885 720.56004,842.02262 733.3125,845.21875 C 736.70472,848.75355 735.60185,844.48927 735.5,841.40625 C 735.01691,820.03567 739.63133,798.33662 749.1875,779.25 C 755.15016,768.56273 763.43088,759.44621 771.625,750.375 C 763.75344,784.2131 762.4221,819.71093 768.90625,853.875 C 770.6311,852.46382 773.51306,853.42086 774.5625,853.5 C 784.24619,832.26318 790.91362,808.11938 809.45266,792.75815 C 811.32595,792.38693 808.00448,801.2831 807.96875,804.65625 C 804.43387,826.50206 804.67155,848.82948 803.0625,870.90625 C 801.75012,872.28304 805.91085,873.22979 806.53125,874.96875 C 814.45436,885.53455 809.65419,899.80024 808.62285,911.74886 C 801.5588,956.78241 786.85732,1000.1282 773.04608,1043.446 C 760.97132,1076.3953 742.32638,1106.526 721.8125,1134.875 C 710.85993,1148.4264 698.56778,1162.7263 680.96875,1167.375 C 660.81274,1173.6163 637.3928,1172.5494 619,1161.7188 C 601.71034,1149.3774 597.97607,1126.0099 599.73774,1106.0324 C 599.78653,1090.2062 602.10985,1078.2316 607.65521,1063.2271 C 613.20056,1048.2226 610.12626,1031.8954 605.875,1015.5938 C 601.62374,999.2922 593.69597,989.33378 584.05342,973.38963 C 574.41087,957.44548 566.41652,937.35229 563.28125,917.8125 C 561.53177,899.18536 566.17296,880.68988 569.0625,862.5625 C 572.35873,859.72554 567.46451,857.36591 566.75,854.375 C 559.14887,837.35992 558.34253,817.6001 564.00766,799.81502 C 571.13786,774.74272 579.76853,750.18261 588.6875,725.6875 C 589.39583,726.98958 590.10417,728.29167 590.8125,729.59375 z" | ||
2855 | id="path4149" | ||
2856 | sodipodi:nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccczzzcccccc" | ||
2857 | clip-path="url(#clipPath4177)" /> | ||
2858 | <path | ||
2859 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4130);enable-background:accumulate" | ||
2860 | d="M 735.05635,733.03834 L 737.81177,754.12715 L 782.2228,738.73894 L 787.07343,716.34919 L 783.13726,694.29697 L 760.68563,657.70396 L 752.40559,688.0089 L 735.05635,733.03834 z" | ||
2861 | id="path3902" | ||
2862 | sodipodi:nodetypes="cccccccc" | ||
2863 | clip-path="url(#clipPath3631)" | ||
2864 | transform="translate(276,136)" /> | ||
2865 | <path | ||
2866 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4141);enable-background:accumulate" | ||
2867 | d="M 831.81321,730.29452 L 847.63558,745.19938 L 868.49031,748.09932 L 866.90002,708.17334 L 875.22563,677.66492 L 868.06064,671.32386 L 846.36395,692.26626 L 831.81321,730.29452 z" | ||
2868 | id="path4135" | ||
2869 | sodipodi:nodetypes="cccccccc" | ||
2870 | clip-path="url(#clipPath3631)" | ||
2871 | transform="translate(276,136)" /> | ||
2872 | <g | ||
2873 | id="g8367" | ||
2874 | style="filter:url(#filter8379)" | ||
2875 | clip-path="url(#clipPath8392)" | ||
2876 | transform="translate(276,136)"> | ||
2877 | <path | ||
2878 | clip-path="none" | ||
2879 | sodipodi:nodetypes="ccccccc" | ||
2880 | id="path4145" | ||
2881 | d="M 910.14441,746.31415 L 942.75736,751.48808 L 942.39617,727.61189 L 949.5847,697.92968 L 941.13358,692.66603 L 919.31164,719.1768 L 910.14441,746.31415 z" | ||
2882 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2883 | <rect | ||
2884 | y="650.19098" | ||
2885 | x="877.51953" | ||
2886 | height="172.53406" | ||
2887 | width="123.03658" | ||
2888 | id="rect8365" | ||
2889 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2890 | </g> | ||
2891 | <g | ||
2892 | id="g8400" | ||
2893 | style="filter:url(#filter8404)" | ||
2894 | clip-path="url(#clipPath8417)" | ||
2895 | transform="translate(276,136)"> | ||
2896 | <path | ||
2897 | clip-path="none" | ||
2898 | sodipodi:nodetypes="ccccc" | ||
2899 | id="path4147" | ||
2900 | d="M 964.00012,754.69487 L 982.42893,762.15966 L 991.5,725.19519 L 976.62969,730.03405 L 964.00012,754.69487 z" | ||
2901 | style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2902 | <rect | ||
2903 | y="677.06104" | ||
2904 | x="924.89569" | ||
2905 | height="125.1579" | ||
2906 | width="142.12846" | ||
2907 | id="rect8398" | ||
2908 | style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" /> | ||
2909 | </g> | ||
2910 | </g> | ||
2911 | </svg> | ||
diff --git a/Documentation/logo.txt b/Documentation/logo.txt index a2e62445e28e..296f0f7f67eb 100644 --- a/Documentation/logo.txt +++ b/Documentation/logo.txt | |||
@@ -1,4 +1,13 @@ | |||
1 | Tux is taking a three month sabbatical to work as a barber, so Tuz is | 1 | This is the full-colour version of the currently unofficial Linux logo |
2 | standing in. He's taken pains to ensure you'll hardly notice. | 2 | ("currently unofficial" just means that there has been no paperwork and |
3 | that I have not really announced it yet). It was created by Larry Ewing, | ||
4 | and is freely usable as long as you acknowledge Larry as the original | ||
5 | artist. | ||
6 | |||
7 | Note that there are black-and-white versions of this available that | ||
8 | scale down to smaller sizes and are better for letterheads or whatever | ||
9 | you want to use it for: for the full range of logos take a look at | ||
10 | Larry's web-page: | ||
11 | |||
12 | http://www.isc.tamu.edu/~lewing/linux/ | ||
3 | 13 | ||
4 | Image by Andrew McGown and Josh Bush. Image is licensed CC BY-SA. | ||
diff --git a/Documentation/md.txt b/Documentation/md.txt index 1da9d1b1793f..4edd39ec7db9 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -164,15 +164,19 @@ All md devices contain: | |||
164 | raid_disks | 164 | raid_disks |
165 | a text file with a simple number indicating the number of devices | 165 | a text file with a simple number indicating the number of devices |
166 | in a fully functional array. If this is not yet known, the file | 166 | in a fully functional array. If this is not yet known, the file |
167 | will be empty. If an array is being resized (not currently | 167 | will be empty. If an array is being resized this will contain |
168 | possible) this will contain the larger of the old and new sizes. | 168 | the new number of devices. |
169 | Some raid level (RAID1) allow this value to be set while the | 169 | Some raid levels allow this value to be set while the array is |
170 | array is active. This will reconfigure the array. Otherwise | 170 | active. This will reconfigure the array. Otherwise it can only |
171 | it can only be set while assembling an array. | 171 | be set while assembling an array. |
172 | A change to this attribute will not be permitted if it would | ||
173 | reduce the size of the array. To reduce the number of drives | ||
174 | in an e.g. raid5, the array size must first be reduced by | ||
175 | setting the 'array_size' attribute. | ||
172 | 176 | ||
173 | chunk_size | 177 | chunk_size |
174 | This is the size if bytes for 'chunks' and is only relevant to | 178 | This is the size in bytes for 'chunks' and is only relevant to |
175 | raid levels that involve striping (1,4,5,6,10). The address space | 179 | raid levels that involve striping (0,4,5,6,10). The address space |
176 | of the array is conceptually divided into chunks and consecutive | 180 | of the array is conceptually divided into chunks and consecutive |
177 | chunks are striped onto neighbouring devices. | 181 | chunks are striped onto neighbouring devices. |
178 | The size should be at least PAGE_SIZE (4k) and should be a power | 182 | The size should be at least PAGE_SIZE (4k) and should be a power |
@@ -183,6 +187,20 @@ All md devices contain: | |||
183 | simply a number that is interpretted differently by different | 187 | simply a number that is interpretted differently by different |
184 | levels. It can be written while assembling an array. | 188 | levels. It can be written while assembling an array. |
185 | 189 | ||
190 | array_size | ||
191 | This can be used to artificially constrain the available space in | ||
192 | the array to be less than is actually available on the combined | ||
193 | devices. Writing a number (in Kilobytes) which is less than | ||
194 | the available size will set the size. Any reconfiguration of the | ||
195 | array (e.g. adding devices) will not cause the size to change. | ||
196 | Writing the word 'default' will cause the effective size of the | ||
197 | array to be whatever size is actually available based on | ||
198 | 'level', 'chunk_size' and 'component_size'. | ||
199 | |||
200 | This can be used to reduce the size of the array before reducing | ||
201 | the number of devices in a raid4/5/6, or to support external | ||
202 | metadata formats which mandate such clipping. | ||
203 | |||
186 | reshape_position | 204 | reshape_position |
187 | This is either "none" or a sector number within the devices of | 205 | This is either "none" or a sector number within the devices of |
188 | the array where "reshape" is up to. If this is set, the three | 206 | the array where "reshape" is up to. If this is set, the three |
@@ -207,6 +225,11 @@ All md devices contain: | |||
207 | about the array. It can be 0.90 (traditional format), 1.0, 1.1, | 225 | about the array. It can be 0.90 (traditional format), 1.0, 1.1, |
208 | 1.2 (newer format in varying locations) or "none" indicating that | 226 | 1.2 (newer format in varying locations) or "none" indicating that |
209 | the kernel isn't managing metadata at all. | 227 | the kernel isn't managing metadata at all. |
228 | Alternately it can be "external:" followed by a string which | ||
229 | is set by user-space. This indicates that metadata is managed | ||
230 | by a user-space program. Any device failure or other event that | ||
231 | requires a metadata update will cause array activity to be | ||
232 | suspended until the event is acknowledged. | ||
210 | 233 | ||
211 | resync_start | 234 | resync_start |
212 | The point at which resync should start. If no resync is needed, | 235 | The point at which resync should start. If no resync is needed, |
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f5b7127f54ac..7f5809eddee6 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
@@ -31,6 +31,7 @@ Contents: | |||
31 | 31 | ||
32 | - Locking functions. | 32 | - Locking functions. |
33 | - Interrupt disabling functions. | 33 | - Interrupt disabling functions. |
34 | - Sleep and wake-up functions. | ||
34 | - Miscellaneous functions. | 35 | - Miscellaneous functions. |
35 | 36 | ||
36 | (*) Inter-CPU locking barrier effects. | 37 | (*) Inter-CPU locking barrier effects. |
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some | |||
1217 | other means. | 1218 | other means. |
1218 | 1219 | ||
1219 | 1220 | ||
1221 | SLEEP AND WAKE-UP FUNCTIONS | ||
1222 | --------------------------- | ||
1223 | |||
1224 | Sleeping and waking on an event flagged in global data can be viewed as an | ||
1225 | interaction between two pieces of data: the task state of the task waiting for | ||
1226 | the event and the global data used to indicate the event. To make sure that | ||
1227 | these appear to happen in the right order, the primitives to begin the process | ||
1228 | of going to sleep, and the primitives to initiate a wake up imply certain | ||
1229 | barriers. | ||
1230 | |||
1231 | Firstly, the sleeper normally follows something like this sequence of events: | ||
1232 | |||
1233 | for (;;) { | ||
1234 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1235 | if (event_indicated) | ||
1236 | break; | ||
1237 | schedule(); | ||
1238 | } | ||
1239 | |||
1240 | A general memory barrier is interpolated automatically by set_current_state() | ||
1241 | after it has altered the task state: | ||
1242 | |||
1243 | CPU 1 | ||
1244 | =============================== | ||
1245 | set_current_state(); | ||
1246 | set_mb(); | ||
1247 | STORE current->state | ||
1248 | <general barrier> | ||
1249 | LOAD event_indicated | ||
1250 | |||
1251 | set_current_state() may be wrapped by: | ||
1252 | |||
1253 | prepare_to_wait(); | ||
1254 | prepare_to_wait_exclusive(); | ||
1255 | |||
1256 | which therefore also imply a general memory barrier after setting the state. | ||
1257 | The whole sequence above is available in various canned forms, all of which | ||
1258 | interpolate the memory barrier in the right place: | ||
1259 | |||
1260 | wait_event(); | ||
1261 | wait_event_interruptible(); | ||
1262 | wait_event_interruptible_exclusive(); | ||
1263 | wait_event_interruptible_timeout(); | ||
1264 | wait_event_killable(); | ||
1265 | wait_event_timeout(); | ||
1266 | wait_on_bit(); | ||
1267 | wait_on_bit_lock(); | ||
1268 | |||
1269 | |||
1270 | Secondly, code that performs a wake up normally follows something like this: | ||
1271 | |||
1272 | event_indicated = 1; | ||
1273 | wake_up(&event_wait_queue); | ||
1274 | |||
1275 | or: | ||
1276 | |||
1277 | event_indicated = 1; | ||
1278 | wake_up_process(event_daemon); | ||
1279 | |||
1280 | A write memory barrier is implied by wake_up() and co. if and only if they wake | ||
1281 | something up. The barrier occurs before the task state is cleared, and so sits | ||
1282 | between the STORE to indicate the event and the STORE to set TASK_RUNNING: | ||
1283 | |||
1284 | CPU 1 CPU 2 | ||
1285 | =============================== =============================== | ||
1286 | set_current_state(); STORE event_indicated | ||
1287 | set_mb(); wake_up(); | ||
1288 | STORE current->state <write barrier> | ||
1289 | <general barrier> STORE current->state | ||
1290 | LOAD event_indicated | ||
1291 | |||
1292 | The available waker functions include: | ||
1293 | |||
1294 | complete(); | ||
1295 | wake_up(); | ||
1296 | wake_up_all(); | ||
1297 | wake_up_bit(); | ||
1298 | wake_up_interruptible(); | ||
1299 | wake_up_interruptible_all(); | ||
1300 | wake_up_interruptible_nr(); | ||
1301 | wake_up_interruptible_poll(); | ||
1302 | wake_up_interruptible_sync(); | ||
1303 | wake_up_interruptible_sync_poll(); | ||
1304 | wake_up_locked(); | ||
1305 | wake_up_locked_poll(); | ||
1306 | wake_up_nr(); | ||
1307 | wake_up_poll(); | ||
1308 | wake_up_process(); | ||
1309 | |||
1310 | |||
1311 | [!] Note that the memory barriers implied by the sleeper and the waker do _not_ | ||
1312 | order multiple stores before the wake-up with respect to loads of those stored | ||
1313 | values after the sleeper has called set_current_state(). For instance, if the | ||
1314 | sleeper does: | ||
1315 | |||
1316 | set_current_state(TASK_INTERRUPTIBLE); | ||
1317 | if (event_indicated) | ||
1318 | break; | ||
1319 | __set_current_state(TASK_RUNNING); | ||
1320 | do_something(my_data); | ||
1321 | |||
1322 | and the waker does: | ||
1323 | |||
1324 | my_data = value; | ||
1325 | event_indicated = 1; | ||
1326 | wake_up(&event_wait_queue); | ||
1327 | |||
1328 | there's no guarantee that the change to event_indicated will be perceived by | ||
1329 | the sleeper as coming after the change to my_data. In such a circumstance, the | ||
1330 | code on both sides must interpolate its own memory barriers between the | ||
1331 | separate data accesses. Thus the above sleeper ought to do: | ||
1332 | |||
1333 | set_current_state(TASK_INTERRUPTIBLE); | ||
1334 | if (event_indicated) { | ||
1335 | smp_rmb(); | ||
1336 | do_something(my_data); | ||
1337 | } | ||
1338 | |||
1339 | and the waker should do: | ||
1340 | |||
1341 | my_data = value; | ||
1342 | smp_wmb(); | ||
1343 | event_indicated = 1; | ||
1344 | wake_up(&event_wait_queue); | ||
1345 | |||
1346 | |||
1220 | MISCELLANEOUS FUNCTIONS | 1347 | MISCELLANEOUS FUNCTIONS |
1221 | ----------------------- | 1348 | ----------------------- |
1222 | 1349 | ||
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED? | |||
1366 | 1493 | ||
1367 | Under normal operation, memory operation reordering is generally not going to | 1494 | Under normal operation, memory operation reordering is generally not going to |
1368 | be a problem as a single-threaded linear piece of code will still appear to | 1495 | be a problem as a single-threaded linear piece of code will still appear to |
1369 | work correctly, even if it's in an SMP kernel. There are, however, three | 1496 | work correctly, even if it's in an SMP kernel. There are, however, four |
1370 | circumstances in which reordering definitely _could_ be a problem: | 1497 | circumstances in which reordering definitely _could_ be a problem: |
1371 | 1498 | ||
1372 | (*) Interprocessor interaction. | 1499 | (*) Interprocessor interaction. |
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 4c2ecf537a4a..bbc8a6a36921 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt | |||
@@ -73,13 +73,13 @@ this phase is triggered automatically. ACPI can notify this event. If not, | |||
73 | (see Section 4.). | 73 | (see Section 4.). |
74 | 74 | ||
75 | Logical Memory Hotplug phase is to change memory state into | 75 | Logical Memory Hotplug phase is to change memory state into |
76 | avaiable/unavailable for users. Amount of memory from user's view is | 76 | available/unavailable for users. Amount of memory from user's view is |
77 | changed by this phase. The kernel makes all memory in it as free pages | 77 | changed by this phase. The kernel makes all memory in it as free pages |
78 | when a memory range is available. | 78 | when a memory range is available. |
79 | 79 | ||
80 | In this document, this phase is described as online/offline. | 80 | In this document, this phase is described as online/offline. |
81 | 81 | ||
82 | Logical Memory Hotplug phase is triggred by write of sysfs file by system | 82 | Logical Memory Hotplug phase is triggered by write of sysfs file by system |
83 | administrator. For the hot-add case, it must be executed after Physical Hotplug | 83 | administrator. For the hot-add case, it must be executed after Physical Hotplug |
84 | phase by hand. | 84 | phase by hand. |
85 | (However, if you writes udev's hotplug scripts for memory hotplug, these | 85 | (However, if you writes udev's hotplug scripts for memory hotplug, these |
@@ -334,7 +334,7 @@ MEMORY_CANCEL_ONLINE | |||
334 | Generated if MEMORY_GOING_ONLINE fails. | 334 | Generated if MEMORY_GOING_ONLINE fails. |
335 | 335 | ||
336 | MEMORY_ONLINE | 336 | MEMORY_ONLINE |
337 | Generated when memory has succesfully brought online. The callback may | 337 | Generated when memory has successfully brought online. The callback may |
338 | allocate pages from the new memory. | 338 | allocate pages from the new memory. |
339 | 339 | ||
340 | MEMORY_GOING_OFFLINE | 340 | MEMORY_GOING_OFFLINE |
@@ -359,7 +359,7 @@ The third argument is passed by pointer of struct memory_notify. | |||
359 | struct memory_notify { | 359 | struct memory_notify { |
360 | unsigned long start_pfn; | 360 | unsigned long start_pfn; |
361 | unsigned long nr_pages; | 361 | unsigned long nr_pages; |
362 | int status_cahnge_nid; | 362 | int status_change_nid; |
363 | } | 363 | } |
364 | 364 | ||
365 | start_pfn is start_pfn of online/offline memory. | 365 | start_pfn is start_pfn of online/offline memory. |
diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003 new file mode 100644 index 000000000000..c4ff5f38e010 --- /dev/null +++ b/Documentation/misc-devices/isl29003 | |||
@@ -0,0 +1,62 @@ | |||
1 | Kernel driver isl29003 | ||
2 | ===================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Intersil ISL29003 | ||
6 | Prefix: 'isl29003' | ||
7 | Addresses scanned: none | ||
8 | Datasheet: | ||
9 | http://www.intersil.com/data/fn/fn7464.pdf | ||
10 | |||
11 | Author: Daniel Mack <daniel@caiaq.de> | ||
12 | |||
13 | |||
14 | Description | ||
15 | ----------- | ||
16 | The ISL29003 is an integrated light sensor with a 16-bit integrating type | ||
17 | ADC, I2C user programmable lux range select for optimized counts/lux, and | ||
18 | I2C multi-function control and monitoring capabilities. The internal ADC | ||
19 | provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by | ||
20 | artificial light sources. | ||
21 | |||
22 | The driver allows to set the lux range, the bit resolution, the operational | ||
23 | mode (see below) and the power state of device and can read the current lux | ||
24 | value, of course. | ||
25 | |||
26 | |||
27 | Detection | ||
28 | --------- | ||
29 | |||
30 | The ISL29003 does not have an ID register which could be used to identify | ||
31 | it, so the detection routine will just try to read from the configured I2C | ||
32 | addess and consider the device to be present as soon as it ACKs the | ||
33 | transfer. | ||
34 | |||
35 | |||
36 | Sysfs entries | ||
37 | ------------- | ||
38 | |||
39 | range: | ||
40 | 0: 0 lux to 1000 lux (default) | ||
41 | 1: 0 lux to 4000 lux | ||
42 | 2: 0 lux to 16,000 lux | ||
43 | 3: 0 lux to 64,000 lux | ||
44 | |||
45 | resolution: | ||
46 | 0: 2^16 cycles (default) | ||
47 | 1: 2^12 cycles | ||
48 | 2: 2^8 cycles | ||
49 | 3: 2^4 cycles | ||
50 | |||
51 | mode: | ||
52 | 0: diode1's current (unsigned 16bit) (default) | ||
53 | 1: diode1's current (unsigned 16bit) | ||
54 | 2: difference between diodes (l1 - l2, signed 15bit) | ||
55 | |||
56 | power_state: | ||
57 | 0: device is disabled (default) | ||
58 | 1: device is enabled | ||
59 | |||
60 | lux (read only): | ||
61 | returns the value from the last sensor reading | ||
62 | |||
diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt index 1fef1f06dfd2..d3507bad428d 100644 --- a/Documentation/mn10300/ABI.txt +++ b/Documentation/mn10300/ABI.txt | |||
@@ -26,7 +26,7 @@ registers and the stack. If the first argument is a 64-bit value, it will be | |||
26 | passed in D0:D1. If the first argument is not a 64-bit value, but the second | 26 | passed in D0:D1. If the first argument is not a 64-bit value, but the second |
27 | is, the second will be passed entirely on the stack and D1 will be unused. | 27 | is, the second will be passed entirely on the stack and D1 will be unused. |
28 | 28 | ||
29 | Arguments smaller than 32-bits are not coelesced within a register or a stack | 29 | Arguments smaller than 32-bits are not coalesced within a register or a stack |
30 | word. For example, two byte-sized arguments will always be passed in separate | 30 | word. For example, two byte-sized arguments will always be passed in separate |
31 | registers or word-sized stack slots. | 31 | registers or word-sized stack slots. |
32 | 32 | ||
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt index bdf93b7f0f24..274821b35a7f 100644 --- a/Documentation/mtd/nand_ecc.txt +++ b/Documentation/mtd/nand_ecc.txt | |||
@@ -50,7 +50,7 @@ byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15 | |||
50 | cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 | 50 | cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 |
51 | 51 | ||
52 | This figure represents a sector of 256 bytes. | 52 | This figure represents a sector of 256 bytes. |
53 | cp is my abbreviaton for column parity, rp for row parity. | 53 | cp is my abbreviation for column parity, rp for row parity. |
54 | 54 | ||
55 | Let's start to explain column parity. | 55 | Let's start to explain column parity. |
56 | cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. | 56 | cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. |
@@ -560,7 +560,7 @@ Measuring this code again showed big gain. When executing the original | |||
560 | linux code 1 million times, this took about 1 second on my system. | 560 | linux code 1 million times, this took about 1 second on my system. |
561 | (using time to measure the performance). After this iteration I was back | 561 | (using time to measure the performance). After this iteration I was back |
562 | to 0.075 sec. Actually I had to decide to start measuring over 10 | 562 | to 0.075 sec. Actually I had to decide to start measuring over 10 |
563 | million interations in order not to loose too much accuracy. This one | 563 | million iterations in order not to lose too much accuracy. This one |
564 | definitely seemed to be the jackpot! | 564 | definitely seemed to be the jackpot! |
565 | 565 | ||
566 | There is a little bit more room for improvement though. There are three | 566 | There is a little bit more room for improvement though. There are three |
@@ -571,8 +571,8 @@ loop; This eliminates 3 statements per loop. Of course after the loop we | |||
571 | need to correct by adding: | 571 | need to correct by adding: |
572 | rp4 ^= rp4_6; | 572 | rp4 ^= rp4_6; |
573 | rp6 ^= rp4_6 | 573 | rp6 ^= rp4_6 |
574 | Furthermore there are 4 sequential assingments to rp8. This can be | 574 | Furthermore there are 4 sequential assignments to rp8. This can be |
575 | encoded slightly more efficient by saving tmppar before those 4 lines | 575 | encoded slightly more efficiently by saving tmppar before those 4 lines |
576 | and later do rp8 = rp8 ^ tmppar ^ notrp8; | 576 | and later do rp8 = rp8 ^ tmppar ^ notrp8; |
577 | (where notrp8 is the value of rp8 before those 4 lines). | 577 | (where notrp8 is the value of rp8 before those 4 lines). |
578 | Again a use of the commutative property of xor. | 578 | Again a use of the commutative property of xor. |
@@ -622,7 +622,7 @@ Not a big change, but every penny counts :-) | |||
622 | Analysis 7 | 622 | Analysis 7 |
623 | ========== | 623 | ========== |
624 | 624 | ||
625 | Acutally this made things worse. Not very much, but I don't want to move | 625 | Actually this made things worse. Not very much, but I don't want to move |
626 | into the wrong direction. Maybe something to investigate later. Could | 626 | into the wrong direction. Maybe something to investigate later. Could |
627 | have to do with caching again. | 627 | have to do with caching again. |
628 | 628 | ||
@@ -642,7 +642,7 @@ Analysis 8 | |||
642 | This makes things worse. Let's stick with attempt 6 and continue from there. | 642 | This makes things worse. Let's stick with attempt 6 and continue from there. |
643 | Although it seems that the code within the loop cannot be optimised | 643 | Although it seems that the code within the loop cannot be optimised |
644 | further there is still room to optimize the generation of the ecc codes. | 644 | further there is still room to optimize the generation of the ecc codes. |
645 | We can simply calcualate the total parity. If this is 0 then rp4 = rp5 | 645 | We can simply calculate the total parity. If this is 0 then rp4 = rp5 |
646 | etc. If the parity is 1, then rp4 = !rp5; | 646 | etc. If the parity is 1, then rp4 = !rp5; |
647 | But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits | 647 | But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits |
648 | in the result byte and then do something like | 648 | in the result byte and then do something like |
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 5ede7473b425..d5181ce9ff62 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt | |||
@@ -221,7 +221,7 @@ ad_select | |||
221 | 221 | ||
222 | - Any slave's 802.3ad association state changes | 222 | - Any slave's 802.3ad association state changes |
223 | 223 | ||
224 | - The bond's adminstrative state changes to up | 224 | - The bond's administrative state changes to up |
225 | 225 | ||
226 | count or 2 | 226 | count or 2 |
227 | 227 | ||
@@ -369,7 +369,7 @@ fail_over_mac | |||
369 | When this policy is used in conjuction with the mii | 369 | When this policy is used in conjuction with the mii |
370 | monitor, devices which assert link up prior to being | 370 | monitor, devices which assert link up prior to being |
371 | able to actually transmit and receive are particularly | 371 | able to actually transmit and receive are particularly |
372 | susecptible to loss of the gratuitous ARP, and an | 372 | susceptible to loss of the gratuitous ARP, and an |
373 | appropriate updelay setting may be required. | 373 | appropriate updelay setting may be required. |
374 | 374 | ||
375 | follow or 2 | 375 | follow or 2 |
@@ -1242,7 +1242,7 @@ monitoring is enabled, and vice-versa. | |||
1242 | To add ARP targets: | 1242 | To add ARP targets: |
1243 | # echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target | 1243 | # echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target |
1244 | # echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target | 1244 | # echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target |
1245 | NOTE: up to 10 target addresses may be specified. | 1245 | NOTE: up to 16 target addresses may be specified. |
1246 | 1246 | ||
1247 | To remove an ARP target: | 1247 | To remove an ARP target: |
1248 | # echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target | 1248 | # echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target |
@@ -1794,7 +1794,7 @@ target to query. | |||
1794 | generally referred to as "trunk failover." This is a feature of the | 1794 | generally referred to as "trunk failover." This is a feature of the |
1795 | switch that causes the link state of a particular switch port to be set | 1795 | switch that causes the link state of a particular switch port to be set |
1796 | down (or up) when the state of another switch port goes down (or up). | 1796 | down (or up) when the state of another switch port goes down (or up). |
1797 | It's purpose is to propogate link failures from logically "exterior" ports | 1797 | Its purpose is to propagate link failures from logically "exterior" ports |
1798 | to the logically "interior" ports that bonding is able to monitor via | 1798 | to the logically "interior" ports that bonding is able to monitor via |
1799 | miimon. Availability and configuration for trunk failover varies by | 1799 | miimon. Availability and configuration for trunk failover varies by |
1800 | switch, but this can be a viable alternative to the ARP monitor when using | 1800 | switch, but this can be a viable alternative to the ARP monitor when using |
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 2035bc4932f2..cd79735013f9 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt | |||
@@ -36,10 +36,15 @@ This file contains | |||
36 | 6.2 local loopback of sent frames | 36 | 6.2 local loopback of sent frames |
37 | 6.3 CAN controller hardware filters | 37 | 6.3 CAN controller hardware filters |
38 | 6.4 The virtual CAN driver (vcan) | 38 | 6.4 The virtual CAN driver (vcan) |
39 | 6.5 currently supported CAN hardware | 39 | 6.5 The CAN network device driver interface |
40 | 6.6 todo | 40 | 6.5.1 Netlink interface to set/get devices properties |
41 | 6.5.2 Setting the CAN bit-timing | ||
42 | 6.5.3 Starting and stopping the CAN network device | ||
43 | 6.6 supported CAN hardware | ||
41 | 44 | ||
42 | 7 Credits | 45 | 7 Socket CAN resources |
46 | |||
47 | 8 Credits | ||
43 | 48 | ||
44 | ============================================================================ | 49 | ============================================================================ |
45 | 50 | ||
@@ -234,6 +239,8 @@ solution for a couple of reasons: | |||
234 | the user application using the common CAN filter mechanisms. Inside | 239 | the user application using the common CAN filter mechanisms. Inside |
235 | this filter definition the (interested) type of errors may be | 240 | this filter definition the (interested) type of errors may be |
236 | selected. The reception of error frames is disabled by default. | 241 | selected. The reception of error frames is disabled by default. |
242 | The format of the CAN error frame is briefly decribed in the Linux | ||
243 | header file "include/linux/can/error.h". | ||
237 | 244 | ||
238 | 4. How to use Socket CAN | 245 | 4. How to use Socket CAN |
239 | ------------------------ | 246 | ------------------------ |
@@ -327,7 +334,7 @@ solution for a couple of reasons: | |||
327 | return 1; | 334 | return 1; |
328 | } | 335 | } |
329 | 336 | ||
330 | /* paraniod check ... */ | 337 | /* paranoid check ... */ |
331 | if (nbytes < sizeof(struct can_frame)) { | 338 | if (nbytes < sizeof(struct can_frame)) { |
332 | fprintf(stderr, "read: incomplete CAN frame\n"); | 339 | fprintf(stderr, "read: incomplete CAN frame\n"); |
333 | return 1; | 340 | return 1; |
@@ -605,61 +612,213 @@ solution for a couple of reasons: | |||
605 | removal of vcan network devices can be managed with the ip(8) tool: | 612 | removal of vcan network devices can be managed with the ip(8) tool: |
606 | 613 | ||
607 | - Create a virtual CAN network interface: | 614 | - Create a virtual CAN network interface: |
608 | ip link add type vcan | 615 | $ ip link add type vcan |
609 | 616 | ||
610 | - Create a virtual CAN network interface with a specific name 'vcan42': | 617 | - Create a virtual CAN network interface with a specific name 'vcan42': |
611 | ip link add dev vcan42 type vcan | 618 | $ ip link add dev vcan42 type vcan |
612 | 619 | ||
613 | - Remove a (virtual CAN) network interface 'vcan42': | 620 | - Remove a (virtual CAN) network interface 'vcan42': |
614 | ip link del vcan42 | 621 | $ ip link del vcan42 |
615 | 622 | ||
616 | The tool 'vcan' from the SocketCAN SVN repository on BerliOS is obsolete. | 623 | 6.5 The CAN network device driver interface |
617 | 624 | ||
618 | Virtual CAN network device creation in older Kernels: | 625 | The CAN network device driver interface provides a generic interface |
619 | In Linux Kernel versions < 2.6.24 the vcan driver creates 4 vcan | 626 | to setup, configure and monitor CAN network devices. The user can then |
620 | netdevices at module load time by default. This value can be changed | 627 | configure the CAN device, like setting the bit-timing parameters, via |
621 | with the module parameter 'numdev'. E.g. 'modprobe vcan numdev=8' | 628 | the netlink interface using the program "ip" from the "IPROUTE2" |
622 | 629 | utility suite. The following chapter describes briefly how to use it. | |
623 | 6.5 currently supported CAN hardware | 630 | Furthermore, the interface uses a common data structure and exports a |
631 | set of common functions, which all real CAN network device drivers | ||
632 | should use. Please have a look to the SJA1000 or MSCAN driver to | ||
633 | understand how to use them. The name of the module is can-dev.ko. | ||
634 | |||
635 | 6.5.1 Netlink interface to set/get devices properties | ||
636 | |||
637 | The CAN device must be configured via netlink interface. The supported | ||
638 | netlink message types are defined and briefly described in | ||
639 | "include/linux/can/netlink.h". CAN link support for the program "ip" | ||
640 | of the IPROUTE2 utility suite is avaiable and it can be used as shown | ||
641 | below: | ||
642 | |||
643 | - Setting CAN device properties: | ||
644 | |||
645 | $ ip link set can0 type can help | ||
646 | Usage: ip link set DEVICE type can | ||
647 | [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] | | ||
648 | [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1 | ||
649 | phase-seg2 PHASE-SEG2 [ sjw SJW ] ] | ||
650 | |||
651 | [ loopback { on | off } ] | ||
652 | [ listen-only { on | off } ] | ||
653 | [ triple-sampling { on | off } ] | ||
654 | |||
655 | [ restart-ms TIME-MS ] | ||
656 | [ restart ] | ||
657 | |||
658 | Where: BITRATE := { 1..1000000 } | ||
659 | SAMPLE-POINT := { 0.000..0.999 } | ||
660 | TQ := { NUMBER } | ||
661 | PROP-SEG := { 1..8 } | ||
662 | PHASE-SEG1 := { 1..8 } | ||
663 | PHASE-SEG2 := { 1..8 } | ||
664 | SJW := { 1..4 } | ||
665 | RESTART-MS := { 0 | NUMBER } | ||
666 | |||
667 | - Display CAN device details and statistics: | ||
668 | |||
669 | $ ip -details -statistics link show can0 | ||
670 | 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10 | ||
671 | link/can | ||
672 | can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100 | ||
673 | bitrate 125000 sample_point 0.875 | ||
674 | tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1 | ||
675 | sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 | ||
676 | clock 8000000 | ||
677 | re-started bus-errors arbit-lost error-warn error-pass bus-off | ||
678 | 41 17457 0 41 42 41 | ||
679 | RX: bytes packets errors dropped overrun mcast | ||
680 | 140859 17608 17457 0 0 0 | ||
681 | TX: bytes packets errors dropped carrier collsns | ||
682 | 861 112 0 41 0 0 | ||
683 | |||
684 | More info to the above output: | ||
685 | |||
686 | "<TRIPLE-SAMPLING>" | ||
687 | Shows the list of selected CAN controller modes: LOOPBACK, | ||
688 | LISTEN-ONLY, or TRIPLE-SAMPLING. | ||
689 | |||
690 | "state ERROR-ACTIVE" | ||
691 | The current state of the CAN controller: "ERROR-ACTIVE", | ||
692 | "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED" | ||
693 | |||
694 | "restart-ms 100" | ||
695 | Automatic restart delay time. If set to a non-zero value, a | ||
696 | restart of the CAN controller will be triggered automatically | ||
697 | in case of a bus-off condition after the specified delay time | ||
698 | in milliseconds. By default it's off. | ||
699 | |||
700 | "bitrate 125000 sample_point 0.875" | ||
701 | Shows the real bit-rate in bits/sec and the sample-point in the | ||
702 | range 0.000..0.999. If the calculation of bit-timing parameters | ||
703 | is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the | ||
704 | bit-timing can be defined by setting the "bitrate" argument. | ||
705 | Optionally the "sample-point" can be specified. By default it's | ||
706 | 0.000 assuming CIA-recommended sample-points. | ||
707 | |||
708 | "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1" | ||
709 | Shows the time quanta in ns, propagation segment, phase buffer | ||
710 | segment 1 and 2 and the synchronisation jump width in units of | ||
711 | tq. They allow to define the CAN bit-timing in a hardware | ||
712 | independent format as proposed by the Bosch CAN 2.0 spec (see | ||
713 | chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf). | ||
714 | |||
715 | "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 | ||
716 | clock 8000000" | ||
717 | Shows the bit-timing constants of the CAN controller, here the | ||
718 | "sja1000". The minimum and maximum values of the time segment 1 | ||
719 | and 2, the synchronisation jump width in units of tq, the | ||
720 | bitrate pre-scaler and the CAN system clock frequency in Hz. | ||
721 | These constants could be used for user-defined (non-standard) | ||
722 | bit-timing calculation algorithms in user-space. | ||
723 | |||
724 | "re-started bus-errors arbit-lost error-warn error-pass bus-off" | ||
725 | Shows the number of restarts, bus and arbitration lost errors, | ||
726 | and the state changes to the error-warning, error-passive and | ||
727 | bus-off state. RX overrun errors are listed in the "overrun" | ||
728 | field of the standard network statistics. | ||
729 | |||
730 | 6.5.2 Setting the CAN bit-timing | ||
731 | |||
732 | The CAN bit-timing parameters can always be defined in a hardware | ||
733 | independent format as proposed in the Bosch CAN 2.0 specification | ||
734 | specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" | ||
735 | and "sjw": | ||
736 | |||
737 | $ ip link set canX type can tq 125 prop-seg 6 \ | ||
738 | phase-seg1 7 phase-seg2 2 sjw 1 | ||
739 | |||
740 | If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA | ||
741 | recommended CAN bit-timing parameters will be calculated if the bit- | ||
742 | rate is specified with the argument "bitrate": | ||
743 | |||
744 | $ ip link set canX type can bitrate 125000 | ||
745 | |||
746 | Note that this works fine for the most common CAN controllers with | ||
747 | standard bit-rates but may *fail* for exotic bit-rates or CAN system | ||
748 | clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some | ||
749 | space and allows user-space tools to solely determine and set the | ||
750 | bit-timing parameters. The CAN controller specific bit-timing | ||
751 | constants can be used for that purpose. They are listed by the | ||
752 | following command: | ||
753 | |||
754 | $ ip -details link show can0 | ||
755 | ... | ||
756 | sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 | ||
757 | |||
758 | 6.5.3 Starting and stopping the CAN network device | ||
759 | |||
760 | A CAN network device is started or stopped as usual with the command | ||
761 | "ifconfig canX up/down" or "ip link set canX up/down". Be aware that | ||
762 | you *must* define proper bit-timing parameters for real CAN devices | ||
763 | before you can start it to avoid error-prone default settings: | ||
764 | |||
765 | $ ip link set canX up type can bitrate 125000 | ||
766 | |||
767 | A device may enter the "bus-off" state if too much errors occurred on | ||
768 | the CAN bus. Then no more messages are received or sent. An automatic | ||
769 | bus-off recovery can be enabled by setting the "restart-ms" to a | ||
770 | non-zero value, e.g.: | ||
771 | |||
772 | $ ip link set canX type can restart-ms 100 | ||
773 | |||
774 | Alternatively, the application may realize the "bus-off" condition | ||
775 | by monitoring CAN error frames and do a restart when appropriate with | ||
776 | the command: | ||
777 | |||
778 | $ ip link set canX type can restart | ||
779 | |||
780 | Note that a restart will also create a CAN error frame (see also | ||
781 | chapter 3.4). | ||
624 | 782 | ||
625 | On the project website http://developer.berlios.de/projects/socketcan | 783 | 6.6 Supported CAN hardware |
626 | there are different drivers available: | ||
627 | 784 | ||
628 | vcan: Virtual CAN interface driver (if no real hardware is available) | 785 | Please check the "Kconfig" file in "drivers/net/can" to get an actual |
629 | sja1000: Philips SJA1000 CAN controller (recommended) | 786 | list of the support CAN hardware. On the Socket CAN project website |
630 | i82527: Intel i82527 CAN controller | 787 | (see chapter 7) there might be further drivers available, also for |
631 | mscan: Motorola/Freescale CAN controller (e.g. inside SOC MPC5200) | 788 | older kernel versions. |
632 | ccan: CCAN controller core (e.g. inside SOC h7202) | ||
633 | slcan: For a bunch of CAN adaptors that are attached via a | ||
634 | serial line ASCII protocol (for serial / USB adaptors) | ||
635 | 789 | ||
636 | Additionally the different CAN adaptors (ISA/PCI/PCMCIA/USB/Parport) | 790 | 7. Socket CAN resources |
637 | from PEAK Systemtechnik support the CAN netdevice driver model | 791 | ----------------------- |
638 | since Linux driver v6.0: http://www.peak-system.com/linux/index.htm | ||
639 | 792 | ||
640 | Please check the Mailing Lists on the berlios OSS project website. | 793 | You can find further resources for Socket CAN like user space tools, |
794 | support for old kernel versions, more drivers, mailing lists, etc. | ||
795 | at the BerliOS OSS project website for Socket CAN: | ||
641 | 796 | ||
642 | 6.6 todo | 797 | http://developer.berlios.de/projects/socketcan |
643 | 798 | ||
644 | The configuration interface for CAN network drivers is still an open | 799 | If you have questions, bug fixes, etc., don't hesitate to post them to |
645 | issue that has not been finalized in the socketcan project. Also the | 800 | the Socketcan-Users mailing list. But please search the archives first. |
646 | idea of having a library module (candev.ko) that holds functions | ||
647 | that are needed by all CAN netdevices is not ready to ship. | ||
648 | Your contribution is welcome. | ||
649 | 801 | ||
650 | 7. Credits | 802 | 8. Credits |
651 | ---------- | 803 | ---------- |
652 | 804 | ||
653 | Oliver Hartkopp (PF_CAN core, filters, drivers, bcm) | 805 | Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver) |
654 | Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan) | 806 | Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan) |
655 | Jan Kizka (RT-SocketCAN core, Socket-API reconciliation) | 807 | Jan Kizka (RT-SocketCAN core, Socket-API reconciliation) |
656 | Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews) | 808 | Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews, |
809 | CAN device driver interface, MSCAN driver) | ||
657 | Robert Schwebel (design reviews, PTXdist integration) | 810 | Robert Schwebel (design reviews, PTXdist integration) |
658 | Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers) | 811 | Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers) |
659 | Benedikt Spranger (reviews) | 812 | Benedikt Spranger (reviews) |
660 | Thomas Gleixner (LKML reviews, coding style, posting hints) | 813 | Thomas Gleixner (LKML reviews, coding style, posting hints) |
661 | Andrey Volkov (kernel subtree structure, ioctls, mscan driver) | 814 | Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver) |
662 | Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003) | 815 | Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003) |
663 | Klaus Hitschler (PEAK driver integration) | 816 | Klaus Hitschler (PEAK driver integration) |
664 | Uwe Koppe (CAN netdevices with PF_PACKET approach) | 817 | Uwe Koppe (CAN netdevices with PF_PACKET approach) |
665 | Michael Schulze (driver layer loopback requirement, RT CAN drivers review) | 818 | Michael Schulze (driver layer loopback requirement, RT CAN drivers review) |
819 | Pavel Pisa (Bit-timing calculation) | ||
820 | Sascha Hauer (SJA1000 platform driver) | ||
821 | Sebastian Haas (SJA1000 EMS PCI driver) | ||
822 | Markus Plessing (SJA1000 EMS PCI driver) | ||
823 | Per Dalen (SJA1000 Kvaser PCI driver) | ||
824 | Sam Ravnborg (reviews, coding style, kbuild help) | ||
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 7a3bb1abb830..b132e4a3cf0f 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt | |||
@@ -141,7 +141,8 @@ rx_ccid = 2 | |||
141 | Default CCID for the receiver-sender half-connection; see tx_ccid. | 141 | Default CCID for the receiver-sender half-connection; see tx_ccid. |
142 | 142 | ||
143 | seq_window = 100 | 143 | seq_window = 100 |
144 | The initial sequence window (sec. 7.5.2). | 144 | The initial sequence window (sec. 7.5.2) of the sender. This influences |
145 | the local ackno validity and the remote seqno validity windows (7.5.1). | ||
145 | 146 | ||
146 | tx_qlen = 5 | 147 | tx_qlen = 5 |
147 | The size of the transmit buffer in packets. A value of 0 corresponds | 148 | The size of the transmit buffer in packets. A value of 0 corresponds |
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt index 65df3dea5561..5552e2e575c5 100644 --- a/Documentation/networking/dm9000.txt +++ b/Documentation/networking/dm9000.txt | |||
@@ -129,7 +129,7 @@ PHY Link state polling | |||
129 | ---------------------- | 129 | ---------------------- |
130 | 130 | ||
131 | The driver keeps track of the link state and informs the network core | 131 | The driver keeps track of the link state and informs the network core |
132 | about link (carrier) availablilty. This is managed by several methods | 132 | about link (carrier) availability. This is managed by several methods |
133 | depending on the version of the chip and on which PHY is being used. | 133 | depending on the version of the chip and on which PHY is being used. |
134 | 134 | ||
135 | For the internal PHY, the original (and currently default) method is | 135 | For the internal PHY, the original (and currently default) method is |
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt new file mode 100644 index 000000000000..a0280ad2edc9 --- /dev/null +++ b/Documentation/networking/ieee802154.txt | |||
@@ -0,0 +1,76 @@ | |||
1 | |||
2 | Linux IEEE 802.15.4 implementation | ||
3 | |||
4 | |||
5 | Introduction | ||
6 | ============ | ||
7 | |||
8 | The Linux-ZigBee project goal is to provide complete implementation | ||
9 | of IEEE 802.15.4 / ZigBee / 6LoWPAN protocols. IEEE 802.15.4 is a stack | ||
10 | of protocols for organizing Low-Rate Wireless Personal Area Networks. | ||
11 | |||
12 | Currently only IEEE 802.15.4 layer is implemented. We have choosen | ||
13 | to use plain Berkeley socket API, the generic Linux networking stack | ||
14 | to transfer IEEE 802.15.4 messages and a special protocol over genetlink | ||
15 | for configuration/management | ||
16 | |||
17 | |||
18 | Socket API | ||
19 | ========== | ||
20 | |||
21 | int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0); | ||
22 | ..... | ||
23 | |||
24 | The address family, socket addresses etc. are defined in the | ||
25 | include/net/ieee802154/af_ieee802154.h header or in the special header | ||
26 | in our userspace package (see either linux-zigbee sourceforge download page | ||
27 | or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee). | ||
28 | |||
29 | One can use SOCK_RAW for passing raw data towards device xmit function. YMMV. | ||
30 | |||
31 | |||
32 | MLME - MAC Level Management | ||
33 | ============================ | ||
34 | |||
35 | Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands. | ||
36 | See the include/net/ieee802154/nl802154.h header. Our userspace tools package | ||
37 | (see above) provides CLI configuration utility for radio interfaces and simple | ||
38 | coordinator for IEEE 802.15.4 networks as an example users of MLME protocol. | ||
39 | |||
40 | |||
41 | Kernel side | ||
42 | ============= | ||
43 | |||
44 | Like with WiFi, there are several types of devices implementing IEEE 802.15.4. | ||
45 | 1) 'HardMAC'. The MAC layer is implemented in the device itself, the device | ||
46 | exports MLME and data API. | ||
47 | 2) 'SoftMAC' or just radio. These types of devices are just radio transceivers | ||
48 | possibly with some kinds of acceleration like automatic CRC computation and | ||
49 | comparation, automagic ACK handling, address matching, etc. | ||
50 | |||
51 | Those types of devices require different approach to be hooked into Linux kernel. | ||
52 | |||
53 | |||
54 | HardMAC | ||
55 | ======= | ||
56 | |||
57 | See the header include/net/ieee802154/netdevice.h. You have to implement Linux | ||
58 | net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family | ||
59 | code via plain sk_buffs. The control block of sk_buffs will contain additional | ||
60 | info as described in the struct ieee802154_mac_cb. | ||
61 | |||
62 | To hook the MLME interface you have to populate the ml_priv field of your | ||
63 | net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are | ||
64 | required. | ||
65 | |||
66 | We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c | ||
67 | |||
68 | |||
69 | SoftMAC | ||
70 | ======= | ||
71 | |||
72 | We are going to provide intermediate layer impelementing IEEE 802.15.4 MAC | ||
73 | in software. This is currently WIP. | ||
74 | |||
75 | See header include/net/ieee802154/mac802154.h and several drivers in | ||
76 | drivers/ieee802154/ | ||
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index c7712787933c..8be76235fe67 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | ip_forward - BOOLEAN | 3 | ip_forward - BOOLEAN |
4 | 0 - disabled (default) | 4 | 0 - disabled (default) |
5 | not 0 - enabled | 5 | not 0 - enabled |
6 | 6 | ||
7 | Forward Packets between interfaces. | 7 | Forward Packets between interfaces. |
8 | 8 | ||
@@ -36,49 +36,49 @@ rt_cache_rebuild_count - INTEGER | |||
36 | IP Fragmentation: | 36 | IP Fragmentation: |
37 | 37 | ||
38 | ipfrag_high_thresh - INTEGER | 38 | ipfrag_high_thresh - INTEGER |
39 | Maximum memory used to reassemble IP fragments. When | 39 | Maximum memory used to reassemble IP fragments. When |
40 | ipfrag_high_thresh bytes of memory is allocated for this purpose, | 40 | ipfrag_high_thresh bytes of memory is allocated for this purpose, |
41 | the fragment handler will toss packets until ipfrag_low_thresh | 41 | the fragment handler will toss packets until ipfrag_low_thresh |
42 | is reached. | 42 | is reached. |
43 | 43 | ||
44 | ipfrag_low_thresh - INTEGER | 44 | ipfrag_low_thresh - INTEGER |
45 | See ipfrag_high_thresh | 45 | See ipfrag_high_thresh |
46 | 46 | ||
47 | ipfrag_time - INTEGER | 47 | ipfrag_time - INTEGER |
48 | Time in seconds to keep an IP fragment in memory. | 48 | Time in seconds to keep an IP fragment in memory. |
49 | 49 | ||
50 | ipfrag_secret_interval - INTEGER | 50 | ipfrag_secret_interval - INTEGER |
51 | Regeneration interval (in seconds) of the hash secret (or lifetime | 51 | Regeneration interval (in seconds) of the hash secret (or lifetime |
52 | for the hash secret) for IP fragments. | 52 | for the hash secret) for IP fragments. |
53 | Default: 600 | 53 | Default: 600 |
54 | 54 | ||
55 | ipfrag_max_dist - INTEGER | 55 | ipfrag_max_dist - INTEGER |
56 | ipfrag_max_dist is a non-negative integer value which defines the | 56 | ipfrag_max_dist is a non-negative integer value which defines the |
57 | maximum "disorder" which is allowed among fragments which share a | 57 | maximum "disorder" which is allowed among fragments which share a |
58 | common IP source address. Note that reordering of packets is | 58 | common IP source address. Note that reordering of packets is |
59 | not unusual, but if a large number of fragments arrive from a source | 59 | not unusual, but if a large number of fragments arrive from a source |
60 | IP address while a particular fragment queue remains incomplete, it | 60 | IP address while a particular fragment queue remains incomplete, it |
61 | probably indicates that one or more fragments belonging to that queue | 61 | probably indicates that one or more fragments belonging to that queue |
62 | have been lost. When ipfrag_max_dist is positive, an additional check | 62 | have been lost. When ipfrag_max_dist is positive, an additional check |
63 | is done on fragments before they are added to a reassembly queue - if | 63 | is done on fragments before they are added to a reassembly queue - if |
64 | ipfrag_max_dist (or more) fragments have arrived from a particular IP | 64 | ipfrag_max_dist (or more) fragments have arrived from a particular IP |
65 | address between additions to any IP fragment queue using that source | 65 | address between additions to any IP fragment queue using that source |
66 | address, it's presumed that one or more fragments in the queue are | 66 | address, it's presumed that one or more fragments in the queue are |
67 | lost. The existing fragment queue will be dropped, and a new one | 67 | lost. The existing fragment queue will be dropped, and a new one |
68 | started. An ipfrag_max_dist value of zero disables this check. | 68 | started. An ipfrag_max_dist value of zero disables this check. |
69 | 69 | ||
70 | Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can | 70 | Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can |
71 | result in unnecessarily dropping fragment queues when normal | 71 | result in unnecessarily dropping fragment queues when normal |
72 | reordering of packets occurs, which could lead to poor application | 72 | reordering of packets occurs, which could lead to poor application |
73 | performance. Using a very large value, e.g. 50000, increases the | 73 | performance. Using a very large value, e.g. 50000, increases the |
74 | likelihood of incorrectly reassembling IP fragments that originate | 74 | likelihood of incorrectly reassembling IP fragments that originate |
75 | from different IP datagrams, which could result in data corruption. | 75 | from different IP datagrams, which could result in data corruption. |
76 | Default: 64 | 76 | Default: 64 |
77 | 77 | ||
78 | INET peer storage: | 78 | INET peer storage: |
79 | 79 | ||
80 | inet_peer_threshold - INTEGER | 80 | inet_peer_threshold - INTEGER |
81 | The approximate size of the storage. Starting from this threshold | 81 | The approximate size of the storage. Starting from this threshold |
82 | entries will be thrown aggressively. This threshold also determines | 82 | entries will be thrown aggressively. This threshold also determines |
83 | entries' time-to-live and time intervals between garbage collection | 83 | entries' time-to-live and time intervals between garbage collection |
84 | passes. More entries, less time-to-live, less GC interval. | 84 | passes. More entries, less time-to-live, less GC interval. |
@@ -105,7 +105,7 @@ inet_peer_gc_maxtime - INTEGER | |||
105 | in effect under low (or absent) memory pressure on the pool. | 105 | in effect under low (or absent) memory pressure on the pool. |
106 | Measured in seconds. | 106 | Measured in seconds. |
107 | 107 | ||
108 | TCP variables: | 108 | TCP variables: |
109 | 109 | ||
110 | somaxconn - INTEGER | 110 | somaxconn - INTEGER |
111 | Limit of socket listen() backlog, known in userspace as SOMAXCONN. | 111 | Limit of socket listen() backlog, known in userspace as SOMAXCONN. |
@@ -168,7 +168,16 @@ tcp_dsack - BOOLEAN | |||
168 | Allows TCP to send "duplicate" SACKs. | 168 | Allows TCP to send "duplicate" SACKs. |
169 | 169 | ||
170 | tcp_ecn - BOOLEAN | 170 | tcp_ecn - BOOLEAN |
171 | Enable Explicit Congestion Notification in TCP. | 171 | Enable Explicit Congestion Notification (ECN) in TCP. ECN is only |
172 | used when both ends of the TCP flow support it. It is useful to | ||
173 | avoid losses due to congestion (when the bottleneck router supports | ||
174 | ECN). | ||
175 | Possible values are: | ||
176 | 0 disable ECN | ||
177 | 1 ECN enabled | ||
178 | 2 Only server-side ECN enabled. If the other end does | ||
179 | not support ECN, behavior is like with ECN disabled. | ||
180 | Default: 2 | ||
172 | 181 | ||
173 | tcp_fack - BOOLEAN | 182 | tcp_fack - BOOLEAN |
174 | Enable FACK congestion avoidance and fast retransmission. | 183 | Enable FACK congestion avoidance and fast retransmission. |
@@ -310,7 +319,7 @@ tcp_orphan_retries - INTEGER | |||
310 | 319 | ||
311 | tcp_reordering - INTEGER | 320 | tcp_reordering - INTEGER |
312 | Maximal reordering of packets in a TCP stream. | 321 | Maximal reordering of packets in a TCP stream. |
313 | Default: 3 | 322 | Default: 3 |
314 | 323 | ||
315 | tcp_retrans_collapse - BOOLEAN | 324 | tcp_retrans_collapse - BOOLEAN |
316 | Bug-to-bug compatibility with some broken printers. | 325 | Bug-to-bug compatibility with some broken printers. |
@@ -521,7 +530,7 @@ IP Variables: | |||
521 | 530 | ||
522 | ip_local_port_range - 2 INTEGERS | 531 | ip_local_port_range - 2 INTEGERS |
523 | Defines the local port range that is used by TCP and UDP to | 532 | Defines the local port range that is used by TCP and UDP to |
524 | choose the local port. The first number is the first, the | 533 | choose the local port. The first number is the first, the |
525 | second the last local port number. Default value depends on | 534 | second the last local port number. Default value depends on |
526 | amount of memory available on the system: | 535 | amount of memory available on the system: |
527 | > 128Mb 32768-61000 | 536 | > 128Mb 32768-61000 |
@@ -594,12 +603,12 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN | |||
594 | 603 | ||
595 | If zero, icmp error messages are sent with the primary address of | 604 | If zero, icmp error messages are sent with the primary address of |
596 | the exiting interface. | 605 | the exiting interface. |
597 | 606 | ||
598 | If non-zero, the message will be sent with the primary address of | 607 | If non-zero, the message will be sent with the primary address of |
599 | the interface that received the packet that caused the icmp error. | 608 | the interface that received the packet that caused the icmp error. |
600 | This is the behaviour network many administrators will expect from | 609 | This is the behaviour network many administrators will expect from |
601 | a router. And it can make debugging complicated network layouts | 610 | a router. And it can make debugging complicated network layouts |
602 | much easier. | 611 | much easier. |
603 | 612 | ||
604 | Note that if no primary address exists for the interface selected, | 613 | Note that if no primary address exists for the interface selected, |
605 | then the primary address of the first non-loopback interface that | 614 | then the primary address of the first non-loopback interface that |
@@ -611,7 +620,7 @@ igmp_max_memberships - INTEGER | |||
611 | Change the maximum number of multicast groups we can subscribe to. | 620 | Change the maximum number of multicast groups we can subscribe to. |
612 | Default: 20 | 621 | Default: 20 |
613 | 622 | ||
614 | conf/interface/* changes special settings per interface (where "interface" is | 623 | conf/interface/* changes special settings per interface (where "interface" is |
615 | the name of your network interface) | 624 | the name of your network interface) |
616 | conf/all/* is special, changes the settings for all interfaces | 625 | conf/all/* is special, changes the settings for all interfaces |
617 | 626 | ||
@@ -625,11 +634,11 @@ log_martians - BOOLEAN | |||
625 | accept_redirects - BOOLEAN | 634 | accept_redirects - BOOLEAN |
626 | Accept ICMP redirect messages. | 635 | Accept ICMP redirect messages. |
627 | accept_redirects for the interface will be enabled if: | 636 | accept_redirects for the interface will be enabled if: |
628 | - both conf/{all,interface}/accept_redirects are TRUE in the case forwarding | 637 | - both conf/{all,interface}/accept_redirects are TRUE in the case |
629 | for the interface is enabled | 638 | forwarding for the interface is enabled |
630 | or | 639 | or |
631 | - at least one of conf/{all,interface}/accept_redirects is TRUE in the case | 640 | - at least one of conf/{all,interface}/accept_redirects is TRUE in the |
632 | forwarding for the interface is disabled | 641 | case forwarding for the interface is disabled |
633 | accept_redirects for the interface will be disabled otherwise | 642 | accept_redirects for the interface will be disabled otherwise |
634 | default TRUE (host) | 643 | default TRUE (host) |
635 | FALSE (router) | 644 | FALSE (router) |
@@ -640,8 +649,8 @@ forwarding - BOOLEAN | |||
640 | mc_forwarding - BOOLEAN | 649 | mc_forwarding - BOOLEAN |
641 | Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE | 650 | Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE |
642 | and a multicast routing daemon is required. | 651 | and a multicast routing daemon is required. |
643 | conf/all/mc_forwarding must also be set to TRUE to enable multicast routing | 652 | conf/all/mc_forwarding must also be set to TRUE to enable multicast |
644 | for the interface | 653 | routing for the interface |
645 | 654 | ||
646 | medium_id - INTEGER | 655 | medium_id - INTEGER |
647 | Integer value used to differentiate the devices by the medium they | 656 | Integer value used to differentiate the devices by the medium they |
@@ -649,7 +658,7 @@ medium_id - INTEGER | |||
649 | the broadcast packets are received only on one of them. | 658 | the broadcast packets are received only on one of them. |
650 | The default value 0 means that the device is the only interface | 659 | The default value 0 means that the device is the only interface |
651 | to its medium, value of -1 means that medium is not known. | 660 | to its medium, value of -1 means that medium is not known. |
652 | 661 | ||
653 | Currently, it is used to change the proxy_arp behavior: | 662 | Currently, it is used to change the proxy_arp behavior: |
654 | the proxy_arp feature is enabled for packets forwarded between | 663 | the proxy_arp feature is enabled for packets forwarded between |
655 | two devices attached to different media. | 664 | two devices attached to different media. |
@@ -699,16 +708,22 @@ accept_source_route - BOOLEAN | |||
699 | default TRUE (router) | 708 | default TRUE (router) |
700 | FALSE (host) | 709 | FALSE (host) |
701 | 710 | ||
702 | rp_filter - BOOLEAN | 711 | rp_filter - INTEGER |
703 | 1 - do source validation by reversed path, as specified in RFC1812 | ||
704 | Recommended option for single homed hosts and stub network | ||
705 | routers. Could cause troubles for complicated (not loop free) | ||
706 | networks running a slow unreliable protocol (sort of RIP), | ||
707 | or using static routes. | ||
708 | |||
709 | 0 - No source validation. | 712 | 0 - No source validation. |
710 | 713 | 1 - Strict mode as defined in RFC3704 Strict Reverse Path | |
711 | conf/all/rp_filter must also be set to TRUE to do source validation | 714 | Each incoming packet is tested against the FIB and if the interface |
715 | is not the best reverse path the packet check will fail. | ||
716 | By default failed packets are discarded. | ||
717 | 2 - Loose mode as defined in RFC3704 Loose Reverse Path | ||
718 | Each incoming packet's source address is also tested against the FIB | ||
719 | and if the source address is not reachable via any interface | ||
720 | the packet check will fail. | ||
721 | |||
722 | Current recommended practice in RFC3704 is to enable strict mode | ||
723 | to prevent IP spoofing from DDos attacks. If using asymmetric routing | ||
724 | or other complicated routing, then loose mode is recommended. | ||
725 | |||
726 | conf/all/rp_filter must also be set to non-zero to do source validation | ||
712 | on the interface | 727 | on the interface |
713 | 728 | ||
714 | Default value is 0. Note that some distributions enable it | 729 | Default value is 0. Note that some distributions enable it |
@@ -782,6 +797,12 @@ arp_ignore - INTEGER | |||
782 | The max value from conf/{all,interface}/arp_ignore is used | 797 | The max value from conf/{all,interface}/arp_ignore is used |
783 | when ARP request is received on the {interface} | 798 | when ARP request is received on the {interface} |
784 | 799 | ||
800 | arp_notify - BOOLEAN | ||
801 | Define mode for notification of address and device changes. | ||
802 | 0 - (default): do nothing | ||
803 | 1 - Generate gratuitous arp replies when device is brought up | ||
804 | or hardware address changes. | ||
805 | |||
785 | arp_accept - BOOLEAN | 806 | arp_accept - BOOLEAN |
786 | Define behavior when gratuitous arp replies are received: | 807 | Define behavior when gratuitous arp replies are received: |
787 | 0 - drop gratuitous arp frames | 808 | 0 - drop gratuitous arp frames |
@@ -823,7 +844,7 @@ apply to IPv6 [XXX?]. | |||
823 | 844 | ||
824 | bindv6only - BOOLEAN | 845 | bindv6only - BOOLEAN |
825 | Default value for IPV6_V6ONLY socket option, | 846 | Default value for IPV6_V6ONLY socket option, |
826 | which restricts use of the IPv6 socket to IPv6 communication | 847 | which restricts use of the IPv6 socket to IPv6 communication |
827 | only. | 848 | only. |
828 | TRUE: disable IPv4-mapped address feature | 849 | TRUE: disable IPv4-mapped address feature |
829 | FALSE: enable IPv4-mapped address feature | 850 | FALSE: enable IPv4-mapped address feature |
@@ -833,19 +854,19 @@ bindv6only - BOOLEAN | |||
833 | IPv6 Fragmentation: | 854 | IPv6 Fragmentation: |
834 | 855 | ||
835 | ip6frag_high_thresh - INTEGER | 856 | ip6frag_high_thresh - INTEGER |
836 | Maximum memory used to reassemble IPv6 fragments. When | 857 | Maximum memory used to reassemble IPv6 fragments. When |
837 | ip6frag_high_thresh bytes of memory is allocated for this purpose, | 858 | ip6frag_high_thresh bytes of memory is allocated for this purpose, |
838 | the fragment handler will toss packets until ip6frag_low_thresh | 859 | the fragment handler will toss packets until ip6frag_low_thresh |
839 | is reached. | 860 | is reached. |
840 | 861 | ||
841 | ip6frag_low_thresh - INTEGER | 862 | ip6frag_low_thresh - INTEGER |
842 | See ip6frag_high_thresh | 863 | See ip6frag_high_thresh |
843 | 864 | ||
844 | ip6frag_time - INTEGER | 865 | ip6frag_time - INTEGER |
845 | Time in seconds to keep an IPv6 fragment in memory. | 866 | Time in seconds to keep an IPv6 fragment in memory. |
846 | 867 | ||
847 | ip6frag_secret_interval - INTEGER | 868 | ip6frag_secret_interval - INTEGER |
848 | Regeneration interval (in seconds) of the hash secret (or lifetime | 869 | Regeneration interval (in seconds) of the hash secret (or lifetime |
849 | for the hash secret) for IPv6 fragments. | 870 | for the hash secret) for IPv6 fragments. |
850 | Default: 600 | 871 | Default: 600 |
851 | 872 | ||
@@ -854,17 +875,17 @@ conf/default/*: | |||
854 | 875 | ||
855 | 876 | ||
856 | conf/all/*: | 877 | conf/all/*: |
857 | Change all the interface-specific settings. | 878 | Change all the interface-specific settings. |
858 | 879 | ||
859 | [XXX: Other special features than forwarding?] | 880 | [XXX: Other special features than forwarding?] |
860 | 881 | ||
861 | conf/all/forwarding - BOOLEAN | 882 | conf/all/forwarding - BOOLEAN |
862 | Enable global IPv6 forwarding between all interfaces. | 883 | Enable global IPv6 forwarding between all interfaces. |
863 | 884 | ||
864 | IPv4 and IPv6 work differently here; e.g. netfilter must be used | 885 | IPv4 and IPv6 work differently here; e.g. netfilter must be used |
865 | to control which interfaces may forward packets and which not. | 886 | to control which interfaces may forward packets and which not. |
866 | 887 | ||
867 | This also sets all interfaces' Host/Router setting | 888 | This also sets all interfaces' Host/Router setting |
868 | 'forwarding' to the specified value. See below for details. | 889 | 'forwarding' to the specified value. See below for details. |
869 | 890 | ||
870 | This referred to as global forwarding. | 891 | This referred to as global forwarding. |
@@ -875,12 +896,12 @@ proxy_ndp - BOOLEAN | |||
875 | conf/interface/*: | 896 | conf/interface/*: |
876 | Change special settings per interface. | 897 | Change special settings per interface. |
877 | 898 | ||
878 | The functional behaviour for certain settings is different | 899 | The functional behaviour for certain settings is different |
879 | depending on whether local forwarding is enabled or not. | 900 | depending on whether local forwarding is enabled or not. |
880 | 901 | ||
881 | accept_ra - BOOLEAN | 902 | accept_ra - BOOLEAN |
882 | Accept Router Advertisements; autoconfigure using them. | 903 | Accept Router Advertisements; autoconfigure using them. |
883 | 904 | ||
884 | Functional default: enabled if local forwarding is disabled. | 905 | Functional default: enabled if local forwarding is disabled. |
885 | disabled if local forwarding is enabled. | 906 | disabled if local forwarding is enabled. |
886 | 907 | ||
@@ -926,7 +947,7 @@ accept_source_route - INTEGER | |||
926 | Default: 0 | 947 | Default: 0 |
927 | 948 | ||
928 | autoconf - BOOLEAN | 949 | autoconf - BOOLEAN |
929 | Autoconfigure addresses using Prefix Information in Router | 950 | Autoconfigure addresses using Prefix Information in Router |
930 | Advertisements. | 951 | Advertisements. |
931 | 952 | ||
932 | Functional default: enabled if accept_ra_pinfo is enabled. | 953 | Functional default: enabled if accept_ra_pinfo is enabled. |
@@ -935,11 +956,11 @@ autoconf - BOOLEAN | |||
935 | dad_transmits - INTEGER | 956 | dad_transmits - INTEGER |
936 | The amount of Duplicate Address Detection probes to send. | 957 | The amount of Duplicate Address Detection probes to send. |
937 | Default: 1 | 958 | Default: 1 |
938 | 959 | ||
939 | forwarding - BOOLEAN | 960 | forwarding - BOOLEAN |
940 | Configure interface-specific Host/Router behaviour. | 961 | Configure interface-specific Host/Router behaviour. |
941 | 962 | ||
942 | Note: It is recommended to have the same setting on all | 963 | Note: It is recommended to have the same setting on all |
943 | interfaces; mixed router/host scenarios are rather uncommon. | 964 | interfaces; mixed router/host scenarios are rather uncommon. |
944 | 965 | ||
945 | FALSE: | 966 | FALSE: |
@@ -948,13 +969,13 @@ forwarding - BOOLEAN | |||
948 | 969 | ||
949 | 1. IsRouter flag is not set in Neighbour Advertisements. | 970 | 1. IsRouter flag is not set in Neighbour Advertisements. |
950 | 2. Router Solicitations are being sent when necessary. | 971 | 2. Router Solicitations are being sent when necessary. |
951 | 3. If accept_ra is TRUE (default), accept Router | 972 | 3. If accept_ra is TRUE (default), accept Router |
952 | Advertisements (and do autoconfiguration). | 973 | Advertisements (and do autoconfiguration). |
953 | 4. If accept_redirects is TRUE (default), accept Redirects. | 974 | 4. If accept_redirects is TRUE (default), accept Redirects. |
954 | 975 | ||
955 | TRUE: | 976 | TRUE: |
956 | 977 | ||
957 | If local forwarding is enabled, Router behaviour is assumed. | 978 | If local forwarding is enabled, Router behaviour is assumed. |
958 | This means exactly the reverse from the above: | 979 | This means exactly the reverse from the above: |
959 | 980 | ||
960 | 1. IsRouter flag is set in Neighbour Advertisements. | 981 | 1. IsRouter flag is set in Neighbour Advertisements. |
@@ -989,7 +1010,7 @@ router_solicitation_interval - INTEGER | |||
989 | Default: 4 | 1010 | Default: 4 |
990 | 1011 | ||
991 | router_solicitations - INTEGER | 1012 | router_solicitations - INTEGER |
992 | Number of Router Solicitations to send until assuming no | 1013 | Number of Router Solicitations to send until assuming no |
993 | routers are present. | 1014 | routers are present. |
994 | Default: 3 | 1015 | Default: 3 |
995 | 1016 | ||
@@ -1013,11 +1034,11 @@ temp_prefered_lft - INTEGER | |||
1013 | 1034 | ||
1014 | max_desync_factor - INTEGER | 1035 | max_desync_factor - INTEGER |
1015 | Maximum value for DESYNC_FACTOR, which is a random value | 1036 | Maximum value for DESYNC_FACTOR, which is a random value |
1016 | that ensures that clients don't synchronize with each | 1037 | that ensures that clients don't synchronize with each |
1017 | other and generate new addresses at exactly the same time. | 1038 | other and generate new addresses at exactly the same time. |
1018 | value is in seconds. | 1039 | value is in seconds. |
1019 | Default: 600 | 1040 | Default: 600 |
1020 | 1041 | ||
1021 | regen_max_retry - INTEGER | 1042 | regen_max_retry - INTEGER |
1022 | Number of attempts before give up attempting to generate | 1043 | Number of attempts before give up attempting to generate |
1023 | valid temporary addresses. | 1044 | valid temporary addresses. |
@@ -1025,15 +1046,24 @@ regen_max_retry - INTEGER | |||
1025 | 1046 | ||
1026 | max_addresses - INTEGER | 1047 | max_addresses - INTEGER |
1027 | Number of maximum addresses per interface. 0 disables limitation. | 1048 | Number of maximum addresses per interface. 0 disables limitation. |
1028 | It is recommended not set too large value (or 0) because it would | 1049 | It is recommended not set too large value (or 0) because it would |
1029 | be too easy way to crash kernel to allow to create too much of | 1050 | be too easy way to crash kernel to allow to create too much of |
1030 | autoconfigured addresses. | 1051 | autoconfigured addresses. |
1031 | Default: 16 | 1052 | Default: 16 |
1032 | 1053 | ||
1033 | disable_ipv6 - BOOLEAN | 1054 | disable_ipv6 - BOOLEAN |
1034 | Disable IPv6 operation. | 1055 | Disable IPv6 operation. If accept_dad is set to 2, this value |
1056 | will be dynamically set to TRUE if DAD fails for the link-local | ||
1057 | address. | ||
1035 | Default: FALSE (enable IPv6 operation) | 1058 | Default: FALSE (enable IPv6 operation) |
1036 | 1059 | ||
1060 | When this value is changed from 1 to 0 (IPv6 is being enabled), | ||
1061 | it will dynamically create a link-local address on the given | ||
1062 | interface and start Duplicate Address Detection, if necessary. | ||
1063 | |||
1064 | When this value is changed from 0 to 1 (IPv6 is being disabled), | ||
1065 | it will dynamically delete all address on the given interface. | ||
1066 | |||
1037 | accept_dad - INTEGER | 1067 | accept_dad - INTEGER |
1038 | Whether to accept DAD (Duplicate Address Detection). | 1068 | Whether to accept DAD (Duplicate Address Detection). |
1039 | 0: Disable DAD | 1069 | 0: Disable DAD |
@@ -1252,13 +1282,22 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max | |||
1252 | sctp_wmem - vector of 3 INTEGERs: min, default, max | 1282 | sctp_wmem - vector of 3 INTEGERs: min, default, max |
1253 | See tcp_wmem for a description. | 1283 | See tcp_wmem for a description. |
1254 | 1284 | ||
1255 | UNDOCUMENTED: | ||
1256 | 1285 | ||
1257 | /proc/sys/net/core/* | 1286 | /proc/sys/net/core/* |
1258 | dev_weight FIXME | 1287 | dev_weight - INTEGER |
1288 | The maximum number of packets that kernel can handle on a NAPI | ||
1289 | interrupt, it's a Per-CPU variable. | ||
1290 | |||
1291 | Default: 64 | ||
1259 | 1292 | ||
1260 | /proc/sys/net/unix/* | 1293 | /proc/sys/net/unix/* |
1261 | max_dgram_qlen FIXME | 1294 | max_dgram_qlen - INTEGER |
1295 | The maximum length of dgram socket receive queue | ||
1296 | |||
1297 | Default: 10 | ||
1298 | |||
1299 | |||
1300 | UNDOCUMENTED: | ||
1262 | 1301 | ||
1263 | /proc/sys/net/irda/* | 1302 | /proc/sys/net/irda/* |
1264 | fast_poll_increase FIXME | 1303 | fast_poll_increase FIXME |
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt index 268e5c103dd8..9fd7e21296c8 100644 --- a/Documentation/networking/ipv6.txt +++ b/Documentation/networking/ipv6.txt | |||
@@ -33,3 +33,40 @@ disable | |||
33 | 33 | ||
34 | A reboot is required to enable IPv6. | 34 | A reboot is required to enable IPv6. |
35 | 35 | ||
36 | autoconf | ||
37 | |||
38 | Specifies whether to enable IPv6 address autoconfiguration | ||
39 | on all interfaces. This might be used when one does not wish | ||
40 | for addresses to be automatically generated from prefixes | ||
41 | received in Router Advertisements. | ||
42 | |||
43 | The possible values and their effects are: | ||
44 | |||
45 | 0 | ||
46 | IPv6 address autoconfiguration is disabled on all interfaces. | ||
47 | |||
48 | Only the IPv6 loopback address (::1) and link-local addresses | ||
49 | will be added to interfaces. | ||
50 | |||
51 | 1 | ||
52 | IPv6 address autoconfiguration is enabled on all interfaces. | ||
53 | |||
54 | This is the default value. | ||
55 | |||
56 | disable_ipv6 | ||
57 | |||
58 | Specifies whether to disable IPv6 on all interfaces. | ||
59 | This might be used when no IPv6 addresses are desired. | ||
60 | |||
61 | The possible values and their effects are: | ||
62 | |||
63 | 0 | ||
64 | IPv6 is enabled on all interfaces. | ||
65 | |||
66 | This is the default value. | ||
67 | |||
68 | 1 | ||
69 | IPv6 is disabled on all interfaces. | ||
70 | |||
71 | No IPv6 addresses will be added to interfaces. | ||
72 | |||
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt new file mode 100644 index 000000000000..eeb68685c788 --- /dev/null +++ b/Documentation/networking/ixgbe.txt | |||
@@ -0,0 +1,199 @@ | |||
1 | Linux Base Driver for 10 Gigabit PCI Express Intel(R) Network Connection | ||
2 | ======================================================================== | ||
3 | |||
4 | March 10, 2009 | ||
5 | |||
6 | |||
7 | Contents | ||
8 | ======== | ||
9 | |||
10 | - In This Release | ||
11 | - Identifying Your Adapter | ||
12 | - Building and Installation | ||
13 | - Additional Configurations | ||
14 | - Support | ||
15 | |||
16 | |||
17 | |||
18 | In This Release | ||
19 | =============== | ||
20 | |||
21 | This file describes the ixgbe Linux Base Driver for the 10 Gigabit PCI | ||
22 | Express Intel(R) Network Connection. This driver includes support for | ||
23 | Itanium(R)2-based systems. | ||
24 | |||
25 | For questions related to hardware requirements, refer to the documentation | ||
26 | supplied with your 10 Gigabit adapter. All hardware requirements listed apply | ||
27 | to use with Linux. | ||
28 | |||
29 | The following features are available in this kernel: | ||
30 | - Native VLANs | ||
31 | - Channel Bonding (teaming) | ||
32 | - SNMP | ||
33 | - Generic Receive Offload | ||
34 | - Data Center Bridging | ||
35 | |||
36 | Channel Bonding documentation can be found in the Linux kernel source: | ||
37 | /Documentation/networking/bonding.txt | ||
38 | |||
39 | Ethtool, lspci, and ifconfig can be used to display device and driver | ||
40 | specific information. | ||
41 | |||
42 | |||
43 | Identifying Your Adapter | ||
44 | ======================== | ||
45 | |||
46 | This driver supports devices based on the 82598 controller and the 82599 | ||
47 | controller. | ||
48 | |||
49 | For specific information on identifying which adapter you have, please visit: | ||
50 | |||
51 | http://support.intel.com/support/network/sb/CS-008441.htm | ||
52 | |||
53 | |||
54 | Building and Installation | ||
55 | ========================= | ||
56 | |||
57 | select m for "Intel(R) 10GbE PCI Express adapters support" located at: | ||
58 | Location: | ||
59 | -> Device Drivers | ||
60 | -> Network device support (NETDEVICES [=y]) | ||
61 | -> Ethernet (10000 Mbit) (NETDEV_10000 [=y]) | ||
62 | |||
63 | 1. make modules & make modules_install | ||
64 | |||
65 | 2. Load the module: | ||
66 | |||
67 | # modprobe ixgbe | ||
68 | |||
69 | The insmod command can be used if the full | ||
70 | path to the driver module is specified. For example: | ||
71 | |||
72 | insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgbe/ixgbe.ko | ||
73 | |||
74 | With 2.6 based kernels also make sure that older ixgbe drivers are | ||
75 | removed from the kernel, before loading the new module: | ||
76 | |||
77 | rmmod ixgbe; modprobe ixgbe | ||
78 | |||
79 | 3. Assign an IP address to the interface by entering the following, where | ||
80 | x is the interface number: | ||
81 | |||
82 | ifconfig ethx <IP_address> | ||
83 | |||
84 | 4. Verify that the interface works. Enter the following, where <IP_address> | ||
85 | is the IP address for another machine on the same subnet as the interface | ||
86 | that is being tested: | ||
87 | |||
88 | ping <IP_address> | ||
89 | |||
90 | |||
91 | Additional Configurations | ||
92 | ========================= | ||
93 | |||
94 | Viewing Link Messages | ||
95 | --------------------- | ||
96 | Link messages will not be displayed to the console if the distribution is | ||
97 | restricting system messages. In order to see network driver link messages on | ||
98 | your console, set dmesg to eight by entering the following: | ||
99 | |||
100 | dmesg -n 8 | ||
101 | |||
102 | NOTE: This setting is not saved across reboots. | ||
103 | |||
104 | |||
105 | Jumbo Frames | ||
106 | ------------ | ||
107 | The driver supports Jumbo Frames for all adapters. Jumbo Frames support is | ||
108 | enabled by changing the MTU to a value larger than the default of 1500. | ||
109 | The maximum value for the MTU is 16110. Use the ifconfig command to | ||
110 | increase the MTU size. For example: | ||
111 | |||
112 | ifconfig ethx mtu 9000 up | ||
113 | |||
114 | The maximum MTU setting for Jumbo Frames is 16110. This value coincides | ||
115 | with the maximum Jumbo Frames size of 16128. | ||
116 | |||
117 | Generic Receive Offload, aka GRO | ||
118 | -------------------------------- | ||
119 | The driver supports the in-kernel software implementation of GRO. GRO has | ||
120 | shown that by coalescing Rx traffic into larger chunks of data, CPU | ||
121 | utilization can be significantly reduced when under large Rx load. GRO is an | ||
122 | evolution of the previously-used LRO interface. GRO is able to coalesce | ||
123 | other protocols besides TCP. It's also safe to use with configurations that | ||
124 | are problematic for LRO, namely bridging and iSCSI. | ||
125 | |||
126 | GRO is enabled by default in the driver. Future versions of ethtool will | ||
127 | support disabling and re-enabling GRO on the fly. | ||
128 | |||
129 | |||
130 | Data Center Bridging, aka DCB | ||
131 | ----------------------------- | ||
132 | |||
133 | DCB is a configuration Quality of Service implementation in hardware. | ||
134 | It uses the VLAN priority tag (802.1p) to filter traffic. That means | ||
135 | that there are 8 different priorities that traffic can be filtered into. | ||
136 | It also enables priority flow control which can limit or eliminate the | ||
137 | number of dropped packets during network stress. Bandwidth can be | ||
138 | allocated to each of these priorities, which is enforced at the hardware | ||
139 | level. | ||
140 | |||
141 | To enable DCB support in ixgbe, you must enable the DCB netlink layer to | ||
142 | allow the userspace tools (see below) to communicate with the driver. | ||
143 | This can be found in the kernel configuration here: | ||
144 | |||
145 | -> Networking support | ||
146 | -> Networking options | ||
147 | -> Data Center Bridging support | ||
148 | |||
149 | Once this is selected, DCB support must be selected for ixgbe. This can | ||
150 | be found here: | ||
151 | |||
152 | -> Device Drivers | ||
153 | -> Network device support (NETDEVICES [=y]) | ||
154 | -> Ethernet (10000 Mbit) (NETDEV_10000 [=y]) | ||
155 | -> Intel(R) 10GbE PCI Express adapters support | ||
156 | -> Data Center Bridging (DCB) Support | ||
157 | |||
158 | After these options are selected, you must rebuild your kernel and your | ||
159 | modules. | ||
160 | |||
161 | In order to use DCB, userspace tools must be downloaded and installed. | ||
162 | The dcbd tools can be found at: | ||
163 | |||
164 | http://e1000.sf.net | ||
165 | |||
166 | |||
167 | Ethtool | ||
168 | ------- | ||
169 | The driver utilizes the ethtool interface for driver configuration and | ||
170 | diagnostics, as well as displaying statistical information. Ethtool | ||
171 | version 3.0 or later is required for this functionality. | ||
172 | |||
173 | The latest release of ethtool can be found from | ||
174 | http://sourceforge.net/projects/gkernel. | ||
175 | |||
176 | |||
177 | NAPI | ||
178 | ---- | ||
179 | |||
180 | NAPI (Rx polling mode) is supported in the ixgbe driver. NAPI is enabled | ||
181 | by default in the driver. | ||
182 | |||
183 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | ||
184 | |||
185 | |||
186 | Support | ||
187 | ======= | ||
188 | |||
189 | For general information, go to the Intel support website at: | ||
190 | |||
191 | http://support.intel.com | ||
192 | |||
193 | or the Intel Wired Networking project hosted by Sourceforge at: | ||
194 | |||
195 | http://e1000.sourceforge.net | ||
196 | |||
197 | If an issue is identified with the released source code on the supported | ||
198 | kernel with a supported adapter, email the specific information related | ||
199 | to the issue to e1000-devel@lists.sf.net | ||
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt index 2451f551c505..63214b280e00 100644 --- a/Documentation/networking/l2tp.txt +++ b/Documentation/networking/l2tp.txt | |||
@@ -158,7 +158,7 @@ Sample Userspace Code | |||
158 | } | 158 | } |
159 | return 0; | 159 | return 0; |
160 | 160 | ||
161 | Miscellanous | 161 | Miscellaneous |
162 | ============ | 162 | ============ |
163 | 163 | ||
164 | The PPPoL2TP driver was developed as part of the OpenL2TP project by | 164 | The PPPoL2TP driver was developed as part of the OpenL2TP project by |
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt index 84906ef3ed6e..b30e81ad5307 100644 --- a/Documentation/networking/mac80211-injection.txt +++ b/Documentation/networking/mac80211-injection.txt | |||
@@ -12,38 +12,22 @@ following format: | |||
12 | The radiotap format is discussed in | 12 | The radiotap format is discussed in |
13 | ./Documentation/networking/radiotap-headers.txt. | 13 | ./Documentation/networking/radiotap-headers.txt. |
14 | 14 | ||
15 | Despite 13 radiotap argument types are currently defined, most only make sense | 15 | Despite many radiotap parameters being currently defined, most only make sense |
16 | to appear on received packets. The following information is parsed from the | 16 | to appear on received packets. The following information is parsed from the |
17 | radiotap headers and used to control injection: | 17 | radiotap headers and used to control injection: |
18 | 18 | ||
19 | * IEEE80211_RADIOTAP_RATE | ||
20 | |||
21 | rate in 500kbps units, automatic if invalid or not present | ||
22 | |||
23 | |||
24 | * IEEE80211_RADIOTAP_ANTENNA | ||
25 | |||
26 | antenna to use, automatic if not present | ||
27 | |||
28 | |||
29 | * IEEE80211_RADIOTAP_DBM_TX_POWER | ||
30 | |||
31 | transmit power in dBm, automatic if not present | ||
32 | |||
33 | |||
34 | * IEEE80211_RADIOTAP_FLAGS | 19 | * IEEE80211_RADIOTAP_FLAGS |
35 | 20 | ||
36 | IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated | 21 | IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated |
37 | IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available | 22 | IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available |
38 | IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the | 23 | IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the |
39 | current fragmentation threshold. Note that | 24 | current fragmentation threshold. |
40 | this flag is only reliable when software | 25 | |
41 | fragmentation is enabled) | ||
42 | 26 | ||
43 | The injection code can also skip all other currently defined radiotap fields | 27 | The injection code can also skip all other currently defined radiotap fields |
44 | facilitating replay of captured radiotap headers directly. | 28 | facilitating replay of captured radiotap headers directly. |
45 | 29 | ||
46 | Here is an example valid radiotap header defining these three parameters | 30 | Here is an example valid radiotap header defining some parameters |
47 | 31 | ||
48 | 0x00, 0x00, // <-- radiotap version | 32 | 0x00, 0x00, // <-- radiotap version |
49 | 0x0b, 0x00, // <- radiotap header length | 33 | 0x0b, 0x00, // <- radiotap header length |
@@ -72,8 +56,8 @@ interface), along the following lines: | |||
72 | ... | 56 | ... |
73 | r = pcap_inject(ppcap, u8aSendBuffer, nLength); | 57 | r = pcap_inject(ppcap, u8aSendBuffer, nLength); |
74 | 58 | ||
75 | You can also find sources for a complete inject test applet here: | 59 | You can also find a link to a complete inject application here: |
76 | 60 | ||
77 | http://penumbra.warmcat.com/_twk/tiki-index.php?page=packetspammer | 61 | http://wireless.kernel.org/en/users/Documentation/packetspammer |
78 | 62 | ||
79 | Andy Green <andy@warmcat.com> | 63 | Andy Green <andy@warmcat.com> |
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index a2ab6a0b116d..87b3d15f523a 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt | |||
@@ -74,7 +74,7 @@ dev->hard_start_xmit: | |||
74 | for this and return NETDEV_TX_LOCKED when the spin lock fails. | 74 | for this and return NETDEV_TX_LOCKED when the spin lock fails. |
75 | The locking there should also properly protect against | 75 | The locking there should also properly protect against |
76 | set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated. | 76 | set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated. |
77 | Dont use it for new drivers. | 77 | Don't use it for new drivers. |
78 | 78 | ||
79 | Context: Process with BHs disabled or BH (timer), | 79 | Context: Process with BHs disabled or BH (timer), |
80 | will be called with interrupts disabled by netconsole. | 80 | will be called with interrupts disabled by netconsole. |
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index c9074f9b78bb..1a77a3cfae54 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt | |||
@@ -38,9 +38,6 @@ ifinfomsg::if_flags & IFF_LOWER_UP: | |||
38 | ifinfomsg::if_flags & IFF_DORMANT: | 38 | ifinfomsg::if_flags & IFF_DORMANT: |
39 | Driver has signaled netif_dormant_on() | 39 | Driver has signaled netif_dormant_on() |
40 | 40 | ||
41 | These interface flags can also be queried without netlink using the | ||
42 | SIOCGIFFLAGS ioctl. | ||
43 | |||
44 | TLV IFLA_OPERSTATE | 41 | TLV IFLA_OPERSTATE |
45 | 42 | ||
46 | contains RFC2863 state of the interface in numeric representation: | 43 | contains RFC2863 state of the interface in numeric representation: |
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 07c53d596035..a22fd85e3796 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt | |||
@@ -4,16 +4,18 @@ | |||
4 | 4 | ||
5 | This file documents the CONFIG_PACKET_MMAP option available with the PACKET | 5 | This file documents the CONFIG_PACKET_MMAP option available with the PACKET |
6 | socket interface on 2.4 and 2.6 kernels. This type of sockets is used for | 6 | socket interface on 2.4 and 2.6 kernels. This type of sockets is used for |
7 | capture network traffic with utilities like tcpdump or any other that uses | 7 | capture network traffic with utilities like tcpdump or any other that needs |
8 | the libpcap library. | 8 | raw access to network interface. |
9 | |||
10 | You can find the latest version of this document at | ||
11 | 9 | ||
10 | You can find the latest version of this document at: | ||
12 | http://pusa.uv.es/~ulisses/packet_mmap/ | 11 | http://pusa.uv.es/~ulisses/packet_mmap/ |
13 | 12 | ||
14 | Please send me your comments to | 13 | Howto can be found at: |
14 | http://wiki.gnu-log.net (packet_mmap) | ||
15 | 15 | ||
16 | Please send your comments to | ||
16 | Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> | 17 | Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> |
18 | Johann Baudy <johann.baudy@gnu-log.net> | ||
17 | 19 | ||
18 | ------------------------------------------------------------------------------- | 20 | ------------------------------------------------------------------------------- |
19 | + Why use PACKET_MMAP | 21 | + Why use PACKET_MMAP |
@@ -25,19 +27,24 @@ to capture each packet, it requires two if you want to get packet's | |||
25 | timestamp (like libpcap always does). | 27 | timestamp (like libpcap always does). |
26 | 28 | ||
27 | In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size | 29 | In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size |
28 | configurable circular buffer mapped in user space. This way reading packets just | 30 | configurable circular buffer mapped in user space that can be used to either |
29 | needs to wait for them, most of the time there is no need to issue a single | 31 | send or receive packets. This way reading packets just needs to wait for them, |
30 | system call. By using a shared buffer between the kernel and the user | 32 | most of the time there is no need to issue a single system call. Concerning |
31 | also has the benefit of minimizing packet copies. | 33 | transmission, multiple packets can be sent through one system call to get the |
32 | 34 | highest bandwidth. | |
33 | It's fine to use PACKET_MMAP to improve the performance of the capture process, | 35 | By using a shared buffer between the kernel and the user also has the benefit |
34 | but it isn't everything. At least, if you are capturing at high speeds (this | 36 | of minimizing packet copies. |
35 | is relative to the cpu speed), you should check if the device driver of your | 37 | |
36 | network interface card supports some sort of interrupt load mitigation or | 38 | It's fine to use PACKET_MMAP to improve the performance of the capture and |
37 | (even better) if it supports NAPI, also make sure it is enabled. | 39 | transmission process, but it isn't everything. At least, if you are capturing |
40 | at high speeds (this is relative to the cpu speed), you should check if the | ||
41 | device driver of your network interface card supports some sort of interrupt | ||
42 | load mitigation or (even better) if it supports NAPI, also make sure it is | ||
43 | enabled. For transmission, check the MTU (Maximum Transmission Unit) used and | ||
44 | supported by devices of your network. | ||
38 | 45 | ||
39 | -------------------------------------------------------------------------------- | 46 | -------------------------------------------------------------------------------- |
40 | + How to use CONFIG_PACKET_MMAP | 47 | + How to use CONFIG_PACKET_MMAP to improve capture process |
41 | -------------------------------------------------------------------------------- | 48 | -------------------------------------------------------------------------------- |
42 | 49 | ||
43 | From the user standpoint, you should use the higher level libpcap library, which | 50 | From the user standpoint, you should use the higher level libpcap library, which |
@@ -57,7 +64,7 @@ the low level details or want to improve libpcap by including PACKET_MMAP | |||
57 | support. | 64 | support. |
58 | 65 | ||
59 | -------------------------------------------------------------------------------- | 66 | -------------------------------------------------------------------------------- |
60 | + How to use CONFIG_PACKET_MMAP directly | 67 | + How to use CONFIG_PACKET_MMAP directly to improve capture process |
61 | -------------------------------------------------------------------------------- | 68 | -------------------------------------------------------------------------------- |
62 | 69 | ||
63 | From the system calls stand point, the use of PACKET_MMAP involves | 70 | From the system calls stand point, the use of PACKET_MMAP involves |
@@ -66,6 +73,7 @@ the following process: | |||
66 | 73 | ||
67 | [setup] socket() -------> creation of the capture socket | 74 | [setup] socket() -------> creation of the capture socket |
68 | setsockopt() ---> allocation of the circular buffer (ring) | 75 | setsockopt() ---> allocation of the circular buffer (ring) |
76 | option: PACKET_RX_RING | ||
69 | mmap() ---------> mapping of the allocated buffer to the | 77 | mmap() ---------> mapping of the allocated buffer to the |
70 | user process | 78 | user process |
71 | 79 | ||
@@ -97,13 +105,75 @@ also the mapping of the circular buffer in the user process and | |||
97 | the use of this buffer. | 105 | the use of this buffer. |
98 | 106 | ||
99 | -------------------------------------------------------------------------------- | 107 | -------------------------------------------------------------------------------- |
108 | + How to use CONFIG_PACKET_MMAP directly to improve transmission process | ||
109 | -------------------------------------------------------------------------------- | ||
110 | Transmission process is similar to capture as shown below. | ||
111 | |||
112 | [setup] socket() -------> creation of the transmission socket | ||
113 | setsockopt() ---> allocation of the circular buffer (ring) | ||
114 | option: PACKET_TX_RING | ||
115 | bind() ---------> bind transmission socket with a network interface | ||
116 | mmap() ---------> mapping of the allocated buffer to the | ||
117 | user process | ||
118 | |||
119 | [transmission] poll() ---------> wait for free packets (optional) | ||
120 | send() ---------> send all packets that are set as ready in | ||
121 | the ring | ||
122 | The flag MSG_DONTWAIT can be used to return | ||
123 | before end of transfer. | ||
124 | |||
125 | [shutdown] close() --------> destruction of the transmission socket and | ||
126 | deallocation of all associated resources. | ||
127 | |||
128 | Binding the socket to your network interface is mandatory (with zero copy) to | ||
129 | know the header size of frames used in the circular buffer. | ||
130 | |||
131 | As capture, each frame contains two parts: | ||
132 | |||
133 | -------------------- | ||
134 | | struct tpacket_hdr | Header. It contains the status of | ||
135 | | | of this frame | ||
136 | |--------------------| | ||
137 | | data buffer | | ||
138 | . . Data that will be sent over the network interface. | ||
139 | . . | ||
140 | -------------------- | ||
141 | |||
142 | bind() associates the socket to your network interface thanks to | ||
143 | sll_ifindex parameter of struct sockaddr_ll. | ||
144 | |||
145 | Initialization example: | ||
146 | |||
147 | struct sockaddr_ll my_addr; | ||
148 | struct ifreq s_ifr; | ||
149 | ... | ||
150 | |||
151 | strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name)); | ||
152 | |||
153 | /* get interface index of eth0 */ | ||
154 | ioctl(this->socket, SIOCGIFINDEX, &s_ifr); | ||
155 | |||
156 | /* fill sockaddr_ll struct to prepare binding */ | ||
157 | my_addr.sll_family = AF_PACKET; | ||
158 | my_addr.sll_protocol = ETH_P_ALL; | ||
159 | my_addr.sll_ifindex = s_ifr.ifr_ifindex; | ||
160 | |||
161 | /* bind socket to eth0 */ | ||
162 | bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll)); | ||
163 | |||
164 | A complete tutorial is available at: http://wiki.gnu-log.net/ | ||
165 | |||
166 | -------------------------------------------------------------------------------- | ||
100 | + PACKET_MMAP settings | 167 | + PACKET_MMAP settings |
101 | -------------------------------------------------------------------------------- | 168 | -------------------------------------------------------------------------------- |
102 | 169 | ||
103 | 170 | ||
104 | To setup PACKET_MMAP from user level code is done with a call like | 171 | To setup PACKET_MMAP from user level code is done with a call like |
105 | 172 | ||
173 | - Capture process | ||
106 | setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req)) | 174 | setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req)) |
175 | - Transmission process | ||
176 | setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req)) | ||
107 | 177 | ||
108 | The most significant argument in the previous call is the req parameter, | 178 | The most significant argument in the previous call is the req parameter, |
109 | this parameter must to have the following structure: | 179 | this parameter must to have the following structure: |
@@ -117,11 +187,11 @@ this parameter must to have the following structure: | |||
117 | }; | 187 | }; |
118 | 188 | ||
119 | This structure is defined in /usr/include/linux/if_packet.h and establishes a | 189 | This structure is defined in /usr/include/linux/if_packet.h and establishes a |
120 | circular buffer (ring) of unswappable memory mapped in the capture process. | 190 | circular buffer (ring) of unswappable memory. |
121 | Being mapped in the capture process allows reading the captured frames and | 191 | Being mapped in the capture process allows reading the captured frames and |
122 | related meta-information like timestamps without requiring a system call. | 192 | related meta-information like timestamps without requiring a system call. |
123 | 193 | ||
124 | Captured frames are grouped in blocks. Each block is a physically contiguous | 194 | Frames are grouped in blocks. Each block is a physically contiguous |
125 | region of memory and holds tp_block_size/tp_frame_size frames. The total number | 195 | region of memory and holds tp_block_size/tp_frame_size frames. The total number |
126 | of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because | 196 | of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because |
127 | 197 | ||
@@ -336,6 +406,7 @@ struct tpacket_hdr). If this field is 0 means that the frame is ready | |||
336 | to be used for the kernel, If not, there is a frame the user can read | 406 | to be used for the kernel, If not, there is a frame the user can read |
337 | and the following flags apply: | 407 | and the following flags apply: |
338 | 408 | ||
409 | +++ Capture process: | ||
339 | from include/linux/if_packet.h | 410 | from include/linux/if_packet.h |
340 | 411 | ||
341 | #define TP_STATUS_COPY 2 | 412 | #define TP_STATUS_COPY 2 |
@@ -391,6 +462,37 @@ packets are in the ring: | |||
391 | It doesn't incur in a race condition to first check the status value and | 462 | It doesn't incur in a race condition to first check the status value and |
392 | then poll for frames. | 463 | then poll for frames. |
393 | 464 | ||
465 | |||
466 | ++ Transmission process | ||
467 | Those defines are also used for transmission: | ||
468 | |||
469 | #define TP_STATUS_AVAILABLE 0 // Frame is available | ||
470 | #define TP_STATUS_SEND_REQUEST 1 // Frame will be sent on next send() | ||
471 | #define TP_STATUS_SENDING 2 // Frame is currently in transmission | ||
472 | #define TP_STATUS_WRONG_FORMAT 4 // Frame format is not correct | ||
473 | |||
474 | First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a | ||
475 | packet, the user fills a data buffer of an available frame, sets tp_len to | ||
476 | current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST. | ||
477 | This can be done on multiple frames. Once the user is ready to transmit, it | ||
478 | calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are | ||
479 | forwarded to the network device. The kernel updates each status of sent | ||
480 | frames with TP_STATUS_SENDING until the end of transfer. | ||
481 | At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE. | ||
482 | |||
483 | header->tp_len = in_i_size; | ||
484 | header->tp_status = TP_STATUS_SEND_REQUEST; | ||
485 | retval = send(this->socket, NULL, 0, 0); | ||
486 | |||
487 | The user can also use poll() to check if a buffer is available: | ||
488 | (status == TP_STATUS_SENDING) | ||
489 | |||
490 | struct pollfd pfd; | ||
491 | pfd.fd = fd; | ||
492 | pfd.revents = 0; | ||
493 | pfd.events = POLLOUT; | ||
494 | retval = poll(&pfd, 1, timeout); | ||
495 | |||
394 | -------------------------------------------------------------------------------- | 496 | -------------------------------------------------------------------------------- |
395 | + THANKS | 497 | + THANKS |
396 | -------------------------------------------------------------------------------- | 498 | -------------------------------------------------------------------------------- |
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt index 6a07e45d4a93..6e8ce09f9c73 100644 --- a/Documentation/networking/phonet.txt +++ b/Documentation/networking/phonet.txt | |||
@@ -36,7 +36,7 @@ Phonet packets have a common header as follows: | |||
36 | On Linux, the link-layer header includes the pn_media byte (see below). | 36 | On Linux, the link-layer header includes the pn_media byte (see below). |
37 | The next 7 bytes are part of the network-layer header. | 37 | The next 7 bytes are part of the network-layer header. |
38 | 38 | ||
39 | The device ID is split: the 6 higher-order bits consitute the device | 39 | The device ID is split: the 6 higher-order bits constitute the device |
40 | address, while the 2 lower-order bits are used for multiplexing, as are | 40 | address, while the 2 lower-order bits are used for multiplexing, as are |
41 | the 8-bit object identifiers. As such, Phonet can be considered as a | 41 | the 8-bit object identifiers. As such, Phonet can be considered as a |
42 | network layer with 6 bits of address space and 10 bits for transport | 42 | network layer with 6 bits of address space and 10 bits for transport |
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt new file mode 100644 index 000000000000..c67077cbeb80 --- /dev/null +++ b/Documentation/networking/rds.txt | |||
@@ -0,0 +1,356 @@ | |||
1 | |||
2 | Overview | ||
3 | ======== | ||
4 | |||
5 | This readme tries to provide some background on the hows and whys of RDS, | ||
6 | and will hopefully help you find your way around the code. | ||
7 | |||
8 | In addition, please see this email about RDS origins: | ||
9 | http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html | ||
10 | |||
11 | RDS Architecture | ||
12 | ================ | ||
13 | |||
14 | RDS provides reliable, ordered datagram delivery by using a single | ||
15 | reliable connection between any two nodes in the cluster. This allows | ||
16 | applications to use a single socket to talk to any other process in the | ||
17 | cluster - so in a cluster with N processes you need N sockets, in contrast | ||
18 | to N*N if you use a connection-oriented socket transport like TCP. | ||
19 | |||
20 | RDS is not Infiniband-specific; it was designed to support different | ||
21 | transports. The current implementation used to support RDS over TCP as well | ||
22 | as IB. Work is in progress to support RDS over iWARP, and using DCE to | ||
23 | guarantee no dropped packets on Ethernet, it may be possible to use RDS over | ||
24 | UDP in the future. | ||
25 | |||
26 | The high-level semantics of RDS from the application's point of view are | ||
27 | |||
28 | * Addressing | ||
29 | RDS uses IPv4 addresses and 16bit port numbers to identify | ||
30 | the end point of a connection. All socket operations that involve | ||
31 | passing addresses between kernel and user space generally | ||
32 | use a struct sockaddr_in. | ||
33 | |||
34 | The fact that IPv4 addresses are used does not mean the underlying | ||
35 | transport has to be IP-based. In fact, RDS over IB uses a | ||
36 | reliable IB connection; the IP address is used exclusively to | ||
37 | locate the remote node's GID (by ARPing for the given IP). | ||
38 | |||
39 | The port space is entirely independent of UDP, TCP or any other | ||
40 | protocol. | ||
41 | |||
42 | * Socket interface | ||
43 | RDS sockets work *mostly* as you would expect from a BSD | ||
44 | socket. The next section will cover the details. At any rate, | ||
45 | all I/O is performed through the standard BSD socket API. | ||
46 | Some additions like zerocopy support are implemented through | ||
47 | control messages, while other extensions use the getsockopt/ | ||
48 | setsockopt calls. | ||
49 | |||
50 | Sockets must be bound before you can send or receive data. | ||
51 | This is needed because binding also selects a transport and | ||
52 | attaches it to the socket. Once bound, the transport assignment | ||
53 | does not change. RDS will tolerate IPs moving around (eg in | ||
54 | a active-active HA scenario), but only as long as the address | ||
55 | doesn't move to a different transport. | ||
56 | |||
57 | * sysctls | ||
58 | RDS supports a number of sysctls in /proc/sys/net/rds | ||
59 | |||
60 | |||
61 | Socket Interface | ||
62 | ================ | ||
63 | |||
64 | AF_RDS, PF_RDS, SOL_RDS | ||
65 | These constants haven't been assigned yet, because RDS isn't in | ||
66 | mainline yet. Currently, the kernel module assigns some constant | ||
67 | and publishes it to user space through two sysctl files | ||
68 | /proc/sys/net/rds/pf_rds | ||
69 | /proc/sys/net/rds/sol_rds | ||
70 | |||
71 | fd = socket(PF_RDS, SOCK_SEQPACKET, 0); | ||
72 | This creates a new, unbound RDS socket. | ||
73 | |||
74 | setsockopt(SOL_SOCKET): send and receive buffer size | ||
75 | RDS honors the send and receive buffer size socket options. | ||
76 | You are not allowed to queue more than SO_SNDSIZE bytes to | ||
77 | a socket. A message is queued when sendmsg is called, and | ||
78 | it leaves the queue when the remote system acknowledges | ||
79 | its arrival. | ||
80 | |||
81 | The SO_RCVSIZE option controls the maximum receive queue length. | ||
82 | This is a soft limit rather than a hard limit - RDS will | ||
83 | continue to accept and queue incoming messages, even if that | ||
84 | takes the queue length over the limit. However, it will also | ||
85 | mark the port as "congested" and send a congestion update to | ||
86 | the source node. The source node is supposed to throttle any | ||
87 | processes sending to this congested port. | ||
88 | |||
89 | bind(fd, &sockaddr_in, ...) | ||
90 | This binds the socket to a local IP address and port, and a | ||
91 | transport. | ||
92 | |||
93 | sendmsg(fd, ...) | ||
94 | Sends a message to the indicated recipient. The kernel will | ||
95 | transparently establish the underlying reliable connection | ||
96 | if it isn't up yet. | ||
97 | |||
98 | An attempt to send a message that exceeds SO_SNDSIZE will | ||
99 | return with -EMSGSIZE | ||
100 | |||
101 | An attempt to send a message that would take the total number | ||
102 | of queued bytes over the SO_SNDSIZE threshold will return | ||
103 | EAGAIN. | ||
104 | |||
105 | An attempt to send a message to a destination that is marked | ||
106 | as "congested" will return ENOBUFS. | ||
107 | |||
108 | recvmsg(fd, ...) | ||
109 | Receives a message that was queued to this socket. The sockets | ||
110 | recv queue accounting is adjusted, and if the queue length | ||
111 | drops below SO_SNDSIZE, the port is marked uncongested, and | ||
112 | a congestion update is sent to all peers. | ||
113 | |||
114 | Applications can ask the RDS kernel module to receive | ||
115 | notifications via control messages (for instance, there is a | ||
116 | notification when a congestion update arrived, or when a RDMA | ||
117 | operation completes). These notifications are received through | ||
118 | the msg.msg_control buffer of struct msghdr. The format of the | ||
119 | messages is described in manpages. | ||
120 | |||
121 | poll(fd) | ||
122 | RDS supports the poll interface to allow the application | ||
123 | to implement async I/O. | ||
124 | |||
125 | POLLIN handling is pretty straightforward. When there's an | ||
126 | incoming message queued to the socket, or a pending notification, | ||
127 | we signal POLLIN. | ||
128 | |||
129 | POLLOUT is a little harder. Since you can essentially send | ||
130 | to any destination, RDS will always signal POLLOUT as long as | ||
131 | there's room on the send queue (ie the number of bytes queued | ||
132 | is less than the sendbuf size). | ||
133 | |||
134 | However, the kernel will refuse to accept messages to | ||
135 | a destination marked congested - in this case you will loop | ||
136 | forever if you rely on poll to tell you what to do. | ||
137 | This isn't a trivial problem, but applications can deal with | ||
138 | this - by using congestion notifications, and by checking for | ||
139 | ENOBUFS errors returned by sendmsg. | ||
140 | |||
141 | setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in) | ||
142 | This allows the application to discard all messages queued to a | ||
143 | specific destination on this particular socket. | ||
144 | |||
145 | This allows the application to cancel outstanding messages if | ||
146 | it detects a timeout. For instance, if it tried to send a message, | ||
147 | and the remote host is unreachable, RDS will keep trying forever. | ||
148 | The application may decide it's not worth it, and cancel the | ||
149 | operation. In this case, it would use RDS_CANCEL_SENT_TO to | ||
150 | nuke any pending messages. | ||
151 | |||
152 | |||
153 | RDMA for RDS | ||
154 | ============ | ||
155 | |||
156 | see rds-rdma(7) manpage (available in rds-tools) | ||
157 | |||
158 | |||
159 | Congestion Notifications | ||
160 | ======================== | ||
161 | |||
162 | see rds(7) manpage | ||
163 | |||
164 | |||
165 | RDS Protocol | ||
166 | ============ | ||
167 | |||
168 | Message header | ||
169 | |||
170 | The message header is a 'struct rds_header' (see rds.h): | ||
171 | Fields: | ||
172 | h_sequence: | ||
173 | per-packet sequence number | ||
174 | h_ack: | ||
175 | piggybacked acknowledgment of last packet received | ||
176 | h_len: | ||
177 | length of data, not including header | ||
178 | h_sport: | ||
179 | source port | ||
180 | h_dport: | ||
181 | destination port | ||
182 | h_flags: | ||
183 | CONG_BITMAP - this is a congestion update bitmap | ||
184 | ACK_REQUIRED - receiver must ack this packet | ||
185 | RETRANSMITTED - packet has previously been sent | ||
186 | h_credit: | ||
187 | indicate to other end of connection that | ||
188 | it has more credits available (i.e. there is | ||
189 | more send room) | ||
190 | h_padding[4]: | ||
191 | unused, for future use | ||
192 | h_csum: | ||
193 | header checksum | ||
194 | h_exthdr: | ||
195 | optional data can be passed here. This is currently used for | ||
196 | passing RDMA-related information. | ||
197 | |||
198 | ACK and retransmit handling | ||
199 | |||
200 | One might think that with reliable IB connections you wouldn't need | ||
201 | to ack messages that have been received. The problem is that IB | ||
202 | hardware generates an ack message before it has DMAed the message | ||
203 | into memory. This creates a potential message loss if the HCA is | ||
204 | disabled for any reason between when it sends the ack and before | ||
205 | the message is DMAed and processed. This is only a potential issue | ||
206 | if another HCA is available for fail-over. | ||
207 | |||
208 | Sending an ack immediately would allow the sender to free the sent | ||
209 | message from their send queue quickly, but could cause excessive | ||
210 | traffic to be used for acks. RDS piggybacks acks on sent data | ||
211 | packets. Ack-only packets are reduced by only allowing one to be | ||
212 | in flight at a time, and by the sender only asking for acks when | ||
213 | its send buffers start to fill up. All retransmissions are also | ||
214 | acked. | ||
215 | |||
216 | Flow Control | ||
217 | |||
218 | RDS's IB transport uses a credit-based mechanism to verify that | ||
219 | there is space in the peer's receive buffers for more data. This | ||
220 | eliminates the need for hardware retries on the connection. | ||
221 | |||
222 | Congestion | ||
223 | |||
224 | Messages waiting in the receive queue on the receiving socket | ||
225 | are accounted against the sockets SO_RCVBUF option value. Only | ||
226 | the payload bytes in the message are accounted for. If the | ||
227 | number of bytes queued equals or exceeds rcvbuf then the socket | ||
228 | is congested. All sends attempted to this socket's address | ||
229 | should return block or return -EWOULDBLOCK. | ||
230 | |||
231 | Applications are expected to be reasonably tuned such that this | ||
232 | situation very rarely occurs. An application encountering this | ||
233 | "back-pressure" is considered a bug. | ||
234 | |||
235 | This is implemented by having each node maintain bitmaps which | ||
236 | indicate which ports on bound addresses are congested. As the | ||
237 | bitmap changes it is sent through all the connections which | ||
238 | terminate in the local address of the bitmap which changed. | ||
239 | |||
240 | The bitmaps are allocated as connections are brought up. This | ||
241 | avoids allocation in the interrupt handling path which queues | ||
242 | sages on sockets. The dense bitmaps let transports send the | ||
243 | entire bitmap on any bitmap change reasonably efficiently. This | ||
244 | is much easier to implement than some finer-grained | ||
245 | communication of per-port congestion. The sender does a very | ||
246 | inexpensive bit test to test if the port it's about to send to | ||
247 | is congested or not. | ||
248 | |||
249 | |||
250 | RDS Transport Layer | ||
251 | ================== | ||
252 | |||
253 | As mentioned above, RDS is not IB-specific. Its code is divided | ||
254 | into a general RDS layer and a transport layer. | ||
255 | |||
256 | The general layer handles the socket API, congestion handling, | ||
257 | loopback, stats, usermem pinning, and the connection state machine. | ||
258 | |||
259 | The transport layer handles the details of the transport. The IB | ||
260 | transport, for example, handles all the queue pairs, work requests, | ||
261 | CM event handlers, and other Infiniband details. | ||
262 | |||
263 | |||
264 | RDS Kernel Structures | ||
265 | ===================== | ||
266 | |||
267 | struct rds_message | ||
268 | aka possibly "rds_outgoing", the generic RDS layer copies data to | ||
269 | be sent and sets header fields as needed, based on the socket API. | ||
270 | This is then queued for the individual connection and sent by the | ||
271 | connection's transport. | ||
272 | struct rds_incoming | ||
273 | a generic struct referring to incoming data that can be handed from | ||
274 | the transport to the general code and queued by the general code | ||
275 | while the socket is awoken. It is then passed back to the transport | ||
276 | code to handle the actual copy-to-user. | ||
277 | struct rds_socket | ||
278 | per-socket information | ||
279 | struct rds_connection | ||
280 | per-connection information | ||
281 | struct rds_transport | ||
282 | pointers to transport-specific functions | ||
283 | struct rds_statistics | ||
284 | non-transport-specific statistics | ||
285 | struct rds_cong_map | ||
286 | wraps the raw congestion bitmap, contains rbnode, waitq, etc. | ||
287 | |||
288 | Connection management | ||
289 | ===================== | ||
290 | |||
291 | Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and | ||
292 | ERROR states. | ||
293 | |||
294 | The first time an attempt is made by an RDS socket to send data to | ||
295 | a node, a connection is allocated and connected. That connection is | ||
296 | then maintained forever -- if there are transport errors, the | ||
297 | connection will be dropped and re-established. | ||
298 | |||
299 | Dropping a connection while packets are queued will cause queued or | ||
300 | partially-sent datagrams to be retransmitted when the connection is | ||
301 | re-established. | ||
302 | |||
303 | |||
304 | The send path | ||
305 | ============= | ||
306 | |||
307 | rds_sendmsg() | ||
308 | struct rds_message built from incoming data | ||
309 | CMSGs parsed (e.g. RDMA ops) | ||
310 | transport connection alloced and connected if not already | ||
311 | rds_message placed on send queue | ||
312 | send worker awoken | ||
313 | rds_send_worker() | ||
314 | calls rds_send_xmit() until queue is empty | ||
315 | rds_send_xmit() | ||
316 | transmits congestion map if one is pending | ||
317 | may set ACK_REQUIRED | ||
318 | calls transport to send either non-RDMA or RDMA message | ||
319 | (RDMA ops never retransmitted) | ||
320 | rds_ib_xmit() | ||
321 | allocs work requests from send ring | ||
322 | adds any new send credits available to peer (h_credits) | ||
323 | maps the rds_message's sg list | ||
324 | piggybacks ack | ||
325 | populates work requests | ||
326 | post send to connection's queue pair | ||
327 | |||
328 | The recv path | ||
329 | ============= | ||
330 | |||
331 | rds_ib_recv_cq_comp_handler() | ||
332 | looks at write completions | ||
333 | unmaps recv buffer from device | ||
334 | no errors, call rds_ib_process_recv() | ||
335 | refill recv ring | ||
336 | rds_ib_process_recv() | ||
337 | validate header checksum | ||
338 | copy header to rds_ib_incoming struct if start of a new datagram | ||
339 | add to ibinc's fraglist | ||
340 | if competed datagram: | ||
341 | update cong map if datagram was cong update | ||
342 | call rds_recv_incoming() otherwise | ||
343 | note if ack is required | ||
344 | rds_recv_incoming() | ||
345 | drop duplicate packets | ||
346 | respond to pings | ||
347 | find the sock associated with this datagram | ||
348 | add to sock queue | ||
349 | wake up sock | ||
350 | do some congestion calculations | ||
351 | rds_recvmsg | ||
352 | copy data into user iovec | ||
353 | handle CMSGs | ||
354 | return to application | ||
355 | |||
356 | |||
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt index dcf31648414a..eaa1a25946c1 100644 --- a/Documentation/networking/regulatory.txt +++ b/Documentation/networking/regulatory.txt | |||
@@ -89,7 +89,7 @@ added to this document when its support is enabled. | |||
89 | Device drivers who provide their own built regulatory domain | 89 | Device drivers who provide their own built regulatory domain |
90 | do not need a callback as the channels registered by them are | 90 | do not need a callback as the channels registered by them are |
91 | the only ones that will be allowed and therefore *additional* | 91 | the only ones that will be allowed and therefore *additional* |
92 | cannels cannot be enabled. | 92 | channels cannot be enabled. |
93 | 93 | ||
94 | Example code - drivers hinting an alpha2: | 94 | Example code - drivers hinting an alpha2: |
95 | ------------------------------------------ | 95 | ------------------------------------------ |
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt new file mode 100644 index 000000000000..0e58b4539176 --- /dev/null +++ b/Documentation/networking/timestamping.txt | |||
@@ -0,0 +1,180 @@ | |||
1 | The existing interfaces for getting network packages time stamped are: | ||
2 | |||
3 | * SO_TIMESTAMP | ||
4 | Generate time stamp for each incoming packet using the (not necessarily | ||
5 | monotonous!) system time. Result is returned via recv_msg() in a | ||
6 | control message as timeval (usec resolution). | ||
7 | |||
8 | * SO_TIMESTAMPNS | ||
9 | Same time stamping mechanism as SO_TIMESTAMP, but returns result as | ||
10 | timespec (nsec resolution). | ||
11 | |||
12 | * IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] | ||
13 | Only for multicasts: approximate send time stamp by receiving the looped | ||
14 | packet and using its receive time stamp. | ||
15 | |||
16 | The following interface complements the existing ones: receive time | ||
17 | stamps can be generated and returned for arbitrary packets and much | ||
18 | closer to the point where the packet is really sent. Time stamps can | ||
19 | be generated in software (as before) or in hardware (if the hardware | ||
20 | has such a feature). | ||
21 | |||
22 | SO_TIMESTAMPING: | ||
23 | |||
24 | Instructs the socket layer which kind of information is wanted. The | ||
25 | parameter is an integer with some of the following bits set. Setting | ||
26 | other bits is an error and doesn't change the current state. | ||
27 | |||
28 | SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware | ||
29 | SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or | ||
30 | fails, then do it in software | ||
31 | SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp | ||
32 | as generated by the hardware | ||
33 | SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or | ||
34 | fails, then do it in software | ||
35 | SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp | ||
36 | SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to | ||
37 | the system time base | ||
38 | SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in | ||
39 | software | ||
40 | |||
41 | SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. | ||
42 | SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the | ||
43 | following control message: | ||
44 | struct scm_timestamping { | ||
45 | struct timespec systime; | ||
46 | struct timespec hwtimetrans; | ||
47 | struct timespec hwtimeraw; | ||
48 | }; | ||
49 | |||
50 | recvmsg() can be used to get this control message for regular incoming | ||
51 | packets. For send time stamps the outgoing packet is looped back to | ||
52 | the socket's error queue with the send time stamp(s) attached. It can | ||
53 | be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the | ||
54 | original outgoing packet data including all headers preprended down to | ||
55 | and including the link layer, the scm_timestamping control message and | ||
56 | a sock_extended_err control message with ee_errno==ENOMSG and | ||
57 | ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending | ||
58 | bounced packet is ready for reading as far as select() is concerned. | ||
59 | If the outgoing packet has to be fragmented, then only the first | ||
60 | fragment is time stamped and returned to the sending socket. | ||
61 | |||
62 | All three values correspond to the same event in time, but were | ||
63 | generated in different ways. Each of these values may be empty (= all | ||
64 | zero), in which case no such value was available. If the application | ||
65 | is not interested in some of these values, they can be left blank to | ||
66 | avoid the potential overhead of calculating them. | ||
67 | |||
68 | systime is the value of the system time at that moment. This | ||
69 | corresponds to the value also returned via SO_TIMESTAMP[NS]. If the | ||
70 | time stamp was generated by hardware, then this field is | ||
71 | empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is | ||
72 | set. | ||
73 | |||
74 | hwtimeraw is the original hardware time stamp. Filled in if | ||
75 | SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its | ||
76 | relation to system time should be made. | ||
77 | |||
78 | hwtimetrans is the hardware time stamp transformed so that it | ||
79 | corresponds as good as possible to system time. This correlation is | ||
80 | not perfect; as a consequence, sorting packets received via different | ||
81 | NICs by their hwtimetrans may differ from the order in which they were | ||
82 | received. hwtimetrans may be non-monotonic even for the same NIC. | ||
83 | Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support | ||
84 | by the network device and will be empty without that support. | ||
85 | |||
86 | |||
87 | SIOCSHWTSTAMP: | ||
88 | |||
89 | Hardware time stamping must also be initialized for each device driver | ||
90 | that is expected to do hardware time stamping. The parameter is: | ||
91 | |||
92 | struct hwtstamp_config { | ||
93 | int flags; /* no flags defined right now, must be zero */ | ||
94 | int tx_type; /* HWTSTAMP_TX_* */ | ||
95 | int rx_filter; /* HWTSTAMP_FILTER_* */ | ||
96 | }; | ||
97 | |||
98 | Desired behavior is passed into the kernel and to a specific device by | ||
99 | calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose | ||
100 | ifr_data points to a struct hwtstamp_config. The tx_type and | ||
101 | rx_filter are hints to the driver what it is expected to do. If | ||
102 | the requested fine-grained filtering for incoming packets is not | ||
103 | supported, the driver may time stamp more than just the requested types | ||
104 | of packets. | ||
105 | |||
106 | A driver which supports hardware time stamping shall update the struct | ||
107 | with the actual, possibly more permissive configuration. If the | ||
108 | requested packets cannot be time stamped, then nothing should be | ||
109 | changed and ERANGE shall be returned (in contrast to EINVAL, which | ||
110 | indicates that SIOCSHWTSTAMP is not supported at all). | ||
111 | |||
112 | Only a processes with admin rights may change the configuration. User | ||
113 | space is responsible to ensure that multiple processes don't interfere | ||
114 | with each other and that the settings are reset. | ||
115 | |||
116 | /* possible values for hwtstamp_config->tx_type */ | ||
117 | enum { | ||
118 | /* | ||
119 | * no outgoing packet will need hardware time stamping; | ||
120 | * should a packet arrive which asks for it, no hardware | ||
121 | * time stamping will be done | ||
122 | */ | ||
123 | HWTSTAMP_TX_OFF, | ||
124 | |||
125 | /* | ||
126 | * enables hardware time stamping for outgoing packets; | ||
127 | * the sender of the packet decides which are to be | ||
128 | * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE | ||
129 | * before sending the packet | ||
130 | */ | ||
131 | HWTSTAMP_TX_ON, | ||
132 | }; | ||
133 | |||
134 | /* possible values for hwtstamp_config->rx_filter */ | ||
135 | enum { | ||
136 | /* time stamp no incoming packet at all */ | ||
137 | HWTSTAMP_FILTER_NONE, | ||
138 | |||
139 | /* time stamp any incoming packet */ | ||
140 | HWTSTAMP_FILTER_ALL, | ||
141 | |||
142 | /* return value: time stamp all packets requested plus some others */ | ||
143 | HWTSTAMP_FILTER_SOME, | ||
144 | |||
145 | /* PTP v1, UDP, any kind of event packet */ | ||
146 | HWTSTAMP_FILTER_PTP_V1_L4_EVENT, | ||
147 | |||
148 | ... | ||
149 | }; | ||
150 | |||
151 | |||
152 | DEVICE IMPLEMENTATION | ||
153 | |||
154 | A driver which supports hardware time stamping must support the | ||
155 | SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored | ||
156 | in the skb with skb_hwtstamp_set(). | ||
157 | |||
158 | Time stamps for outgoing packets are to be generated as follows: | ||
159 | - In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware() | ||
160 | returns non-zero. If yes, then the driver is expected | ||
161 | to do hardware time stamping. | ||
162 | - If this is possible for the skb and requested, then declare | ||
163 | that the driver is doing the time stamping by calling | ||
164 | skb_hwtstamp_tx_in_progress(). A driver not supporting | ||
165 | hardware time stamping doesn't do that. A driver must never | ||
166 | touch sk_buff::tstamp! It is used to store how time stamping | ||
167 | for an outgoing packets is to be done. | ||
168 | - As soon as the driver has sent the packet and/or obtained a | ||
169 | hardware time stamp for it, it passes the time stamp back by | ||
170 | calling skb_hwtstamp_tx() with the original skb, the raw | ||
171 | hardware time stamp and a handle to the device (necessary | ||
172 | to convert the hardware time stamp to system time). If obtaining | ||
173 | the hardware time stamp somehow fails, then the driver should | ||
174 | not fall back to software time stamping. The rationale is that | ||
175 | this would occur at a later time in the processing pipeline | ||
176 | than other software time stamping and therefore could lead | ||
177 | to unexpected deltas between time stamps. | ||
178 | - If the driver did not call skb_hwtstamp_tx_in_progress(), then | ||
179 | dev_hard_start_xmit() checks whether software time stamping | ||
180 | is wanted as fallback and potentially generates the time stamp. | ||
diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore new file mode 100644 index 000000000000..71e81eb2e22f --- /dev/null +++ b/Documentation/networking/timestamping/.gitignore | |||
@@ -0,0 +1 @@ | |||
timestamping | |||
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile new file mode 100644 index 000000000000..2a1489fdc036 --- /dev/null +++ b/Documentation/networking/timestamping/Makefile | |||
@@ -0,0 +1,6 @@ | |||
1 | CPPFLAGS = -I../../../include | ||
2 | |||
3 | timestamping: timestamping.c | ||
4 | |||
5 | clean: | ||
6 | rm -f timestamping | ||
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c new file mode 100644 index 000000000000..43d143104210 --- /dev/null +++ b/Documentation/networking/timestamping/timestamping.c | |||
@@ -0,0 +1,533 @@ | |||
1 | /* | ||
2 | * This program demonstrates how the various time stamping features in | ||
3 | * the Linux kernel work. It emulates the behavior of a PTP | ||
4 | * implementation in stand-alone master mode by sending PTPv1 Sync | ||
5 | * multicasts once every second. It looks for similar packets, but | ||
6 | * beyond that doesn't actually implement PTP. | ||
7 | * | ||
8 | * Outgoing packets are time stamped with SO_TIMESTAMPING with or | ||
9 | * without hardware support. | ||
10 | * | ||
11 | * Incoming packets are time stamped with SO_TIMESTAMPING with or | ||
12 | * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and | ||
13 | * SO_TIMESTAMP[NS]. | ||
14 | * | ||
15 | * Copyright (C) 2009 Intel Corporation. | ||
16 | * Author: Patrick Ohly <patrick.ohly@intel.com> | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or modify it | ||
19 | * under the terms and conditions of the GNU General Public License, | ||
20 | * version 2, as published by the Free Software Foundation. | ||
21 | * | ||
22 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
23 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
24 | * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for | ||
25 | * more details. | ||
26 | * | ||
27 | * You should have received a copy of the GNU General Public License along with | ||
28 | * this program; if not, write to the Free Software Foundation, Inc., | ||
29 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
30 | */ | ||
31 | |||
32 | #include <stdio.h> | ||
33 | #include <stdlib.h> | ||
34 | #include <errno.h> | ||
35 | #include <string.h> | ||
36 | |||
37 | #include <sys/time.h> | ||
38 | #include <sys/socket.h> | ||
39 | #include <sys/select.h> | ||
40 | #include <sys/ioctl.h> | ||
41 | #include <arpa/inet.h> | ||
42 | #include <net/if.h> | ||
43 | |||
44 | #include "asm/types.h" | ||
45 | #include "linux/net_tstamp.h" | ||
46 | #include "linux/errqueue.h" | ||
47 | |||
48 | #ifndef SO_TIMESTAMPING | ||
49 | # define SO_TIMESTAMPING 37 | ||
50 | # define SCM_TIMESTAMPING SO_TIMESTAMPING | ||
51 | #endif | ||
52 | |||
53 | #ifndef SO_TIMESTAMPNS | ||
54 | # define SO_TIMESTAMPNS 35 | ||
55 | #endif | ||
56 | |||
57 | #ifndef SIOCGSTAMPNS | ||
58 | # define SIOCGSTAMPNS 0x8907 | ||
59 | #endif | ||
60 | |||
61 | #ifndef SIOCSHWTSTAMP | ||
62 | # define SIOCSHWTSTAMP 0x89b0 | ||
63 | #endif | ||
64 | |||
65 | static void usage(const char *error) | ||
66 | { | ||
67 | if (error) | ||
68 | printf("invalid option: %s\n", error); | ||
69 | printf("timestamping interface option*\n\n" | ||
70 | "Options:\n" | ||
71 | " IP_MULTICAST_LOOP - looping outgoing multicasts\n" | ||
72 | " SO_TIMESTAMP - normal software time stamping, ms resolution\n" | ||
73 | " SO_TIMESTAMPNS - more accurate software time stamping\n" | ||
74 | " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" | ||
75 | " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" | ||
76 | " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" | ||
77 | " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" | ||
78 | " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" | ||
79 | " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" | ||
80 | " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" | ||
81 | " SIOCGSTAMP - check last socket time stamp\n" | ||
82 | " SIOCGSTAMPNS - more accurate socket time stamp\n"); | ||
83 | exit(1); | ||
84 | } | ||
85 | |||
86 | static void bail(const char *error) | ||
87 | { | ||
88 | printf("%s: %s\n", error, strerror(errno)); | ||
89 | exit(1); | ||
90 | } | ||
91 | |||
92 | static const unsigned char sync[] = { | ||
93 | 0x00, 0x01, 0x00, 0x01, | ||
94 | 0x5f, 0x44, 0x46, 0x4c, | ||
95 | 0x54, 0x00, 0x00, 0x00, | ||
96 | 0x00, 0x00, 0x00, 0x00, | ||
97 | 0x00, 0x00, 0x00, 0x00, | ||
98 | 0x01, 0x01, | ||
99 | |||
100 | /* fake uuid */ | ||
101 | 0x00, 0x01, | ||
102 | 0x02, 0x03, 0x04, 0x05, | ||
103 | |||
104 | 0x00, 0x01, 0x00, 0x37, | ||
105 | 0x00, 0x00, 0x00, 0x08, | ||
106 | 0x00, 0x00, 0x00, 0x00, | ||
107 | 0x49, 0x05, 0xcd, 0x01, | ||
108 | 0x29, 0xb1, 0x8d, 0xb0, | ||
109 | 0x00, 0x00, 0x00, 0x00, | ||
110 | 0x00, 0x01, | ||
111 | |||
112 | /* fake uuid */ | ||
113 | 0x00, 0x01, | ||
114 | 0x02, 0x03, 0x04, 0x05, | ||
115 | |||
116 | 0x00, 0x00, 0x00, 0x37, | ||
117 | 0x00, 0x00, 0x00, 0x04, | ||
118 | 0x44, 0x46, 0x4c, 0x54, | ||
119 | 0x00, 0x00, 0xf0, 0x60, | ||
120 | 0x00, 0x01, 0x00, 0x00, | ||
121 | 0x00, 0x00, 0x00, 0x01, | ||
122 | 0x00, 0x00, 0xf0, 0x60, | ||
123 | 0x00, 0x00, 0x00, 0x00, | ||
124 | 0x00, 0x00, 0x00, 0x04, | ||
125 | 0x44, 0x46, 0x4c, 0x54, | ||
126 | 0x00, 0x01, | ||
127 | |||
128 | /* fake uuid */ | ||
129 | 0x00, 0x01, | ||
130 | 0x02, 0x03, 0x04, 0x05, | ||
131 | |||
132 | 0x00, 0x00, 0x00, 0x00, | ||
133 | 0x00, 0x00, 0x00, 0x00, | ||
134 | 0x00, 0x00, 0x00, 0x00, | ||
135 | 0x00, 0x00, 0x00, 0x00 | ||
136 | }; | ||
137 | |||
138 | static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) | ||
139 | { | ||
140 | struct timeval now; | ||
141 | int res; | ||
142 | |||
143 | res = sendto(sock, sync, sizeof(sync), 0, | ||
144 | addr, addr_len); | ||
145 | gettimeofday(&now, 0); | ||
146 | if (res < 0) | ||
147 | printf("%s: %s\n", "send", strerror(errno)); | ||
148 | else | ||
149 | printf("%ld.%06ld: sent %d bytes\n", | ||
150 | (long)now.tv_sec, (long)now.tv_usec, | ||
151 | res); | ||
152 | } | ||
153 | |||
154 | static void printpacket(struct msghdr *msg, int res, | ||
155 | char *data, | ||
156 | int sock, int recvmsg_flags, | ||
157 | int siocgstamp, int siocgstampns) | ||
158 | { | ||
159 | struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; | ||
160 | struct cmsghdr *cmsg; | ||
161 | struct timeval tv; | ||
162 | struct timespec ts; | ||
163 | struct timeval now; | ||
164 | |||
165 | gettimeofday(&now, 0); | ||
166 | |||
167 | printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n", | ||
168 | (long)now.tv_sec, (long)now.tv_usec, | ||
169 | (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", | ||
170 | res, | ||
171 | inet_ntoa(from_addr->sin_addr), | ||
172 | msg->msg_controllen); | ||
173 | for (cmsg = CMSG_FIRSTHDR(msg); | ||
174 | cmsg; | ||
175 | cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
176 | printf(" cmsg len %d: ", cmsg->cmsg_len); | ||
177 | switch (cmsg->cmsg_level) { | ||
178 | case SOL_SOCKET: | ||
179 | printf("SOL_SOCKET "); | ||
180 | switch (cmsg->cmsg_type) { | ||
181 | case SO_TIMESTAMP: { | ||
182 | struct timeval *stamp = | ||
183 | (struct timeval *)CMSG_DATA(cmsg); | ||
184 | printf("SO_TIMESTAMP %ld.%06ld", | ||
185 | (long)stamp->tv_sec, | ||
186 | (long)stamp->tv_usec); | ||
187 | break; | ||
188 | } | ||
189 | case SO_TIMESTAMPNS: { | ||
190 | struct timespec *stamp = | ||
191 | (struct timespec *)CMSG_DATA(cmsg); | ||
192 | printf("SO_TIMESTAMPNS %ld.%09ld", | ||
193 | (long)stamp->tv_sec, | ||
194 | (long)stamp->tv_nsec); | ||
195 | break; | ||
196 | } | ||
197 | case SO_TIMESTAMPING: { | ||
198 | struct timespec *stamp = | ||
199 | (struct timespec *)CMSG_DATA(cmsg); | ||
200 | printf("SO_TIMESTAMPING "); | ||
201 | printf("SW %ld.%09ld ", | ||
202 | (long)stamp->tv_sec, | ||
203 | (long)stamp->tv_nsec); | ||
204 | stamp++; | ||
205 | printf("HW transformed %ld.%09ld ", | ||
206 | (long)stamp->tv_sec, | ||
207 | (long)stamp->tv_nsec); | ||
208 | stamp++; | ||
209 | printf("HW raw %ld.%09ld", | ||
210 | (long)stamp->tv_sec, | ||
211 | (long)stamp->tv_nsec); | ||
212 | break; | ||
213 | } | ||
214 | default: | ||
215 | printf("type %d", cmsg->cmsg_type); | ||
216 | break; | ||
217 | } | ||
218 | break; | ||
219 | case IPPROTO_IP: | ||
220 | printf("IPPROTO_IP "); | ||
221 | switch (cmsg->cmsg_type) { | ||
222 | case IP_RECVERR: { | ||
223 | struct sock_extended_err *err = | ||
224 | (struct sock_extended_err *)CMSG_DATA(cmsg); | ||
225 | printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", | ||
226 | strerror(err->ee_errno), | ||
227 | err->ee_origin, | ||
228 | #ifdef SO_EE_ORIGIN_TIMESTAMPING | ||
229 | err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? | ||
230 | "bounced packet" : "unexpected origin" | ||
231 | #else | ||
232 | "probably SO_EE_ORIGIN_TIMESTAMPING" | ||
233 | #endif | ||
234 | ); | ||
235 | if (res < sizeof(sync)) | ||
236 | printf(" => truncated data?!"); | ||
237 | else if (!memcmp(sync, data + res - sizeof(sync), | ||
238 | sizeof(sync))) | ||
239 | printf(" => GOT OUR DATA BACK (HURRAY!)"); | ||
240 | break; | ||
241 | } | ||
242 | case IP_PKTINFO: { | ||
243 | struct in_pktinfo *pktinfo = | ||
244 | (struct in_pktinfo *)CMSG_DATA(cmsg); | ||
245 | printf("IP_PKTINFO interface index %u", | ||
246 | pktinfo->ipi_ifindex); | ||
247 | break; | ||
248 | } | ||
249 | default: | ||
250 | printf("type %d", cmsg->cmsg_type); | ||
251 | break; | ||
252 | } | ||
253 | break; | ||
254 | default: | ||
255 | printf("level %d type %d", | ||
256 | cmsg->cmsg_level, | ||
257 | cmsg->cmsg_type); | ||
258 | break; | ||
259 | } | ||
260 | printf("\n"); | ||
261 | } | ||
262 | |||
263 | if (siocgstamp) { | ||
264 | if (ioctl(sock, SIOCGSTAMP, &tv)) | ||
265 | printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); | ||
266 | else | ||
267 | printf("SIOCGSTAMP %ld.%06ld\n", | ||
268 | (long)tv.tv_sec, | ||
269 | (long)tv.tv_usec); | ||
270 | } | ||
271 | if (siocgstampns) { | ||
272 | if (ioctl(sock, SIOCGSTAMPNS, &ts)) | ||
273 | printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); | ||
274 | else | ||
275 | printf("SIOCGSTAMPNS %ld.%09ld\n", | ||
276 | (long)ts.tv_sec, | ||
277 | (long)ts.tv_nsec); | ||
278 | } | ||
279 | } | ||
280 | |||
281 | static void recvpacket(int sock, int recvmsg_flags, | ||
282 | int siocgstamp, int siocgstampns) | ||
283 | { | ||
284 | char data[256]; | ||
285 | struct msghdr msg; | ||
286 | struct iovec entry; | ||
287 | struct sockaddr_in from_addr; | ||
288 | struct { | ||
289 | struct cmsghdr cm; | ||
290 | char control[512]; | ||
291 | } control; | ||
292 | int res; | ||
293 | |||
294 | memset(&msg, 0, sizeof(msg)); | ||
295 | msg.msg_iov = &entry; | ||
296 | msg.msg_iovlen = 1; | ||
297 | entry.iov_base = data; | ||
298 | entry.iov_len = sizeof(data); | ||
299 | msg.msg_name = (caddr_t)&from_addr; | ||
300 | msg.msg_namelen = sizeof(from_addr); | ||
301 | msg.msg_control = &control; | ||
302 | msg.msg_controllen = sizeof(control); | ||
303 | |||
304 | res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); | ||
305 | if (res < 0) { | ||
306 | printf("%s %s: %s\n", | ||
307 | "recvmsg", | ||
308 | (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", | ||
309 | strerror(errno)); | ||
310 | } else { | ||
311 | printpacket(&msg, res, data, | ||
312 | sock, recvmsg_flags, | ||
313 | siocgstamp, siocgstampns); | ||
314 | } | ||
315 | } | ||
316 | |||
317 | int main(int argc, char **argv) | ||
318 | { | ||
319 | int so_timestamping_flags = 0; | ||
320 | int so_timestamp = 0; | ||
321 | int so_timestampns = 0; | ||
322 | int siocgstamp = 0; | ||
323 | int siocgstampns = 0; | ||
324 | int ip_multicast_loop = 0; | ||
325 | char *interface; | ||
326 | int i; | ||
327 | int enabled = 1; | ||
328 | int sock; | ||
329 | struct ifreq device; | ||
330 | struct ifreq hwtstamp; | ||
331 | struct hwtstamp_config hwconfig, hwconfig_requested; | ||
332 | struct sockaddr_in addr; | ||
333 | struct ip_mreq imr; | ||
334 | struct in_addr iaddr; | ||
335 | int val; | ||
336 | socklen_t len; | ||
337 | struct timeval next; | ||
338 | |||
339 | if (argc < 2) | ||
340 | usage(0); | ||
341 | interface = argv[1]; | ||
342 | |||
343 | for (i = 2; i < argc; i++) { | ||
344 | if (!strcasecmp(argv[i], "SO_TIMESTAMP")) | ||
345 | so_timestamp = 1; | ||
346 | else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) | ||
347 | so_timestampns = 1; | ||
348 | else if (!strcasecmp(argv[i], "SIOCGSTAMP")) | ||
349 | siocgstamp = 1; | ||
350 | else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) | ||
351 | siocgstampns = 1; | ||
352 | else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) | ||
353 | ip_multicast_loop = 1; | ||
354 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) | ||
355 | so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; | ||
356 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) | ||
357 | so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; | ||
358 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) | ||
359 | so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; | ||
360 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) | ||
361 | so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; | ||
362 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) | ||
363 | so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; | ||
364 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) | ||
365 | so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; | ||
366 | else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) | ||
367 | so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; | ||
368 | else | ||
369 | usage(argv[i]); | ||
370 | } | ||
371 | |||
372 | sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); | ||
373 | if (socket < 0) | ||
374 | bail("socket"); | ||
375 | |||
376 | memset(&device, 0, sizeof(device)); | ||
377 | strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); | ||
378 | if (ioctl(sock, SIOCGIFADDR, &device) < 0) | ||
379 | bail("getting interface IP address"); | ||
380 | |||
381 | memset(&hwtstamp, 0, sizeof(hwtstamp)); | ||
382 | strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); | ||
383 | hwtstamp.ifr_data = (void *)&hwconfig; | ||
384 | memset(&hwconfig, 0, sizeof(&hwconfig)); | ||
385 | hwconfig.tx_type = | ||
386 | (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? | ||
387 | HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; | ||
388 | hwconfig.rx_filter = | ||
389 | (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? | ||
390 | HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; | ||
391 | hwconfig_requested = hwconfig; | ||
392 | if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { | ||
393 | if ((errno == EINVAL || errno == ENOTSUP) && | ||
394 | hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && | ||
395 | hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) | ||
396 | printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); | ||
397 | else | ||
398 | bail("SIOCSHWTSTAMP"); | ||
399 | } | ||
400 | printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", | ||
401 | hwconfig_requested.tx_type, hwconfig.tx_type, | ||
402 | hwconfig_requested.rx_filter, hwconfig.rx_filter); | ||
403 | |||
404 | /* bind to PTP port */ | ||
405 | addr.sin_family = AF_INET; | ||
406 | addr.sin_addr.s_addr = htonl(INADDR_ANY); | ||
407 | addr.sin_port = htons(319 /* PTP event port */); | ||
408 | if (bind(sock, | ||
409 | (struct sockaddr *)&addr, | ||
410 | sizeof(struct sockaddr_in)) < 0) | ||
411 | bail("bind"); | ||
412 | |||
413 | /* set multicast group for outgoing packets */ | ||
414 | inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ | ||
415 | addr.sin_addr = iaddr; | ||
416 | imr.imr_multiaddr.s_addr = iaddr.s_addr; | ||
417 | imr.imr_interface.s_addr = | ||
418 | ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; | ||
419 | if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, | ||
420 | &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) | ||
421 | bail("set multicast"); | ||
422 | |||
423 | /* join multicast group, loop our own packet */ | ||
424 | if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, | ||
425 | &imr, sizeof(struct ip_mreq)) < 0) | ||
426 | bail("join multicast group"); | ||
427 | |||
428 | if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, | ||
429 | &ip_multicast_loop, sizeof(enabled)) < 0) { | ||
430 | bail("loop multicast"); | ||
431 | } | ||
432 | |||
433 | /* set socket options for time stamping */ | ||
434 | if (so_timestamp && | ||
435 | setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, | ||
436 | &enabled, sizeof(enabled)) < 0) | ||
437 | bail("setsockopt SO_TIMESTAMP"); | ||
438 | |||
439 | if (so_timestampns && | ||
440 | setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, | ||
441 | &enabled, sizeof(enabled)) < 0) | ||
442 | bail("setsockopt SO_TIMESTAMPNS"); | ||
443 | |||
444 | if (so_timestamping_flags && | ||
445 | setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, | ||
446 | &so_timestamping_flags, | ||
447 | sizeof(so_timestamping_flags)) < 0) | ||
448 | bail("setsockopt SO_TIMESTAMPING"); | ||
449 | |||
450 | /* request IP_PKTINFO for debugging purposes */ | ||
451 | if (setsockopt(sock, SOL_IP, IP_PKTINFO, | ||
452 | &enabled, sizeof(enabled)) < 0) | ||
453 | printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); | ||
454 | |||
455 | /* verify socket options */ | ||
456 | len = sizeof(val); | ||
457 | if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) | ||
458 | printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); | ||
459 | else | ||
460 | printf("SO_TIMESTAMP %d\n", val); | ||
461 | |||
462 | if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) | ||
463 | printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", | ||
464 | strerror(errno)); | ||
465 | else | ||
466 | printf("SO_TIMESTAMPNS %d\n", val); | ||
467 | |||
468 | if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { | ||
469 | printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", | ||
470 | strerror(errno)); | ||
471 | } else { | ||
472 | printf("SO_TIMESTAMPING %d\n", val); | ||
473 | if (val != so_timestamping_flags) | ||
474 | printf(" not the expected value %d\n", | ||
475 | so_timestamping_flags); | ||
476 | } | ||
477 | |||
478 | /* send packets forever every five seconds */ | ||
479 | gettimeofday(&next, 0); | ||
480 | next.tv_sec = (next.tv_sec + 1) / 5 * 5; | ||
481 | next.tv_usec = 0; | ||
482 | while (1) { | ||
483 | struct timeval now; | ||
484 | struct timeval delta; | ||
485 | long delta_us; | ||
486 | int res; | ||
487 | fd_set readfs, errorfs; | ||
488 | |||
489 | gettimeofday(&now, 0); | ||
490 | delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + | ||
491 | (long)(next.tv_usec - now.tv_usec); | ||
492 | if (delta_us > 0) { | ||
493 | /* continue waiting for timeout or data */ | ||
494 | delta.tv_sec = delta_us / 1000000; | ||
495 | delta.tv_usec = delta_us % 1000000; | ||
496 | |||
497 | FD_ZERO(&readfs); | ||
498 | FD_ZERO(&errorfs); | ||
499 | FD_SET(sock, &readfs); | ||
500 | FD_SET(sock, &errorfs); | ||
501 | printf("%ld.%06ld: select %ldus\n", | ||
502 | (long)now.tv_sec, (long)now.tv_usec, | ||
503 | delta_us); | ||
504 | res = select(sock + 1, &readfs, 0, &errorfs, &delta); | ||
505 | gettimeofday(&now, 0); | ||
506 | printf("%ld.%06ld: select returned: %d, %s\n", | ||
507 | (long)now.tv_sec, (long)now.tv_usec, | ||
508 | res, | ||
509 | res < 0 ? strerror(errno) : "success"); | ||
510 | if (res > 0) { | ||
511 | if (FD_ISSET(sock, &readfs)) | ||
512 | printf("ready for reading\n"); | ||
513 | if (FD_ISSET(sock, &errorfs)) | ||
514 | printf("has error\n"); | ||
515 | recvpacket(sock, 0, | ||
516 | siocgstamp, | ||
517 | siocgstampns); | ||
518 | recvpacket(sock, MSG_ERRQUEUE, | ||
519 | siocgstamp, | ||
520 | siocgstampns); | ||
521 | } | ||
522 | } else { | ||
523 | /* write one packet */ | ||
524 | sendpacket(sock, | ||
525 | (struct sockaddr *)&addr, | ||
526 | sizeof(addr)); | ||
527 | next.tv_sec += 5; | ||
528 | continue; | ||
529 | } | ||
530 | } | ||
531 | |||
532 | return 0; | ||
533 | } | ||
diff --git a/Documentation/networking/vxge.txt b/Documentation/networking/vxge.txt new file mode 100644 index 000000000000..d2e2997e6fa0 --- /dev/null +++ b/Documentation/networking/vxge.txt | |||
@@ -0,0 +1,100 @@ | |||
1 | Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver | ||
2 | ============================================================================== | ||
3 | |||
4 | Contents | ||
5 | -------- | ||
6 | |||
7 | 1) Introduction | ||
8 | 2) Features supported | ||
9 | 3) Configurable driver parameters | ||
10 | 4) Troubleshooting | ||
11 | |||
12 | 1) Introduction: | ||
13 | ---------------- | ||
14 | This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O | ||
15 | Virtualized Server adapters. | ||
16 | The X3100 series supports four modes of operation, configurable via | ||
17 | firmware - | ||
18 | Single function mode | ||
19 | Multi function mode | ||
20 | SRIOV mode | ||
21 | MRIOV mode | ||
22 | The functions share a 10GbE link and the pci-e bus, but hardly anything else | ||
23 | inside the ASIC. Features like independent hw reset, statistics, bandwidth/ | ||
24 | priority allocation and guarantees, GRO, TSO, interrupt moderation etc are | ||
25 | supported independently on each function. | ||
26 | |||
27 | (See below for a complete list of features supported for both IPv4 and IPv6) | ||
28 | |||
29 | 2) Features supported: | ||
30 | ---------------------- | ||
31 | |||
32 | i) Single function mode (up to 17 queues) | ||
33 | |||
34 | ii) Multi function mode (up to 17 functions) | ||
35 | |||
36 | iii) PCI-SIG's I/O Virtualization | ||
37 | - Single Root mode: v1.0 (up to 17 functions) | ||
38 | - Multi-Root mode: v1.0 (up to 17 functions) | ||
39 | |||
40 | iv) Jumbo frames | ||
41 | X3100 Series supports MTU up to 9600 bytes, modifiable using | ||
42 | ifconfig command. | ||
43 | |||
44 | v) Offloads supported: (Enabled by default) | ||
45 | Checksum offload (TCP/UDP/IP) on transmit and receive paths | ||
46 | TCP Segmentation Offload (TSO) on transmit path | ||
47 | Generic Receive Offload (GRO) on receive path | ||
48 | |||
49 | vi) MSI-X: (Enabled by default) | ||
50 | Resulting in noticeable performance improvement (up to 7% on certain | ||
51 | platforms). | ||
52 | |||
53 | vii) NAPI: (Enabled by default) | ||
54 | For better Rx interrupt moderation. | ||
55 | |||
56 | viii)RTH (Receive Traffic Hash): (Enabled by default) | ||
57 | Receive side steering for better scaling. | ||
58 | |||
59 | ix) Statistics | ||
60 | Comprehensive MAC-level and software statistics displayed using | ||
61 | "ethtool -S" option. | ||
62 | |||
63 | x) Multiple hardware queues: (Enabled by default) | ||
64 | Up to 17 hardware based transmit and receive data channels, with | ||
65 | multiple steering options (transmit multiqueue enabled by default). | ||
66 | |||
67 | 3) Configurable driver parameters: | ||
68 | ---------------------------------- | ||
69 | |||
70 | i) max_config_dev | ||
71 | Specifies maximum device functions to be enabled. | ||
72 | Valid range: 1-8 | ||
73 | |||
74 | ii) max_config_port | ||
75 | Specifies number of ports to be enabled. | ||
76 | Valid range: 1,2 | ||
77 | Default: 1 | ||
78 | |||
79 | iii)max_config_vpath | ||
80 | Specifies maximum VPATH(s) configured for each device function. | ||
81 | Valid range: 1-17 | ||
82 | |||
83 | iv) vlan_tag_strip | ||
84 | Enables/disables vlan tag stripping from all received tagged frames that | ||
85 | are not replicated at the internal L2 switch. | ||
86 | Valid range: 0,1 (disabled, enabled respectively) | ||
87 | Default: 1 | ||
88 | |||
89 | v) addr_learn_en | ||
90 | Enable learning the mac address of the guest OS interface in | ||
91 | virtualization environment. | ||
92 | Valid range: 0,1 (disabled, enabled respectively) | ||
93 | Default: 0 | ||
94 | |||
95 | 4) Troubleshooting: | ||
96 | ------------------- | ||
97 | |||
98 | To resolve an issue with the source code or X3100 series adapter, please collect | ||
99 | the statistics, register dumps using ethool, relevant logs and email them to | ||
100 | support@neterion.com. | ||
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 421e7d00ffd0..c9abbd86bc18 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices: | |||
75 | struct bus_type { | 75 | struct bus_type { |
76 | ... | 76 | ... |
77 | int (*suspend)(struct device *dev, pm_message_t state); | 77 | int (*suspend)(struct device *dev, pm_message_t state); |
78 | int (*suspend_late)(struct device *dev, pm_message_t state); | ||
79 | |||
80 | int (*resume_early)(struct device *dev); | ||
81 | int (*resume)(struct device *dev); | 78 | int (*resume)(struct device *dev); |
82 | }; | 79 | }; |
83 | 80 | ||
@@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order: | |||
226 | 223 | ||
227 | This call should handle parts of device suspend logic that require | 224 | This call should handle parts of device suspend logic that require |
228 | sleeping. It probably does work to quiesce the device which hasn't | 225 | sleeping. It probably does work to quiesce the device which hasn't |
229 | been abstracted into class.suspend() or bus.suspend_late(). | 226 | been abstracted into class.suspend(). |
230 | |||
231 | 3 bus.suspend_late(dev, message) is called with IRQs disabled, and | ||
232 | with only one CPU active. Until the bus.resume_early() phase | ||
233 | completes (see later), IRQs are not enabled again. This method | ||
234 | won't be exposed by all busses; for message based busses like USB, | ||
235 | I2C, or SPI, device interactions normally require IRQs. This bus | ||
236 | call may be morphed into a driver call with bus-specific parameters. | ||
237 | |||
238 | This call might save low level hardware state that might otherwise | ||
239 | be lost in the upcoming low power state, and actually put the | ||
240 | device into a low power state ... so that in some cases the device | ||
241 | may stay partly usable until this late. This "late" call may also | ||
242 | help when coping with hardware that behaves badly. | ||
243 | 227 | ||
244 | The pm_message_t parameter is currently used to refine those semantics | 228 | The pm_message_t parameter is currently used to refine those semantics |
245 | (described later). | 229 | (described later). |
@@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins. | |||
351 | 335 | ||
352 | The phases are seen by driver notifications issued in this order: | 336 | The phases are seen by driver notifications issued in this order: |
353 | 337 | ||
354 | 1 bus.resume_early(dev) is called with IRQs disabled, and with | 338 | 1 bus.resume(dev) reverses the effects of bus.suspend(). This may |
355 | only one CPU active. As with bus.suspend_late(), this method | 339 | be morphed into a device driver call with bus-specific parameters; |
356 | won't be supported on busses that require IRQs in order to | 340 | implementations may sleep. |
357 | interact with devices. | ||
358 | |||
359 | This reverses the effects of bus.suspend_late(). | ||
360 | |||
361 | 2 bus.resume(dev) is called next. This may be morphed into a device | ||
362 | driver call with bus-specific parameters; implementations may sleep. | ||
363 | |||
364 | This reverses the effects of bus.suspend(). | ||
365 | 341 | ||
366 | 3 class.resume(dev) is called for devices associated with a class | 342 | 2 class.resume(dev) is called for devices associated with a class |
367 | that has such a method. Implementations may sleep. | 343 | that has such a method. Implementations may sleep. |
368 | 344 | ||
369 | This reverses the effects of class.suspend(), and would usually | 345 | This reverses the effects of class.suspend(), and would usually |
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt index 82b7a43aadba..5f83fd24ea84 100644 --- a/Documentation/power/regulator/consumer.txt +++ b/Documentation/power/regulator/consumer.txt | |||
@@ -178,5 +178,5 @@ Consumers can uregister interest by calling :- | |||
178 | int regulator_unregister_notifier(struct regulator *regulator, | 178 | int regulator_unregister_notifier(struct regulator *regulator, |
179 | struct notifier_block *nb); | 179 | struct notifier_block *nb); |
180 | 180 | ||
181 | Regulators use the kernel notifier framework to send event to thier interested | 181 | Regulators use the kernel notifier framework to send event to their interested |
182 | consumers. | 182 | consumers. |
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt index bdcb332bd7fb..0cded696ca01 100644 --- a/Documentation/power/regulator/overview.txt +++ b/Documentation/power/regulator/overview.txt | |||
@@ -119,7 +119,7 @@ Some terms used in this document:- | |||
119 | battery power, USB power) | 119 | battery power, USB power) |
120 | 120 | ||
121 | Regulator Domains: is the new current limit within the | 121 | Regulator Domains: is the new current limit within the |
122 | regulator operating parameters for input/ouput voltage. | 122 | regulator operating parameters for input/output voltage. |
123 | 123 | ||
124 | If the regulator request passes all the constraint tests | 124 | If the regulator request passes all the constraint tests |
125 | then the new regulator value is applied. | 125 | then the new regulator value is applied. |
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt index 2ebdc6091ce1..514b94fc931e 100644 --- a/Documentation/power/s2ram.txt +++ b/Documentation/power/s2ram.txt | |||
@@ -63,7 +63,7 @@ hardware during resume operations where a value can be set that will | |||
63 | survive a reboot. | 63 | survive a reboot. |
64 | 64 | ||
65 | Consequence is that after a resume (even if it is successful) your system | 65 | Consequence is that after a resume (even if it is successful) your system |
66 | clock will have a value corresponding to the magic mumber instead of the | 66 | clock will have a value corresponding to the magic number instead of the |
67 | correct date/time! It is therefore advisable to use a program like ntp-date | 67 | correct date/time! It is therefore advisable to use a program like ntp-date |
68 | or rdate to reset the correct date/time from an external time source when | 68 | or rdate to reset the correct date/time from an external time source when |
69 | using this trace option. | 69 | using this trace option. |
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt index 7b99636564c8..b967cd9137d6 100644 --- a/Documentation/power/userland-swsusp.txt +++ b/Documentation/power/userland-swsusp.txt | |||
@@ -109,7 +109,7 @@ unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are | |||
109 | still frozen when the device is being closed). | 109 | still frozen when the device is being closed). |
110 | 110 | ||
111 | Currently it is assumed that the userland utilities reading/writing the | 111 | Currently it is assumed that the userland utilities reading/writing the |
112 | snapshot image from/to the kernel will use a swap parition, called the resume | 112 | snapshot image from/to the kernel will use a swap partition, called the resume |
113 | partition, or a swap file as storage space (if a swap file is used, the resume | 113 | partition, or a swap file as storage space (if a swap file is used, the resume |
114 | partition is the partition that holds this file). However, this is not really | 114 | partition is the partition that holds this file). However, this is not really |
115 | required, as they can use, for example, a special (blank) suspend partition or | 115 | required, as they can use, for example, a special (blank) suspend partition or |
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 0ab0230cbcb0..8d999d862d0e 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt | |||
@@ -43,12 +43,11 @@ Table of Contents | |||
43 | 2) Representing devices without a current OF specification | 43 | 2) Representing devices without a current OF specification |
44 | a) PHY nodes | 44 | a) PHY nodes |
45 | b) Interrupt controllers | 45 | b) Interrupt controllers |
46 | c) CFI or JEDEC memory-mapped NOR flash | 46 | c) 4xx/Axon EMAC ethernet nodes |
47 | d) 4xx/Axon EMAC ethernet nodes | 47 | d) Xilinx IP cores |
48 | e) Xilinx IP cores | 48 | e) USB EHCI controllers |
49 | f) USB EHCI controllers | 49 | f) MDIO on GPIOs |
50 | g) MDIO on GPIOs | 50 | g) SPI busses |
51 | h) SPI busses | ||
52 | 51 | ||
53 | VII - Marvell Discovery mv64[345]6x System Controller chips | 52 | VII - Marvell Discovery mv64[345]6x System Controller chips |
54 | 1) The /system-controller node | 53 | 1) The /system-controller node |
@@ -999,7 +998,7 @@ compatibility. | |||
999 | translation of SOC addresses for memory mapped SOC registers. | 998 | translation of SOC addresses for memory mapped SOC registers. |
1000 | - bus-frequency: Contains the bus frequency for the SOC node. | 999 | - bus-frequency: Contains the bus frequency for the SOC node. |
1001 | Typically, the value of this field is filled in by the boot | 1000 | Typically, the value of this field is filled in by the boot |
1002 | loader. | 1001 | loader. |
1003 | 1002 | ||
1004 | 1003 | ||
1005 | Recommended properties: | 1004 | Recommended properties: |
@@ -1287,71 +1286,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1287 | device_type = "open-pic"; | 1286 | device_type = "open-pic"; |
1288 | }; | 1287 | }; |
1289 | 1288 | ||
1290 | c) CFI or JEDEC memory-mapped NOR flash | 1289 | c) 4xx/Axon EMAC ethernet nodes |
1291 | |||
1292 | Flash chips (Memory Technology Devices) are often used for solid state | ||
1293 | file systems on embedded devices. | ||
1294 | |||
1295 | - compatible : should contain the specific model of flash chip(s) | ||
1296 | used, if known, followed by either "cfi-flash" or "jedec-flash" | ||
1297 | - reg : Address range of the flash chip | ||
1298 | - bank-width : Width (in bytes) of the flash bank. Equal to the | ||
1299 | device width times the number of interleaved chips. | ||
1300 | - device-width : (optional) Width of a single flash chip. If | ||
1301 | omitted, assumed to be equal to 'bank-width'. | ||
1302 | - #address-cells, #size-cells : Must be present if the flash has | ||
1303 | sub-nodes representing partitions (see below). In this case | ||
1304 | both #address-cells and #size-cells must be equal to 1. | ||
1305 | |||
1306 | For JEDEC compatible devices, the following additional properties | ||
1307 | are defined: | ||
1308 | |||
1309 | - vendor-id : Contains the flash chip's vendor id (1 byte). | ||
1310 | - device-id : Contains the flash chip's device id (1 byte). | ||
1311 | |||
1312 | In addition to the information on the flash bank itself, the | ||
1313 | device tree may optionally contain additional information | ||
1314 | describing partitions of the flash address space. This can be | ||
1315 | used on platforms which have strong conventions about which | ||
1316 | portions of the flash are used for what purposes, but which don't | ||
1317 | use an on-flash partition table such as RedBoot. | ||
1318 | |||
1319 | Each partition is represented as a sub-node of the flash device. | ||
1320 | Each node's name represents the name of the corresponding | ||
1321 | partition of the flash device. | ||
1322 | |||
1323 | Flash partitions | ||
1324 | - reg : The partition's offset and size within the flash bank. | ||
1325 | - label : (optional) The label / name for this flash partition. | ||
1326 | If omitted, the label is taken from the node name (excluding | ||
1327 | the unit address). | ||
1328 | - read-only : (optional) This parameter, if present, is a hint to | ||
1329 | Linux that this flash partition should only be mounted | ||
1330 | read-only. This is usually used for flash partitions | ||
1331 | containing early-boot firmware images or data which should not | ||
1332 | be clobbered. | ||
1333 | |||
1334 | Example: | ||
1335 | |||
1336 | flash@ff000000 { | ||
1337 | compatible = "amd,am29lv128ml", "cfi-flash"; | ||
1338 | reg = <ff000000 01000000>; | ||
1339 | bank-width = <4>; | ||
1340 | device-width = <1>; | ||
1341 | #address-cells = <1>; | ||
1342 | #size-cells = <1>; | ||
1343 | fs@0 { | ||
1344 | label = "fs"; | ||
1345 | reg = <0 f80000>; | ||
1346 | }; | ||
1347 | firmware@f80000 { | ||
1348 | label ="firmware"; | ||
1349 | reg = <f80000 80000>; | ||
1350 | read-only; | ||
1351 | }; | ||
1352 | }; | ||
1353 | |||
1354 | d) 4xx/Axon EMAC ethernet nodes | ||
1355 | 1290 | ||
1356 | The EMAC ethernet controller in IBM and AMCC 4xx chips, and also | 1291 | The EMAC ethernet controller in IBM and AMCC 4xx chips, and also |
1357 | the Axon bridge. To operate this needs to interact with a ths | 1292 | the Axon bridge. To operate this needs to interact with a ths |
@@ -1421,7 +1356,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1421 | - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY | 1356 | - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY |
1422 | for, used if phy-address is absent. bit 0x00000001 is | 1357 | for, used if phy-address is absent. bit 0x00000001 is |
1423 | MDIO address 0. | 1358 | MDIO address 0. |
1424 | For Axon it can be absent, thouugh my current driver | 1359 | For Axon it can be absent, though my current driver |
1425 | doesn't handle phy-address yet so for now, keep | 1360 | doesn't handle phy-address yet so for now, keep |
1426 | 0x00ffffff in it. | 1361 | 0x00ffffff in it. |
1427 | - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec | 1362 | - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec |
@@ -1499,11 +1434,11 @@ platforms are moved over to use the flattened-device-tree model. | |||
1499 | available. | 1434 | available. |
1500 | For Axon: 0x0000012a | 1435 | For Axon: 0x0000012a |
1501 | 1436 | ||
1502 | e) Xilinx IP cores | 1437 | d) Xilinx IP cores |
1503 | 1438 | ||
1504 | The Xilinx EDK toolchain ships with a set of IP cores (devices) for use | 1439 | The Xilinx EDK toolchain ships with a set of IP cores (devices) for use |
1505 | in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range | 1440 | in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range |
1506 | of standard device types (network, serial, etc.) and miscellanious | 1441 | of standard device types (network, serial, etc.) and miscellaneous |
1507 | devices (gpio, LCD, spi, etc). Also, since these devices are | 1442 | devices (gpio, LCD, spi, etc). Also, since these devices are |
1508 | implemented within the fpga fabric every instance of the device can be | 1443 | implemented within the fpga fabric every instance of the device can be |
1509 | synthesised with different options that change the behaviour. | 1444 | synthesised with different options that change the behaviour. |
@@ -1761,7 +1696,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1761 | listed above, nodes for these devices should include a phy-handle | 1696 | listed above, nodes for these devices should include a phy-handle |
1762 | property, and may include other common network device properties | 1697 | property, and may include other common network device properties |
1763 | like local-mac-address. | 1698 | like local-mac-address. |
1764 | 1699 | ||
1765 | iv) Xilinx Uartlite | 1700 | iv) Xilinx Uartlite |
1766 | 1701 | ||
1767 | Xilinx uartlite devices are simple fixed speed serial ports. | 1702 | Xilinx uartlite devices are simple fixed speed serial ports. |
@@ -1793,7 +1728,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1793 | - reg-offset : A value of 3 is required | 1728 | - reg-offset : A value of 3 is required |
1794 | - reg-shift : A value of 2 is required | 1729 | - reg-shift : A value of 2 is required |
1795 | 1730 | ||
1796 | f) USB EHCI controllers | 1731 | e) USB EHCI controllers |
1797 | 1732 | ||
1798 | Required properties: | 1733 | Required properties: |
1799 | - compatible : should be "usb-ehci". | 1734 | - compatible : should be "usb-ehci". |
@@ -1819,7 +1754,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1819 | big-endian; | 1754 | big-endian; |
1820 | }; | 1755 | }; |
1821 | 1756 | ||
1822 | g) MDIO on GPIOs | 1757 | f) MDIO on GPIOs |
1823 | 1758 | ||
1824 | Currently defined compatibles: | 1759 | Currently defined compatibles: |
1825 | - virtual,gpio-mdio | 1760 | - virtual,gpio-mdio |
@@ -1839,7 +1774,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1839 | &qe_pio_c 6>; | 1774 | &qe_pio_c 6>; |
1840 | }; | 1775 | }; |
1841 | 1776 | ||
1842 | h) SPI (Serial Peripheral Interface) busses | 1777 | g) SPI (Serial Peripheral Interface) busses |
1843 | 1778 | ||
1844 | SPI busses can be described with a node for the SPI master device | 1779 | SPI busses can be described with a node for the SPI master device |
1845 | and a set of child nodes for each SPI slave on the bus. For this | 1780 | and a set of child nodes for each SPI slave on the bus. For this |
diff --git a/Documentation/powerpc/dts-bindings/can/sja1000.txt b/Documentation/powerpc/dts-bindings/can/sja1000.txt new file mode 100644 index 000000000000..d6d209ded937 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/can/sja1000.txt | |||
@@ -0,0 +1,53 @@ | |||
1 | Memory mapped SJA1000 CAN controller from NXP (formerly Philips) | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - compatible : should be "nxp,sja1000". | ||
6 | |||
7 | - reg : should specify the chip select, address offset and size required | ||
8 | to map the registers of the SJA1000. The size is usually 0x80. | ||
9 | |||
10 | - interrupts: property with a value describing the interrupt source | ||
11 | (number and sensitivity) required for the SJA1000. | ||
12 | |||
13 | Optional properties: | ||
14 | |||
15 | - nxp,external-clock-frequency : Frequency of the external oscillator | ||
16 | clock in Hz. Note that the internal clock frequency used by the | ||
17 | SJA1000 is half of that value. If not specified, a default value | ||
18 | of 16000000 (16 MHz) is used. | ||
19 | |||
20 | - nxp,tx-output-mode : operation mode of the TX output control logic: | ||
21 | <0x0> : bi-phase output mode | ||
22 | <0x1> : normal output mode (default) | ||
23 | <0x2> : test output mode | ||
24 | <0x3> : clock output mode | ||
25 | |||
26 | - nxp,tx-output-config : TX output pin configuration: | ||
27 | <0x01> : TX0 invert | ||
28 | <0x02> : TX0 pull-down (default) | ||
29 | <0x04> : TX0 pull-up | ||
30 | <0x06> : TX0 push-pull | ||
31 | <0x08> : TX1 invert | ||
32 | <0x10> : TX1 pull-down | ||
33 | <0x20> : TX1 pull-up | ||
34 | <0x30> : TX1 push-pull | ||
35 | |||
36 | - nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin. | ||
37 | If not specified or if the specified value is 0, the CLKOUT pin | ||
38 | will be disabled. | ||
39 | |||
40 | - nxp,no-comparator-bypass : Allows to disable the CAN input comperator. | ||
41 | |||
42 | For futher information, please have a look to the SJA1000 data sheet. | ||
43 | |||
44 | Examples: | ||
45 | |||
46 | can@3,100 { | ||
47 | compatible = "nxp,sja1000"; | ||
48 | reg = <3 0x100 0x80>; | ||
49 | interrupts = <2 0>; | ||
50 | interrupt-parent = <&mpic>; | ||
51 | nxp,external-clock-frequency = <16000000>; | ||
52 | }; | ||
53 | |||
diff --git a/Documentation/powerpc/dts-bindings/ecm.txt b/Documentation/powerpc/dts-bindings/ecm.txt new file mode 100644 index 000000000000..f514f29c67d6 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/ecm.txt | |||
@@ -0,0 +1,64 @@ | |||
1 | ===================================================================== | ||
2 | E500 LAW & Coherency Module Device Tree Binding | ||
3 | Copyright (C) 2009 Freescale Semiconductor Inc. | ||
4 | ===================================================================== | ||
5 | |||
6 | Local Access Window (LAW) Node | ||
7 | |||
8 | The LAW node represents the region of CCSR space where local access | ||
9 | windows are configured. For ECM based devices this is the first 4k | ||
10 | of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some | ||
11 | number of local access windows as specified by fsl,num-laws. | ||
12 | |||
13 | PROPERTIES | ||
14 | |||
15 | - compatible | ||
16 | Usage: required | ||
17 | Value type: <string> | ||
18 | Definition: Must include "fsl,ecm-law" | ||
19 | |||
20 | - reg | ||
21 | Usage: required | ||
22 | Value type: <prop-encoded-array> | ||
23 | Definition: A standard property. The value specifies the | ||
24 | physical address offset and length of the CCSR space | ||
25 | registers. | ||
26 | |||
27 | - fsl,num-laws | ||
28 | Usage: required | ||
29 | Value type: <u32> | ||
30 | Definition: The value specifies the number of local access | ||
31 | windows for this device. | ||
32 | |||
33 | ===================================================================== | ||
34 | |||
35 | E500 Coherency Module Node | ||
36 | |||
37 | The E500 LAW node represents the region of CCSR space where ECM config | ||
38 | and error reporting registers exist, this is the second 4k (0x1000) | ||
39 | of CCSR space. | ||
40 | |||
41 | PROPERTIES | ||
42 | |||
43 | - compatible | ||
44 | Usage: required | ||
45 | Value type: <string> | ||
46 | Definition: Must include "fsl,CHIP-ecm", "fsl,ecm" where | ||
47 | CHIP is the processor (mpc8572, mpc8544, etc.) | ||
48 | |||
49 | - reg | ||
50 | Usage: required | ||
51 | Value type: <prop-encoded-array> | ||
52 | Definition: A standard property. The value specifies the | ||
53 | physical address offset and length of the CCSR space | ||
54 | registers. | ||
55 | |||
56 | - interrupts | ||
57 | Usage: required | ||
58 | Value type: <prop-encoded-array> | ||
59 | |||
60 | - interrupt-parent | ||
61 | Usage: required | ||
62 | Value type: <phandle> | ||
63 | |||
64 | ===================================================================== | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt index 6c974d28eeb4..e8b5bc24d0ac 100644 --- a/Documentation/powerpc/dts-bindings/fsl/board.txt +++ b/Documentation/powerpc/dts-bindings/fsl/board.txt | |||
@@ -38,7 +38,7 @@ Required properities: | |||
38 | - reg : Should contain the address and the length of the GPIO bank | 38 | - reg : Should contain the address and the length of the GPIO bank |
39 | register. | 39 | register. |
40 | - #gpio-cells : Should be two. The first cell is the pin number and the | 40 | - #gpio-cells : Should be two. The first cell is the pin number and the |
41 | second cell is used to specify optional paramters (currently unused). | 41 | second cell is used to specify optional parameters (currently unused). |
42 | - gpio-controller : Marks the port as GPIO controller. | 42 | - gpio-controller : Marks the port as GPIO controller. |
43 | 43 | ||
44 | Example: | 44 | Example: |
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt index 088fc471e03a..160c752484b4 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt | |||
@@ -19,7 +19,7 @@ Example: | |||
19 | reg = <119c0 30>; | 19 | reg = <119c0 30>; |
20 | } | 20 | } |
21 | 21 | ||
22 | * Properties common to mulitple CPM/QE devices | 22 | * Properties common to multiple CPM/QE devices |
23 | 23 | ||
24 | - fsl,cpm-command : This value is ORed with the opcode and command flag | 24 | - fsl,cpm-command : This value is ORed with the opcode and command flag |
25 | to specify the device on which a CPM command operates. | 25 | to specify the device on which a CPM command operates. |
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt index 1815dfede1bc..349f79fd7076 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt | |||
@@ -11,7 +11,7 @@ Required properties: | |||
11 | "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", | 11 | "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", |
12 | "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" | 12 | "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" |
13 | - #gpio-cells : Should be two. The first cell is the pin number and the | 13 | - #gpio-cells : Should be two. The first cell is the pin number and the |
14 | second cell is used to specify optional paramters (currently unused). | 14 | second cell is used to specify optional parameters (currently unused). |
15 | - gpio-controller : Marks the port as GPIO controller. | 15 | - gpio-controller : Marks the port as GPIO controller. |
16 | 16 | ||
17 | Example of three SOC GPIO banks defined as gpio-controller nodes: | 17 | Example of three SOC GPIO banks defined as gpio-controller nodes: |
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt index 78790d58dc2c..6e37be1eeb2d 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt | |||
@@ -17,6 +17,9 @@ Required properties: | |||
17 | - model : precise model of the QE, Can be "QE", "CPM", or "CPM2" | 17 | - model : precise model of the QE, Can be "QE", "CPM", or "CPM2" |
18 | - reg : offset and length of the device registers. | 18 | - reg : offset and length of the device registers. |
19 | - bus-frequency : the clock frequency for QUICC Engine. | 19 | - bus-frequency : the clock frequency for QUICC Engine. |
20 | - fsl,qe-num-riscs: define how many RISC engines the QE has. | ||
21 | - fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the | ||
22 | threads. | ||
20 | 23 | ||
21 | Recommended properties | 24 | Recommended properties |
22 | - brg-frequency : the internal clock source frequency for baud-rate | 25 | - brg-frequency : the internal clock source frequency for baud-rate |
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt index 6c238f59b2a9..249db3a15d15 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | * Uploaded QE firmware | 1 | * Uploaded QE firmware |
2 | 2 | ||
3 | If a new firwmare has been uploaded to the QE (usually by the | 3 | If a new firmware has been uploaded to the QE (usually by the |
4 | boot loader), then a 'firmware' child node should be added to the QE | 4 | boot loader), then a 'firmware' child node should be added to the QE |
5 | node. This node provides information on the uploaded firmware that | 5 | node. This node provides information on the uploaded firmware that |
6 | device drivers may need. | 6 | device drivers may need. |
diff --git a/Documentation/powerpc/dts-bindings/fsl/dma.txt b/Documentation/powerpc/dts-bindings/fsl/dma.txt index cc453110fc46..0732cdd05ba1 100644 --- a/Documentation/powerpc/dts-bindings/fsl/dma.txt +++ b/Documentation/powerpc/dts-bindings/fsl/dma.txt | |||
@@ -35,30 +35,30 @@ Example: | |||
35 | #address-cells = <1>; | 35 | #address-cells = <1>; |
36 | #size-cells = <1>; | 36 | #size-cells = <1>; |
37 | compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; | 37 | compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; |
38 | reg = <82a8 4>; | 38 | reg = <0x82a8 4>; |
39 | ranges = <0 8100 1a4>; | 39 | ranges = <0 0x8100 0x1a4>; |
40 | interrupt-parent = <&ipic>; | 40 | interrupt-parent = <&ipic>; |
41 | interrupts = <47 8>; | 41 | interrupts = <71 8>; |
42 | cell-index = <0>; | 42 | cell-index = <0>; |
43 | dma-channel@0 { | 43 | dma-channel@0 { |
44 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | 44 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; |
45 | cell-index = <0>; | 45 | cell-index = <0>; |
46 | reg = <0 80>; | 46 | reg = <0 0x80>; |
47 | }; | 47 | }; |
48 | dma-channel@80 { | 48 | dma-channel@80 { |
49 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | 49 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; |
50 | cell-index = <1>; | 50 | cell-index = <1>; |
51 | reg = <80 80>; | 51 | reg = <0x80 0x80>; |
52 | }; | 52 | }; |
53 | dma-channel@100 { | 53 | dma-channel@100 { |
54 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | 54 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; |
55 | cell-index = <2>; | 55 | cell-index = <2>; |
56 | reg = <100 80>; | 56 | reg = <0x100 0x80>; |
57 | }; | 57 | }; |
58 | dma-channel@180 { | 58 | dma-channel@180 { |
59 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | 59 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; |
60 | cell-index = <3>; | 60 | cell-index = <3>; |
61 | reg = <180 80>; | 61 | reg = <0x180 0x80>; |
62 | }; | 62 | }; |
63 | }; | 63 | }; |
64 | 64 | ||
@@ -93,36 +93,36 @@ Example: | |||
93 | #address-cells = <1>; | 93 | #address-cells = <1>; |
94 | #size-cells = <1>; | 94 | #size-cells = <1>; |
95 | compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; | 95 | compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; |
96 | reg = <21300 4>; | 96 | reg = <0x21300 4>; |
97 | ranges = <0 21100 200>; | 97 | ranges = <0 0x21100 0x200>; |
98 | cell-index = <0>; | 98 | cell-index = <0>; |
99 | dma-channel@0 { | 99 | dma-channel@0 { |
100 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | 100 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; |
101 | reg = <0 80>; | 101 | reg = <0 0x80>; |
102 | cell-index = <0>; | 102 | cell-index = <0>; |
103 | interrupt-parent = <&mpic>; | 103 | interrupt-parent = <&mpic>; |
104 | interrupts = <14 2>; | 104 | interrupts = <20 2>; |
105 | }; | 105 | }; |
106 | dma-channel@80 { | 106 | dma-channel@80 { |
107 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | 107 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; |
108 | reg = <80 80>; | 108 | reg = <0x80 0x80>; |
109 | cell-index = <1>; | 109 | cell-index = <1>; |
110 | interrupt-parent = <&mpic>; | 110 | interrupt-parent = <&mpic>; |
111 | interrupts = <15 2>; | 111 | interrupts = <21 2>; |
112 | }; | 112 | }; |
113 | dma-channel@100 { | 113 | dma-channel@100 { |
114 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | 114 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; |
115 | reg = <100 80>; | 115 | reg = <0x100 0x80>; |
116 | cell-index = <2>; | 116 | cell-index = <2>; |
117 | interrupt-parent = <&mpic>; | 117 | interrupt-parent = <&mpic>; |
118 | interrupts = <16 2>; | 118 | interrupts = <22 2>; |
119 | }; | 119 | }; |
120 | dma-channel@180 { | 120 | dma-channel@180 { |
121 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | 121 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; |
122 | reg = <180 80>; | 122 | reg = <0x180 0x80>; |
123 | cell-index = <3>; | 123 | cell-index = <3>; |
124 | interrupt-parent = <&mpic>; | 124 | interrupt-parent = <&mpic>; |
125 | interrupts = <17 2>; | 125 | interrupts = <23 2>; |
126 | }; | 126 | }; |
127 | }; | 127 | }; |
128 | 128 | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt new file mode 100644 index 000000000000..3ed3797b5086 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | * Freescale Enhanced Secure Digital Host Controller (eSDHC) | ||
2 | |||
3 | The Enhanced Secure Digital Host Controller provides an interface | ||
4 | for MMC, SD, and SDIO types of memory cards. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible : should be | ||
8 | "fsl,<chip>-esdhc", "fsl,esdhc" | ||
9 | - reg : should contain eSDHC registers location and length. | ||
10 | - interrupts : should contain eSDHC interrupt. | ||
11 | - interrupt-parent : interrupt source phandle. | ||
12 | - clock-frequency : specifies eSDHC base clock frequency. | ||
13 | - sdhci,1-bit-only : (optional) specifies that a controller can | ||
14 | only handle 1-bit data transfers. | ||
15 | |||
16 | Example: | ||
17 | |||
18 | sdhci@2e000 { | ||
19 | compatible = "fsl,mpc8378-esdhc", "fsl,esdhc"; | ||
20 | reg = <0x2e000 0x1000>; | ||
21 | interrupts = <42 0x8>; | ||
22 | interrupt-parent = <&ipic>; | ||
23 | /* Filled in by U-Boot */ | ||
24 | clock-frequency = <0>; | ||
25 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/i2c.txt b/Documentation/powerpc/dts-bindings/fsl/i2c.txt index d0ab33e21fe6..b6d2e21474f9 100644 --- a/Documentation/powerpc/dts-bindings/fsl/i2c.txt +++ b/Documentation/powerpc/dts-bindings/fsl/i2c.txt | |||
@@ -7,8 +7,10 @@ Required properties : | |||
7 | 7 | ||
8 | Recommended properties : | 8 | Recommended properties : |
9 | 9 | ||
10 | - compatible : Should be "fsl-i2c" for parts compatible with | 10 | - compatible : compatibility list with 2 entries, the first should |
11 | Freescale I2C specifications. | 11 | be "fsl,CHIP-i2c" where CHIP is the name of a compatible processor, |
12 | e.g. mpc8313, mpc8543, mpc8544, mpc5200 or mpc5200b. The second one | ||
13 | should be "fsl-i2c". | ||
12 | - interrupts : <a b> where a is the interrupt number and b is a | 14 | - interrupts : <a b> where a is the interrupt number and b is a |
13 | field that represents an encoding of the sense and level | 15 | field that represents an encoding of the sense and level |
14 | information for the interrupt. This should be encoded based on | 16 | information for the interrupt. This should be encoded based on |
@@ -16,17 +18,31 @@ Recommended properties : | |||
16 | controller you have. | 18 | controller you have. |
17 | - interrupt-parent : the phandle for the interrupt controller that | 19 | - interrupt-parent : the phandle for the interrupt controller that |
18 | services interrupts for this device. | 20 | services interrupts for this device. |
19 | - dfsrr : boolean; if defined, indicates that this I2C device has | 21 | - fsl,preserve-clocking : boolean; if defined, the clock settings |
20 | a digital filter sampling rate register | 22 | from the bootloader are preserved (not touched). |
21 | - fsl5200-clocking : boolean; if defined, indicated that this device | 23 | - clock-frequency : desired I2C bus clock frequency in Hz. |
22 | uses the FSL 5200 clocking mechanism. | 24 | |
23 | 25 | Examples : | |
24 | Example : | 26 | |
25 | i2c@3000 { | 27 | i2c@3d00 { |
26 | interrupt-parent = <40000>; | 28 | #address-cells = <1>; |
27 | interrupts = <1b 3>; | 29 | #size-cells = <0>; |
28 | reg = <3000 18>; | 30 | compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c"; |
29 | device_type = "i2c"; | 31 | cell-index = <0>; |
30 | compatible = "fsl-i2c"; | 32 | reg = <0x3d00 0x40>; |
31 | dfsrr; | 33 | interrupts = <2 15 0>; |
34 | interrupt-parent = <&mpc5200_pic>; | ||
35 | fsl,preserve-clocking; | ||
32 | }; | 36 | }; |
37 | |||
38 | i2c@3100 { | ||
39 | #address-cells = <1>; | ||
40 | #size-cells = <0>; | ||
41 | cell-index = <1>; | ||
42 | compatible = "fsl,mpc8544-i2c", "fsl-i2c"; | ||
43 | reg = <0x3100 0x100>; | ||
44 | interrupts = <43 2>; | ||
45 | interrupt-parent = <&mpic>; | ||
46 | clock-frequency = <400000>; | ||
47 | }; | ||
48 | |||
diff --git a/Documentation/powerpc/dts-bindings/fsl/mcm.txt b/Documentation/powerpc/dts-bindings/fsl/mcm.txt new file mode 100644 index 000000000000..4ceda9b3b413 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/mcm.txt | |||
@@ -0,0 +1,64 @@ | |||
1 | ===================================================================== | ||
2 | MPX LAW & Coherency Module Device Tree Binding | ||
3 | Copyright (C) 2009 Freescale Semiconductor Inc. | ||
4 | ===================================================================== | ||
5 | |||
6 | Local Access Window (LAW) Node | ||
7 | |||
8 | The LAW node represents the region of CCSR space where local access | ||
9 | windows are configured. For MCM based devices this is the first 4k | ||
10 | of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some | ||
11 | number of local access windows as specified by fsl,num-laws. | ||
12 | |||
13 | PROPERTIES | ||
14 | |||
15 | - compatible | ||
16 | Usage: required | ||
17 | Value type: <string> | ||
18 | Definition: Must include "fsl,mcm-law" | ||
19 | |||
20 | - reg | ||
21 | Usage: required | ||
22 | Value type: <prop-encoded-array> | ||
23 | Definition: A standard property. The value specifies the | ||
24 | physical address offset and length of the CCSR space | ||
25 | registers. | ||
26 | |||
27 | - fsl,num-laws | ||
28 | Usage: required | ||
29 | Value type: <u32> | ||
30 | Definition: The value specifies the number of local access | ||
31 | windows for this device. | ||
32 | |||
33 | ===================================================================== | ||
34 | |||
35 | MPX Coherency Module Node | ||
36 | |||
37 | The MPX LAW node represents the region of CCSR space where MCM config | ||
38 | and error reporting registers exist, this is the second 4k (0x1000) | ||
39 | of CCSR space. | ||
40 | |||
41 | PROPERTIES | ||
42 | |||
43 | - compatible | ||
44 | Usage: required | ||
45 | Value type: <string> | ||
46 | Definition: Must include "fsl,CHIP-mcm", "fsl,mcm" where | ||
47 | CHIP is the processor (mpc8641, mpc8610, etc.) | ||
48 | |||
49 | - reg | ||
50 | Usage: required | ||
51 | Value type: <prop-encoded-array> | ||
52 | Definition: A standard property. The value specifies the | ||
53 | physical address offset and length of the CCSR space | ||
54 | registers. | ||
55 | |||
56 | - interrupts | ||
57 | Usage: required | ||
58 | Value type: <prop-encoded-array> | ||
59 | |||
60 | - interrupt-parent | ||
61 | Usage: required | ||
62 | Value type: <phandle> | ||
63 | |||
64 | ===================================================================== | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt index b26b91992c55..bcc30bac6831 100644 --- a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt +++ b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | * Freescale MSI interrupt controller | 1 | * Freescale MSI interrupt controller |
2 | 2 | ||
3 | Reguired properities: | 3 | Required properties: |
4 | - compatible : compatible list, contains 2 entries, | 4 | - compatible : compatible list, contains 2 entries, |
5 | first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572, | 5 | first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572, |
6 | etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on | 6 | etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on |
diff --git a/Documentation/powerpc/dts-bindings/fsl/pmc.txt b/Documentation/powerpc/dts-bindings/fsl/pmc.txt index 02f6f43ee1b7..07256b7ffcaa 100644 --- a/Documentation/powerpc/dts-bindings/fsl/pmc.txt +++ b/Documentation/powerpc/dts-bindings/fsl/pmc.txt | |||
@@ -15,8 +15,8 @@ Properties: | |||
15 | compatible; all statements below that apply to "fsl,mpc8548-pmc" also | 15 | compatible; all statements below that apply to "fsl,mpc8548-pmc" also |
16 | apply to "fsl,mpc8641d-pmc". | 16 | apply to "fsl,mpc8641d-pmc". |
17 | 17 | ||
18 | Compatibility does not include bit assigments in SCCR/PMCDR/DEVDISR; these | 18 | Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these |
19 | bit assigments are indicated via the sleep specifier in each device's | 19 | bit assignments are indicated via the sleep specifier in each device's |
20 | sleep property. | 20 | sleep property. |
21 | 21 | ||
22 | - reg: For devices compatible with "fsl,mpc8349-pmc", the first resource | 22 | - reg: For devices compatible with "fsl,mpc8349-pmc", the first resource |
diff --git a/Documentation/powerpc/dts-bindings/fsl/ssi.txt b/Documentation/powerpc/dts-bindings/fsl/ssi.txt index a2d963998a65..5ff76c9c57d2 100644 --- a/Documentation/powerpc/dts-bindings/fsl/ssi.txt +++ b/Documentation/powerpc/dts-bindings/fsl/ssi.txt | |||
@@ -4,44 +4,56 @@ The SSI is a serial device that communicates with audio codecs. It can | |||
4 | be programmed in AC97, I2S, left-justified, or right-justified modes. | 4 | be programmed in AC97, I2S, left-justified, or right-justified modes. |
5 | 5 | ||
6 | Required properties: | 6 | Required properties: |
7 | - compatible : compatible list, containing "fsl,ssi" | 7 | - compatible: Compatible list, contains "fsl,ssi". |
8 | - cell-index : the SSI, <0> = SSI1, <1> = SSI2, and so on | 8 | - cell-index: The SSI, <0> = SSI1, <1> = SSI2, and so on. |
9 | - reg : offset and length of the register set for the device | 9 | - reg: Offset and length of the register set for the device. |
10 | - interrupts : <a b> where a is the interrupt number and b is a | 10 | - interrupts: <a b> where a is the interrupt number and b is a |
11 | field that represents an encoding of the sense and | 11 | field that represents an encoding of the sense and |
12 | level information for the interrupt. This should be | 12 | level information for the interrupt. This should be |
13 | encoded based on the information in section 2) | 13 | encoded based on the information in section 2) |
14 | depending on the type of interrupt controller you | 14 | depending on the type of interrupt controller you |
15 | have. | 15 | have. |
16 | - interrupt-parent : the phandle for the interrupt controller that | 16 | - interrupt-parent: The phandle for the interrupt controller that |
17 | services interrupts for this device. | 17 | services interrupts for this device. |
18 | - fsl,mode : the operating mode for the SSI interface | 18 | - fsl,mode: The operating mode for the SSI interface. |
19 | "i2s-slave" - I2S mode, SSI is clock slave | 19 | "i2s-slave" - I2S mode, SSI is clock slave |
20 | "i2s-master" - I2S mode, SSI is clock master | 20 | "i2s-master" - I2S mode, SSI is clock master |
21 | "lj-slave" - left-justified mode, SSI is clock slave | 21 | "lj-slave" - left-justified mode, SSI is clock slave |
22 | "lj-master" - l.j. mode, SSI is clock master | 22 | "lj-master" - l.j. mode, SSI is clock master |
23 | "rj-slave" - right-justified mode, SSI is clock slave | 23 | "rj-slave" - right-justified mode, SSI is clock slave |
24 | "rj-master" - r.j., SSI is clock master | 24 | "rj-master" - r.j., SSI is clock master |
25 | "ac97-slave" - AC97 mode, SSI is clock slave | 25 | "ac97-slave" - AC97 mode, SSI is clock slave |
26 | "ac97-master" - AC97 mode, SSI is clock master | 26 | "ac97-master" - AC97 mode, SSI is clock master |
27 | - fsl,playback-dma: phandle to a node for the DMA channel to use for | 27 | - fsl,playback-dma: Phandle to a node for the DMA channel to use for |
28 | playback of audio. This is typically dictated by SOC | 28 | playback of audio. This is typically dictated by SOC |
29 | design. See the notes below. | 29 | design. See the notes below. |
30 | - fsl,capture-dma: phandle to a node for the DMA channel to use for | 30 | - fsl,capture-dma: Phandle to a node for the DMA channel to use for |
31 | capture (recording) of audio. This is typically dictated | 31 | capture (recording) of audio. This is typically dictated |
32 | by SOC design. See the notes below. | 32 | by SOC design. See the notes below. |
33 | - fsl,fifo-depth: The number of elements in the transmit and receive FIFOs. | ||
34 | This number is the maximum allowed value for SFCSR[TFWM0]. | ||
35 | - fsl,ssi-asynchronous: | ||
36 | If specified, the SSI is to be programmed in asynchronous | ||
37 | mode. In this mode, pins SRCK, STCK, SRFS, and STFS must | ||
38 | all be connected to valid signals. In synchronous mode, | ||
39 | SRCK and SRFS are ignored. Asynchronous mode allows | ||
40 | playback and capture to use different sample sizes and | ||
41 | sample rates. Some drivers may require that SRCK and STCK | ||
42 | be connected together, and SRFS and STFS be connected | ||
43 | together. This would still allow different sample sizes, | ||
44 | but not different sample rates. | ||
33 | 45 | ||
34 | Optional properties: | 46 | Optional properties: |
35 | - codec-handle : phandle to a 'codec' node that defines an audio | 47 | - codec-handle: Phandle to a 'codec' node that defines an audio |
36 | codec connected to this SSI. This node is typically | 48 | codec connected to this SSI. This node is typically |
37 | a child of an I2C or other control node. | 49 | a child of an I2C or other control node. |
38 | 50 | ||
39 | Child 'codec' node required properties: | 51 | Child 'codec' node required properties: |
40 | - compatible : compatible list, contains the name of the codec | 52 | - compatible: Compatible list, contains the name of the codec |
41 | 53 | ||
42 | Child 'codec' node optional properties: | 54 | Child 'codec' node optional properties: |
43 | - clock-frequency : The frequency of the input clock, which typically | 55 | - clock-frequency: The frequency of the input clock, which typically comes |
44 | comes from an on-board dedicated oscillator. | 56 | from an on-board dedicated oscillator. |
45 | 57 | ||
46 | Notes on fsl,playback-dma and fsl,capture-dma: | 58 | Notes on fsl,playback-dma and fsl,capture-dma: |
47 | 59 | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt index 7fa4b27574b5..edb7ae19e868 100644 --- a/Documentation/powerpc/dts-bindings/fsl/tsec.txt +++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt | |||
@@ -56,6 +56,12 @@ Properties: | |||
56 | hardware. | 56 | hardware. |
57 | - fsl,magic-packet : If present, indicates that the hardware supports | 57 | - fsl,magic-packet : If present, indicates that the hardware supports |
58 | waking up via magic packet. | 58 | waking up via magic packet. |
59 | - bd-stash : If present, indicates that the hardware supports stashing | ||
60 | buffer descriptors in the L2. | ||
61 | - rx-stash-len : Denotes the number of bytes of a received buffer to stash | ||
62 | in the L2. | ||
63 | - rx-stash-idx : Denotes the index of the first byte from the received | ||
64 | buffer to stash in the L2. | ||
59 | 65 | ||
60 | Example: | 66 | Example: |
61 | ethernet@24000 { | 67 | ethernet@24000 { |
diff --git a/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt index 84a04d5eb8e6..a48b2cadc7f0 100644 --- a/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt +++ b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt | |||
@@ -5,9 +5,21 @@ Required properties: | |||
5 | - reg : should specify localbus chip select and size used for the chip. | 5 | - reg : should specify localbus chip select and size used for the chip. |
6 | - fsl,upm-addr-offset : UPM pattern offset for the address latch. | 6 | - fsl,upm-addr-offset : UPM pattern offset for the address latch. |
7 | - fsl,upm-cmd-offset : UPM pattern offset for the command latch. | 7 | - fsl,upm-cmd-offset : UPM pattern offset for the command latch. |
8 | - gpios : may specify optional GPIO connected to the Ready-Not-Busy pin. | ||
9 | 8 | ||
10 | Example: | 9 | Optional properties: |
10 | - fsl,upm-wait-flags : add chip-dependent short delays after running the | ||
11 | UPM pattern (0x1), after writing a data byte (0x2) or after | ||
12 | writing out a buffer (0x4). | ||
13 | - fsl,upm-addr-line-cs-offsets : address offsets for multi-chip support. | ||
14 | The corresponding address lines are used to select the chip. | ||
15 | - gpios : may specify optional GPIOs connected to the Ready-Not-Busy pins | ||
16 | (R/B#). For multi-chip devices, "n" GPIO definitions are required | ||
17 | according to the number of chips. | ||
18 | - chip-delay : chip dependent delay for transfering data from array to | ||
19 | read registers (tR). Required if property "gpios" is not used | ||
20 | (R/B# pins not connected). | ||
21 | |||
22 | Examples: | ||
11 | 23 | ||
12 | upm@1,0 { | 24 | upm@1,0 { |
13 | compatible = "fsl,upm-nand"; | 25 | compatible = "fsl,upm-nand"; |
@@ -26,3 +38,26 @@ upm@1,0 { | |||
26 | }; | 38 | }; |
27 | }; | 39 | }; |
28 | }; | 40 | }; |
41 | |||
42 | upm@3,0 { | ||
43 | #address-cells = <0>; | ||
44 | #size-cells = <0>; | ||
45 | compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand"; | ||
46 | reg = <3 0x0 0x800>; | ||
47 | fsl,upm-addr-offset = <0x10>; | ||
48 | fsl,upm-cmd-offset = <0x08>; | ||
49 | /* Multi-chip NAND device */ | ||
50 | fsl,upm-addr-line-cs-offsets = <0x0 0x200>; | ||
51 | fsl,upm-wait-flags = <0x5>; | ||
52 | chip-delay = <25>; // in micro-seconds | ||
53 | |||
54 | nand@0 { | ||
55 | #address-cells = <1>; | ||
56 | #size-cells = <1>; | ||
57 | |||
58 | partition@0 { | ||
59 | label = "fs"; | ||
60 | reg = <0x00000000 0x10000000>; | ||
61 | }; | ||
62 | }; | ||
63 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/gpio/led.txt b/Documentation/powerpc/dts-bindings/gpio/led.txt index ff51f4c0fa9d..4fe14deedc0a 100644 --- a/Documentation/powerpc/dts-bindings/gpio/led.txt +++ b/Documentation/powerpc/dts-bindings/gpio/led.txt | |||
@@ -1,15 +1,43 @@ | |||
1 | LED connected to GPIO | 1 | LEDs connected to GPIO lines |
2 | 2 | ||
3 | Required properties: | 3 | Required properties: |
4 | - compatible : should be "gpio-led". | 4 | - compatible : should be "gpio-leds". |
5 | - label : (optional) the label for this LED. If omitted, the label is | 5 | |
6 | Each LED is represented as a sub-node of the gpio-leds device. Each | ||
7 | node's name represents the name of the corresponding LED. | ||
8 | |||
9 | LED sub-node properties: | ||
10 | - gpios : Should specify the LED's GPIO, see "Specifying GPIO information | ||
11 | for devices" in Documentation/powerpc/booting-without-of.txt. Active | ||
12 | low LEDs should be indicated using flags in the GPIO specifier. | ||
13 | - label : (optional) The label for this LED. If omitted, the label is | ||
6 | taken from the node name (excluding the unit address). | 14 | taken from the node name (excluding the unit address). |
7 | - gpios : should specify LED GPIO. | 15 | - linux,default-trigger : (optional) This parameter, if present, is a |
16 | string defining the trigger assigned to the LED. Current triggers are: | ||
17 | "backlight" - LED will act as a back-light, controlled by the framebuffer | ||
18 | system | ||
19 | "default-on" - LED will turn on | ||
20 | "heartbeat" - LED "double" flashes at a load average based rate | ||
21 | "ide-disk" - LED indicates disk activity | ||
22 | "timer" - LED flashes at a fixed, configurable rate | ||
8 | 23 | ||
9 | Example: | 24 | Examples: |
10 | 25 | ||
11 | led@0 { | 26 | leds { |
12 | compatible = "gpio-led"; | 27 | compatible = "gpio-leds"; |
13 | label = "hdd"; | 28 | hdd { |
14 | gpios = <&mcu_pio 0 1>; | 29 | label = "IDE Activity"; |
30 | gpios = <&mcu_pio 0 1>; /* Active low */ | ||
31 | linux,default-trigger = "ide-disk"; | ||
32 | }; | ||
15 | }; | 33 | }; |
34 | |||
35 | run-control { | ||
36 | compatible = "gpio-leds"; | ||
37 | red { | ||
38 | gpios = <&mpc8572 6 0>; | ||
39 | }; | ||
40 | green { | ||
41 | gpios = <&mpc8572 7 0>; | ||
42 | }; | ||
43 | } | ||
diff --git a/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt new file mode 100644 index 000000000000..c39ac2891951 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | MMC/SD/SDIO slot directly connected to a SPI bus | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be "mmc-spi-slot". | ||
5 | - reg : should specify SPI address (chip-select number). | ||
6 | - spi-max-frequency : maximum frequency for this device (Hz). | ||
7 | - voltage-ranges : two cells are required, first cell specifies minimum | ||
8 | slot voltage (mV), second cell specifies maximum slot voltage (mV). | ||
9 | Several ranges could be specified. | ||
10 | - gpios : (optional) may specify GPIOs in this order: Card-Detect GPIO, | ||
11 | Write-Protect GPIO. | ||
12 | |||
13 | Example: | ||
14 | |||
15 | mmc-slot@0 { | ||
16 | compatible = "fsl,mpc8323rdb-mmc-slot", | ||
17 | "mmc-spi-slot"; | ||
18 | reg = <0>; | ||
19 | gpios = <&qe_pio_d 14 1 | ||
20 | &qe_pio_d 15 0>; | ||
21 | voltage-ranges = <3300 3300>; | ||
22 | spi-max-frequency = <50000000>; | ||
23 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/mtd-physmap.txt b/Documentation/powerpc/dts-bindings/mtd-physmap.txt new file mode 100644 index 000000000000..667c9bde8699 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/mtd-physmap.txt | |||
@@ -0,0 +1,80 @@ | |||
1 | CFI or JEDEC memory-mapped NOR flash | ||
2 | |||
3 | Flash chips (Memory Technology Devices) are often used for solid state | ||
4 | file systems on embedded devices. | ||
5 | |||
6 | - compatible : should contain the specific model of flash chip(s) | ||
7 | used, if known, followed by either "cfi-flash" or "jedec-flash" | ||
8 | - reg : Address range(s) of the flash chip(s) | ||
9 | It's possible to (optionally) define multiple "reg" tuples so that | ||
10 | non-identical NOR chips can be described in one flash node. | ||
11 | - bank-width : Width (in bytes) of the flash bank. Equal to the | ||
12 | device width times the number of interleaved chips. | ||
13 | - device-width : (optional) Width of a single flash chip. If | ||
14 | omitted, assumed to be equal to 'bank-width'. | ||
15 | - #address-cells, #size-cells : Must be present if the flash has | ||
16 | sub-nodes representing partitions (see below). In this case | ||
17 | both #address-cells and #size-cells must be equal to 1. | ||
18 | |||
19 | For JEDEC compatible devices, the following additional properties | ||
20 | are defined: | ||
21 | |||
22 | - vendor-id : Contains the flash chip's vendor id (1 byte). | ||
23 | - device-id : Contains the flash chip's device id (1 byte). | ||
24 | |||
25 | In addition to the information on the flash bank itself, the | ||
26 | device tree may optionally contain additional information | ||
27 | describing partitions of the flash address space. This can be | ||
28 | used on platforms which have strong conventions about which | ||
29 | portions of the flash are used for what purposes, but which don't | ||
30 | use an on-flash partition table such as RedBoot. | ||
31 | |||
32 | Each partition is represented as a sub-node of the flash device. | ||
33 | Each node's name represents the name of the corresponding | ||
34 | partition of the flash device. | ||
35 | |||
36 | Flash partitions | ||
37 | - reg : The partition's offset and size within the flash bank. | ||
38 | - label : (optional) The label / name for this flash partition. | ||
39 | If omitted, the label is taken from the node name (excluding | ||
40 | the unit address). | ||
41 | - read-only : (optional) This parameter, if present, is a hint to | ||
42 | Linux that this flash partition should only be mounted | ||
43 | read-only. This is usually used for flash partitions | ||
44 | containing early-boot firmware images or data which should not | ||
45 | be clobbered. | ||
46 | |||
47 | Example: | ||
48 | |||
49 | flash@ff000000 { | ||
50 | compatible = "amd,am29lv128ml", "cfi-flash"; | ||
51 | reg = <ff000000 01000000>; | ||
52 | bank-width = <4>; | ||
53 | device-width = <1>; | ||
54 | #address-cells = <1>; | ||
55 | #size-cells = <1>; | ||
56 | fs@0 { | ||
57 | label = "fs"; | ||
58 | reg = <0 f80000>; | ||
59 | }; | ||
60 | firmware@f80000 { | ||
61 | label ="firmware"; | ||
62 | reg = <f80000 80000>; | ||
63 | read-only; | ||
64 | }; | ||
65 | }; | ||
66 | |||
67 | Here an example with multiple "reg" tuples: | ||
68 | |||
69 | flash@f0000000,0 { | ||
70 | #address-cells = <1>; | ||
71 | #size-cells = <1>; | ||
72 | compatible = "intel,PC48F4400P0VB", "cfi-flash"; | ||
73 | reg = <0 0x00000000 0x02000000 | ||
74 | 0 0x02000000 0x02000000>; | ||
75 | bank-width = <2>; | ||
76 | partition@0 { | ||
77 | label = "test-part1"; | ||
78 | reg = <0 0x04000000>; | ||
79 | }; | ||
80 | }; | ||
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt index 06da4d4b44f9..2031ddb33d09 100644 --- a/Documentation/powerpc/qe_firmware.txt +++ b/Documentation/powerpc/qe_firmware.txt | |||
@@ -225,7 +225,7 @@ For example, to match the 8323, revision 1.0: | |||
225 | soc.major = 1 | 225 | soc.major = 1 |
226 | soc.minor = 0 | 226 | soc.minor = 0 |
227 | 227 | ||
228 | 'padding' is neccessary for structure alignment. This field ensures that the | 228 | 'padding' is necessary for structure alignment. This field ensures that the |
229 | 'extended_modes' field is aligned on a 64-bit boundary. | 229 | 'extended_modes' field is aligned on a 64-bit boundary. |
230 | 230 | ||
231 | 'extended_modes' is a bitfield that defines special functionality which has an | 231 | 'extended_modes' is a bitfield that defines special functionality which has an |
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt new file mode 100644 index 000000000000..125f4ab48998 --- /dev/null +++ b/Documentation/pps/pps.txt | |||
@@ -0,0 +1,172 @@ | |||
1 | |||
2 | PPS - Pulse Per Second | ||
3 | ---------------------- | ||
4 | |||
5 | (C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com> | ||
6 | |||
7 | This program is free software; you can redistribute it and/or modify | ||
8 | it under the terms of the GNU General Public License as published by | ||
9 | the Free Software Foundation; either version 2 of the License, or | ||
10 | (at your option) any later version. | ||
11 | |||
12 | This program is distributed in the hope that it will be useful, | ||
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | GNU General Public License for more details. | ||
16 | |||
17 | |||
18 | |||
19 | Overview | ||
20 | -------- | ||
21 | |||
22 | LinuxPPS provides a programming interface (API) to define in the | ||
23 | system several PPS sources. | ||
24 | |||
25 | PPS means "pulse per second" and a PPS source is just a device which | ||
26 | provides a high precision signal each second so that an application | ||
27 | can use it to adjust system clock time. | ||
28 | |||
29 | A PPS source can be connected to a serial port (usually to the Data | ||
30 | Carrier Detect pin) or to a parallel port (ACK-pin) or to a special | ||
31 | CPU's GPIOs (this is the common case in embedded systems) but in each | ||
32 | case when a new pulse arrives the system must apply to it a timestamp | ||
33 | and record it for userland. | ||
34 | |||
35 | Common use is the combination of the NTPD as userland program, with a | ||
36 | GPS receiver as PPS source, to obtain a wallclock-time with | ||
37 | sub-millisecond synchronisation to UTC. | ||
38 | |||
39 | |||
40 | RFC considerations | ||
41 | ------------------ | ||
42 | |||
43 | While implementing a PPS API as RFC 2783 defines and using an embedded | ||
44 | CPU GPIO-Pin as physical link to the signal, I encountered a deeper | ||
45 | problem: | ||
46 | |||
47 | At startup it needs a file descriptor as argument for the function | ||
48 | time_pps_create(). | ||
49 | |||
50 | This implies that the source has a /dev/... entry. This assumption is | ||
51 | ok for the serial and parallel port, where you can do something | ||
52 | useful besides(!) the gathering of timestamps as it is the central | ||
53 | task for a PPS-API. But this assumption does not work for a single | ||
54 | purpose GPIO line. In this case even basic file-related functionality | ||
55 | (like read() and write()) makes no sense at all and should not be a | ||
56 | precondition for the use of a PPS-API. | ||
57 | |||
58 | The problem can be simply solved if you consider that a PPS source is | ||
59 | not always connected with a GPS data source. | ||
60 | |||
61 | So your programs should check if the GPS data source (the serial port | ||
62 | for instance) is a PPS source too, and if not they should provide the | ||
63 | possibility to open another device as PPS source. | ||
64 | |||
65 | In LinuxPPS the PPS sources are simply char devices usually mapped | ||
66 | into files /dev/pps0, /dev/pps1, etc.. | ||
67 | |||
68 | |||
69 | Coding example | ||
70 | -------------- | ||
71 | |||
72 | To register a PPS source into the kernel you should define a struct | ||
73 | pps_source_info_s as follows: | ||
74 | |||
75 | static struct pps_source_info pps_ktimer_info = { | ||
76 | .name = "ktimer", | ||
77 | .path = "", | ||
78 | .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | \ | ||
79 | PPS_ECHOASSERT | \ | ||
80 | PPS_CANWAIT | PPS_TSFMT_TSPEC, | ||
81 | .echo = pps_ktimer_echo, | ||
82 | .owner = THIS_MODULE, | ||
83 | }; | ||
84 | |||
85 | and then calling the function pps_register_source() in your | ||
86 | intialization routine as follows: | ||
87 | |||
88 | source = pps_register_source(&pps_ktimer_info, | ||
89 | PPS_CAPTUREASSERT | PPS_OFFSETASSERT); | ||
90 | |||
91 | The pps_register_source() prototype is: | ||
92 | |||
93 | int pps_register_source(struct pps_source_info_s *info, int default_params) | ||
94 | |||
95 | where "info" is a pointer to a structure that describes a particular | ||
96 | PPS source, "default_params" tells the system what the initial default | ||
97 | parameters for the device should be (it is obvious that these parameters | ||
98 | must be a subset of ones defined in the struct | ||
99 | pps_source_info_s which describe the capabilities of the driver). | ||
100 | |||
101 | Once you have registered a new PPS source into the system you can | ||
102 | signal an assert event (for example in the interrupt handler routine) | ||
103 | just using: | ||
104 | |||
105 | pps_event(source, &ts, PPS_CAPTUREASSERT, ptr) | ||
106 | |||
107 | where "ts" is the event's timestamp. | ||
108 | |||
109 | The same function may also run the defined echo function | ||
110 | (pps_ktimer_echo(), passing to it the "ptr" pointer) if the user | ||
111 | asked for that... etc.. | ||
112 | |||
113 | Please see the file drivers/pps/clients/ktimer.c for example code. | ||
114 | |||
115 | |||
116 | SYSFS support | ||
117 | ------------- | ||
118 | |||
119 | If the SYSFS filesystem is enabled in the kernel it provides a new class: | ||
120 | |||
121 | $ ls /sys/class/pps/ | ||
122 | pps0/ pps1/ pps2/ | ||
123 | |||
124 | Every directory is the ID of a PPS sources defined in the system and | ||
125 | inside you find several files: | ||
126 | |||
127 | $ ls /sys/class/pps/pps0/ | ||
128 | assert clear echo mode name path subsystem@ uevent | ||
129 | |||
130 | Inside each "assert" and "clear" file you can find the timestamp and a | ||
131 | sequence number: | ||
132 | |||
133 | $ cat /sys/class/pps/pps0/assert | ||
134 | 1170026870.983207967#8 | ||
135 | |||
136 | Where before the "#" is the timestamp in seconds; after it is the | ||
137 | sequence number. Other files are: | ||
138 | |||
139 | * echo: reports if the PPS source has an echo function or not; | ||
140 | |||
141 | * mode: reports available PPS functioning modes; | ||
142 | |||
143 | * name: reports the PPS source's name; | ||
144 | |||
145 | * path: reports the PPS source's device path, that is the device the | ||
146 | PPS source is connected to (if it exists). | ||
147 | |||
148 | |||
149 | Testing the PPS support | ||
150 | ----------------------- | ||
151 | |||
152 | In order to test the PPS support even without specific hardware you can use | ||
153 | the ktimer driver (see the client subsection in the PPS configuration menu) | ||
154 | and the userland tools provided into Documentaion/pps/ directory. | ||
155 | |||
156 | Once you have enabled the compilation of ktimer just modprobe it (if | ||
157 | not statically compiled): | ||
158 | |||
159 | # modprobe ktimer | ||
160 | |||
161 | and the run ppstest as follow: | ||
162 | |||
163 | $ ./ppstest /dev/pps0 | ||
164 | trying PPS source "/dev/pps1" | ||
165 | found PPS source "/dev/pps1" | ||
166 | ok, found 1 source(s), now start fetching data... | ||
167 | source 0 - assert 1186592699.388832443, sequence: 364 - clear 0.000000000, sequence: 0 | ||
168 | source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0 | ||
169 | source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0 | ||
170 | |||
171 | Please, note that to compile userland programs you need the file timepps.h | ||
172 | (see Documentation/pps/). | ||
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt index 7224459b469e..aae8355d3166 100644 --- a/Documentation/rbtree.txt +++ b/Documentation/rbtree.txt | |||
@@ -131,8 +131,8 @@ Example: | |||
131 | } | 131 | } |
132 | 132 | ||
133 | /* Add new node and rebalance tree. */ | 133 | /* Add new node and rebalance tree. */ |
134 | rb_link_node(data->node, parent, new); | 134 | rb_link_node(&data->node, parent, new); |
135 | rb_insert_color(data->node, root); | 135 | rb_insert_color(&data->node, root); |
136 | 136 | ||
137 | return TRUE; | 137 | return TRUE; |
138 | } | 138 | } |
@@ -146,10 +146,10 @@ To remove an existing node from a tree, call: | |||
146 | 146 | ||
147 | Example: | 147 | Example: |
148 | 148 | ||
149 | struct mytype *data = mysearch(mytree, "walrus"); | 149 | struct mytype *data = mysearch(&mytree, "walrus"); |
150 | 150 | ||
151 | if (data) { | 151 | if (data) { |
152 | rb_erase(data->node, mytree); | 152 | rb_erase(&data->node, &mytree); |
153 | myfree(data); | 153 | myfree(data); |
154 | } | 154 | } |
155 | 155 | ||
@@ -188,5 +188,5 @@ Example: | |||
188 | 188 | ||
189 | struct rb_node *node; | 189 | struct rb_node *node; |
190 | for (node = rb_first(&mytree); node; node = rb_next(node)) | 190 | for (node = rb_first(&mytree); node; node = rb_next(node)) |
191 | printk("key=%s\n", rb_entry(node, int, keystring)); | 191 | printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); |
192 | 192 | ||
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 4d3ee317a4a3..b4860509c319 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt | |||
@@ -1,575 +1,139 @@ | |||
1 | rfkill - RF switch subsystem support | 1 | rfkill - RF kill switch support |
2 | ==================================== | 2 | =============================== |
3 | 3 | ||
4 | 1 Introduction | 4 | 1. Introduction |
5 | 2 Implementation details | 5 | 2. Implementation details |
6 | 3 Kernel driver guidelines | 6 | 3. Kernel API |
7 | 3.1 wireless device drivers | 7 | 4. Userspace support |
8 | 3.2 platform/switch drivers | ||
9 | 3.3 input device drivers | ||
10 | 4 Kernel API | ||
11 | 5 Userspace support | ||
12 | 8 | ||
13 | 9 | ||
14 | 1. Introduction: | 10 | 1. Introduction |
15 | 11 | ||
16 | The rfkill switch subsystem exists to add a generic interface to circuitry that | 12 | The rfkill subsystem provides a generic interface to disabling any radio |
17 | can enable or disable the signal output of a wireless *transmitter* of any | 13 | transmitter in the system. When a transmitter is blocked, it shall not |
18 | type. By far, the most common use is to disable radio-frequency transmitters. | 14 | radiate any power. |
19 | 15 | ||
20 | Note that disabling the signal output means that the the transmitter is to be | 16 | The subsystem also provides the ability to react on button presses and |
21 | made to not emit any energy when "blocked". rfkill is not about blocking data | 17 | disable all transmitters of a certain type (or all). This is intended for |
22 | transmissions, it is about blocking energy emission. | 18 | situations where transmitters need to be turned off, for example on |
19 | aircraft. | ||
23 | 20 | ||
24 | The rfkill subsystem offers support for keys and switches often found on | 21 | The rfkill subsystem has a concept of "hard" and "soft" block, which |
25 | laptops to enable wireless devices like WiFi and Bluetooth, so that these keys | 22 | differ little in their meaning (block == transmitters off) but rather in |
26 | and switches actually perform an action in all wireless devices of a given type | 23 | whether they can be changed or not: |
27 | attached to the system. | 24 | - hard block: read-only radio block that cannot be overriden by software |
25 | - soft block: writable radio block (need not be readable) that is set by | ||
26 | the system software. | ||
28 | 27 | ||
29 | The buttons to enable and disable the wireless transmitters are important in | ||
30 | situations where the user is for example using his laptop on a location where | ||
31 | radio-frequency transmitters _must_ be disabled (e.g. airplanes). | ||
32 | 28 | ||
33 | Because of this requirement, userspace support for the keys should not be made | 29 | 2. Implementation details |
34 | mandatory. Because userspace might want to perform some additional smarter | ||
35 | tasks when the key is pressed, rfkill provides userspace the possibility to | ||
36 | take over the task to handle the key events. | ||
37 | 30 | ||
38 | =============================================================================== | 31 | The rfkill subsystem is composed of three main components: |
39 | 2: Implementation details | 32 | * the rfkill core, |
33 | * the deprecated rfkill-input module (an input layer handler, being | ||
34 | replaced by userspace policy code) and | ||
35 | * the rfkill drivers. | ||
40 | 36 | ||
41 | The rfkill subsystem is composed of various components: the rfkill class, the | 37 | The rfkill core provides API for kernel drivers to register their radio |
42 | rfkill-input module (an input layer handler), and some specific input layer | 38 | transmitter with the kernel, methods for turning it on and off and, letting |
43 | events. | 39 | the system know about hardware-disabled states that may be implemented on |
40 | the device. | ||
44 | 41 | ||
45 | The rfkill class provides kernel drivers with an interface that allows them to | 42 | The rfkill core code also notifies userspace of state changes, and provides |
46 | know when they should enable or disable a wireless network device transmitter. | 43 | ways for userspace to query the current states. See the "Userspace support" |
47 | This is enabled by the CONFIG_RFKILL Kconfig option. | 44 | section below. |
48 | 45 | ||
49 | The rfkill class support makes sure userspace will be notified of all state | 46 | When the device is hard-blocked (either by a call to rfkill_set_hw_state() |
50 | changes on rfkill devices through uevents. It provides a notification chain | 47 | or from query_hw_block) set_block() will be invoked for additional software |
51 | for interested parties in the kernel to also get notified of rfkill state | 48 | block, but drivers can ignore the method call since they can use the return |
52 | changes in other drivers. It creates several sysfs entries which can be used | 49 | value of the function rfkill_set_hw_state() to sync the software state |
53 | by userspace. See section "Userspace support". | 50 | instead of keeping track of calls to set_block(). In fact, drivers should |
54 | 51 | use the return value of rfkill_set_hw_state() unless the hardware actually | |
55 | The rfkill-input module provides the kernel with the ability to implement a | 52 | keeps track of soft and hard block separately. |
56 | basic response when the user presses a key or button (or toggles a switch) | ||
57 | related to rfkill functionality. It is an in-kernel implementation of default | ||
58 | policy of reacting to rfkill-related input events and neither mandatory nor | ||
59 | required for wireless drivers to operate. It is enabled by the | ||
60 | CONFIG_RFKILL_INPUT Kconfig option. | ||
61 | |||
62 | rfkill-input is a rfkill-related events input layer handler. This handler will | ||
63 | listen to all rfkill key events and will change the rfkill state of the | ||
64 | wireless devices accordingly. With this option enabled userspace could either | ||
65 | do nothing or simply perform monitoring tasks. | ||
66 | |||
67 | The rfkill-input module also provides EPO (emergency power-off) functionality | ||
68 | for all wireless transmitters. This function cannot be overridden, and it is | ||
69 | always active. rfkill EPO is related to *_RFKILL_ALL input layer events. | ||
70 | |||
71 | |||
72 | Important terms for the rfkill subsystem: | ||
73 | |||
74 | In order to avoid confusion, we avoid the term "switch" in rfkill when it is | ||
75 | referring to an electronic control circuit that enables or disables a | ||
76 | transmitter. We reserve it for the physical device a human manipulates | ||
77 | (which is an input device, by the way): | ||
78 | |||
79 | rfkill switch: | ||
80 | |||
81 | A physical device a human manipulates. Its state can be perceived by | ||
82 | the kernel either directly (through a GPIO pin, ACPI GPE) or by its | ||
83 | effect on a rfkill line of a wireless device. | ||
84 | |||
85 | rfkill controller: | ||
86 | |||
87 | A hardware circuit that controls the state of a rfkill line, which a | ||
88 | kernel driver can interact with *to modify* that state (i.e. it has | ||
89 | either write-only or read/write access). | ||
90 | |||
91 | rfkill line: | ||
92 | |||
93 | An input channel (hardware or software) of a wireless device, which | ||
94 | causes a wireless transmitter to stop emitting energy (BLOCK) when it | ||
95 | is active. Point of view is extremely important here: rfkill lines are | ||
96 | always seen from the PoV of a wireless device (and its driver). | ||
97 | |||
98 | soft rfkill line/software rfkill line: | ||
99 | |||
100 | A rfkill line the wireless device driver can directly change the state | ||
101 | of. Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED. | ||
102 | |||
103 | hard rfkill line/hardware rfkill line: | ||
104 | |||
105 | A rfkill line that works fully in hardware or firmware, and that cannot | ||
106 | be overridden by the kernel driver. The hardware device or the | ||
107 | firmware just exports its status to the driver, but it is read-only. | ||
108 | Related to rfkill_state RFKILL_STATE_HARD_BLOCKED. | ||
109 | |||
110 | The enum rfkill_state describes the rfkill state of a transmitter: | ||
111 | |||
112 | When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state, | ||
113 | the wireless transmitter (radio TX circuit for example) is *enabled*. When the | ||
114 | it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the | ||
115 | wireless transmitter is to be *blocked* from operating. | ||
116 | |||
117 | RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change | ||
118 | that state. RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio() | ||
119 | will not be able to change the state and will return with a suitable error if | ||
120 | attempts are made to set the state to RFKILL_STATE_UNBLOCKED. | ||
121 | |||
122 | RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is | ||
123 | locked in the BLOCKED state by a hardwire rfkill line (typically an input pin | ||
124 | that, when active, forces the transmitter to be disabled) which the driver | ||
125 | CANNOT override. | ||
126 | |||
127 | Full rfkill functionality requires two different subsystems to cooperate: the | ||
128 | input layer and the rfkill class. The input layer issues *commands* to the | ||
129 | entire system requesting that devices registered to the rfkill class change | ||
130 | state. The way this interaction happens is not complex, but it is not obvious | ||
131 | either: | ||
132 | |||
133 | Kernel Input layer: | ||
134 | |||
135 | * Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and | ||
136 | other such events when the user presses certain keys, buttons, or | ||
137 | toggles certain physical switches. | ||
138 | |||
139 | THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE | ||
140 | KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT. It is | ||
141 | used to issue *commands* for the system to change behaviour, and these | ||
142 | commands may or may not be carried out by some kernel driver or | ||
143 | userspace application. It follows that doing user feedback based only | ||
144 | on input events is broken, as there is no guarantee that an input event | ||
145 | will be acted upon. | ||
146 | |||
147 | Most wireless communication device drivers implementing rfkill | ||
148 | functionality MUST NOT generate these events, and have no reason to | ||
149 | register themselves with the input layer. Doing otherwise is a common | ||
150 | misconception. There is an API to propagate rfkill status change | ||
151 | information, and it is NOT the input layer. | ||
152 | |||
153 | rfkill class: | ||
154 | |||
155 | * Calls a hook in a driver to effectively change the wireless | ||
156 | transmitter state; | ||
157 | * Keeps track of the wireless transmitter state (with help from | ||
158 | the driver); | ||
159 | * Generates userspace notifications (uevents) and a call to a | ||
160 | notification chain (kernel) when there is a wireless transmitter | ||
161 | state change; | ||
162 | * Connects a wireless communications driver with the common rfkill | ||
163 | control system, which, for example, allows actions such as | ||
164 | "switch all bluetooth devices offline" to be carried out by | ||
165 | userspace or by rfkill-input. | ||
166 | |||
167 | THE RFKILL CLASS NEVER ISSUES INPUT EVENTS. THE RFKILL CLASS DOES | ||
168 | NOT LISTEN TO INPUT EVENTS. NO DRIVER USING THE RFKILL CLASS SHALL | ||
169 | EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS. Doing otherwise is | ||
170 | a layering violation. | ||
171 | |||
172 | Most wireless data communication drivers in the kernel have just to | ||
173 | implement the rfkill class API to work properly. Interfacing to the | ||
174 | input layer is not often required (and is very often a *bug*) on | ||
175 | wireless drivers. | ||
176 | |||
177 | Platform drivers often have to attach to the input layer to *issue* | ||
178 | (but never to listen to) rfkill events for rfkill switches, and also to | ||
179 | the rfkill class to export a control interface for the platform rfkill | ||
180 | controllers to the rfkill subsystem. This does NOT mean the rfkill | ||
181 | switch is attached to a rfkill class (doing so is almost always wrong). | ||
182 | It just means the same kernel module is the driver for different | ||
183 | devices (rfkill switches and rfkill controllers). | ||
184 | |||
185 | |||
186 | Userspace input handlers (uevents) or kernel input handlers (rfkill-input): | ||
187 | |||
188 | * Implements the policy of what should happen when one of the input | ||
189 | layer events related to rfkill operation is received. | ||
190 | * Uses the sysfs interface (userspace) or private rfkill API calls | ||
191 | to tell the devices registered with the rfkill class to change | ||
192 | their state (i.e. translates the input layer event into real | ||
193 | action). | ||
194 | |||
195 | * rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0 | ||
196 | (power off all transmitters) in a special way: it ignores any | ||
197 | overrides and local state cache and forces all transmitters to the | ||
198 | RFKILL_STATE_SOFT_BLOCKED state (including those which are already | ||
199 | supposed to be BLOCKED). | ||
200 | * rfkill EPO will remain active until rfkill-input receives an | ||
201 | EV_SW SW_RFKILL_ALL 1 event. While the EPO is active, transmitters | ||
202 | are locked in the blocked state (rfkill will refuse to unblock them). | ||
203 | * rfkill-input implements different policies that the user can | ||
204 | select for handling EV_SW SW_RFKILL_ALL 1. It will unlock rfkill, | ||
205 | and either do nothing (leave transmitters blocked, but now unlocked), | ||
206 | restore the transmitters to their state before the EPO, or unblock | ||
207 | them all. | ||
208 | |||
209 | Userspace uevent handler or kernel platform-specific drivers hooked to the | ||
210 | rfkill notifier chain: | ||
211 | |||
212 | * Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents, | ||
213 | in order to know when a device that is registered with the rfkill | ||
214 | class changes state; | ||
215 | * Issues feedback notifications to the user; | ||
216 | * In the rare platforms where this is required, synthesizes an input | ||
217 | event to command all *OTHER* rfkill devices to also change their | ||
218 | statues when a specific rfkill device changes state. | ||
219 | |||
220 | |||
221 | =============================================================================== | ||
222 | 3: Kernel driver guidelines | ||
223 | |||
224 | Remember: point-of-view is everything for a driver that connects to the rfkill | ||
225 | subsystem. All the details below must be measured/perceived from the point of | ||
226 | view of the specific driver being modified. | ||
227 | |||
228 | The first thing one needs to know is whether his driver should be talking to | ||
229 | the rfkill class or to the input layer. In rare cases (platform drivers), it | ||
230 | could happen that you need to do both, as platform drivers often handle a | ||
231 | variety of devices in the same driver. | ||
232 | |||
233 | Do not mistake input devices for rfkill controllers. The only type of "rfkill | ||
234 | switch" device that is to be registered with the rfkill class are those | ||
235 | directly controlling the circuits that cause a wireless transmitter to stop | ||
236 | working (or the software equivalent of them), i.e. what we call a rfkill | ||
237 | controller. Every other kind of "rfkill switch" is just an input device and | ||
238 | MUST NOT be registered with the rfkill class. | ||
239 | |||
240 | A driver should register a device with the rfkill class when ALL of the | ||
241 | following conditions are met (they define a rfkill controller): | ||
242 | |||
243 | 1. The device is/controls a data communications wireless transmitter; | ||
244 | |||
245 | 2. The kernel can interact with the hardware/firmware to CHANGE the wireless | ||
246 | transmitter state (block/unblock TX operation); | ||
247 | |||
248 | 3. The transmitter can be made to not emit any energy when "blocked": | ||
249 | rfkill is not about blocking data transmissions, it is about blocking | ||
250 | energy emission; | ||
251 | |||
252 | A driver should register a device with the input subsystem to issue | ||
253 | rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX, | ||
254 | SW_RFKILL_ALL, etc) when ALL of the folowing conditions are met: | ||
255 | |||
256 | 1. It is directly related to some physical device the user interacts with, to | ||
257 | command the O.S./firmware/hardware to enable/disable a data communications | ||
258 | wireless transmitter. | ||
259 | |||
260 | Examples of the physical device are: buttons, keys and switches the user | ||
261 | will press/touch/slide/switch to enable or disable the wireless | ||
262 | communication device. | ||
263 | |||
264 | 2. It is NOT slaved to another device, i.e. there is no other device that | ||
265 | issues rfkill-related input events in preference to this one. | ||
266 | |||
267 | Please refer to the corner cases and examples section for more details. | ||
268 | |||
269 | When in doubt, do not issue input events. For drivers that should generate | ||
270 | input events in some platforms, but not in others (e.g. b43), the best solution | ||
271 | is to NEVER generate input events in the first place. That work should be | ||
272 | deferred to a platform-specific kernel module (which will know when to generate | ||
273 | events through the rfkill notifier chain) or to userspace. This avoids the | ||
274 | usual maintenance problems with DMI whitelisting. | ||
275 | 53 | ||
276 | 54 | ||
277 | Corner cases and examples: | 55 | 3. Kernel API |
278 | ==================================== | ||
279 | |||
280 | 1. If the device is an input device that, because of hardware or firmware, | ||
281 | causes wireless transmitters to be blocked regardless of the kernel's will, it | ||
282 | is still just an input device, and NOT to be registered with the rfkill class. | ||
283 | 56 | ||
284 | 2. If the wireless transmitter switch control is read-only, it is an input | ||
285 | device and not to be registered with the rfkill class (and maybe not to be made | ||
286 | an input layer event source either, see below). | ||
287 | 57 | ||
288 | 3. If there is some other device driver *closer* to the actual hardware the | 58 | Drivers for radio transmitters normally implement an rfkill driver. |
289 | user interacted with (the button/switch/key) to issue an input event, THAT is | ||
290 | the device driver that should be issuing input events. | ||
291 | |||
292 | E.g: | ||
293 | [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input] | ||
294 | (platform driver) (wireless card driver) | ||
295 | |||
296 | The user is closer to the RFKILL slide switch plaform driver, so the driver | ||
297 | which must issue input events is the platform driver looking at the GPIO | ||
298 | hardware, and NEVER the wireless card driver (which is just a slave). It is | ||
299 | very likely that there are other leaves than just the WLAN card rf-kill input | ||
300 | (e.g. a bluetooth card, etc)... | ||
301 | 59 | ||
302 | On the other hand, some embedded devices do this: | 60 | Platform drivers might implement input devices if the rfkill button is just |
61 | that, a button. If that button influences the hardware then you need to | ||
62 | implement an rfkill driver instead. This also applies if the platform provides | ||
63 | a way to turn on/off the transmitter(s). | ||
303 | 64 | ||
304 | [RFKILL slider switch] -- [WLAN card rf-kill input] | 65 | For some platforms, it is possible that the hardware state changes during |
305 | (wireless card driver) | 66 | suspend/hibernation, in which case it will be necessary to update the rfkill |
67 | core with the current state is at resume time. | ||
306 | 68 | ||
307 | In this situation, the wireless card driver *could* register itself as an input | 69 | To create an rfkill driver, driver's Kconfig needs to have |
308 | device and issue rf-kill related input events... but in order to AVOID the need | ||
309 | for DMI whitelisting, the wireless card driver does NOT do it. Userspace (HAL) | ||
310 | or a platform driver (that exists only on these embedded devices) will do the | ||
311 | dirty job of issuing the input events. | ||
312 | 70 | ||
71 | depends on RFKILL || !RFKILL | ||
313 | 72 | ||
314 | COMMON MISTAKES in kernel drivers, related to rfkill: | 73 | to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL |
315 | ==================================== | 74 | case allows the driver to be built when rfkill is not configured, which which |
75 | case all rfkill API can still be used but will be provided by static inlines | ||
76 | which compile to almost nothing. | ||
316 | 77 | ||
317 | 1. NEVER confuse input device keys and buttons with input device switches. | 78 | Calling rfkill_set_hw_state() when a state change happens is required from |
79 | rfkill drivers that control devices that can be hard-blocked unless they also | ||
80 | assign the poll_hw_block() callback (then the rfkill core will poll the | ||
81 | device). Don't do this unless you cannot get the event in any other way. | ||
318 | 82 | ||
319 | 1a. Switches are always set or reset. They report the current state | ||
320 | (on position or off position). | ||
321 | 83 | ||
322 | 1b. Keys and buttons are either in the pressed or not-pressed state, and | ||
323 | that's it. A "button" that latches down when you press it, and | ||
324 | unlatches when you press it again is in fact a switch as far as input | ||
325 | devices go. | ||
326 | 84 | ||
327 | Add the SW_* events you need for switches, do NOT try to emulate a button using | 85 | 5. Userspace support |
328 | KEY_* events just because there is no such SW_* event yet. Do NOT try to use, | ||
329 | for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead. | ||
330 | 86 | ||
331 | 2. Input device switches (sources of EV_SW events) DO store their current state | 87 | The recommended userspace interface to use is /dev/rfkill, which is a misc |
332 | (so you *must* initialize it by issuing a gratuitous input layer event on | 88 | character device that allows userspace to obtain and set the state of rfkill |
333 | driver start-up and also when resuming from sleep), and that state CAN be | 89 | devices and sets of devices. It also notifies userspace about device addition |
334 | queried from userspace through IOCTLs. There is no sysfs interface for this, | 90 | and removal. The API is a simple read/write API that is defined in |
335 | but that doesn't mean you should break things trying to hook it to the rfkill | 91 | linux/rfkill.h, with one ioctl that allows turning off the deprecated input |
336 | class to get a sysfs interface :-) | 92 | handler in the kernel for the transition period. |
337 | 93 | ||
338 | 3. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the | 94 | Except for the one ioctl, communication with the kernel is done via read() |
339 | correct event for your switch/button. These events are emergency power-off | 95 | and write() of instances of 'struct rfkill_event'. In this structure, the |
340 | events when they are trying to turn the transmitters off. An example of an | 96 | soft and hard block are properly separated (unlike sysfs, see below) and |
341 | input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill | 97 | userspace is able to get a consistent snapshot of all rfkill devices in the |
342 | switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch. | 98 | system. Also, it is possible to switch all rfkill drivers (or all drivers of |
343 | An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by | 99 | a specified type) into a state which also updates the default state for |
344 | default, is any sort of hot key that is type-specific (e.g. the one for WLAN). | 100 | hotplugged devices. |
345 | 101 | ||
102 | After an application opens /dev/rfkill, it can read the current state of | ||
103 | all devices, and afterwards can poll the descriptor for hotplug or state | ||
104 | change events. | ||
346 | 105 | ||
347 | 3.1 Guidelines for wireless device drivers | 106 | Applications must ignore operations (the "op" field) they do not handle, |
348 | ------------------------------------------ | 107 | this allows the API to be extended in the future. |
349 | 108 | ||
350 | (in this text, rfkill->foo means the foo field of struct rfkill). | 109 | Additionally, each rfkill device is registered in sysfs and there has the |
351 | 110 | following attributes: | |
352 | 1. Each independent transmitter in a wireless device (usually there is only one | ||
353 | transmitter per device) should have a SINGLE rfkill class attached to it. | ||
354 | |||
355 | 2. If the device does not have any sort of hardware assistance to allow the | ||
356 | driver to rfkill the device, the driver should emulate it by taking all actions | ||
357 | required to silence the transmitter. | ||
358 | |||
359 | 3. If it is impossible to silence the transmitter (i.e. it still emits energy, | ||
360 | even if it is just in brief pulses, when there is no data to transmit and there | ||
361 | is no hardware support to turn it off) do NOT lie to the users. Do not attach | ||
362 | it to a rfkill class. The rfkill subsystem does not deal with data | ||
363 | transmission, it deals with energy emission. If the transmitter is emitting | ||
364 | energy, it is not blocked in rfkill terms. | ||
365 | |||
366 | 4. It doesn't matter if the device has multiple rfkill input lines affecting | ||
367 | the same transmitter, their combined state is to be exported as a single state | ||
368 | per transmitter (see rule 1). | ||
369 | |||
370 | This rule exists because users of the rfkill subsystem expect to get (and set, | ||
371 | when possible) the overall transmitter rfkill state, not of a particular rfkill | ||
372 | line. | ||
373 | |||
374 | 5. The wireless device driver MUST NOT leave the transmitter enabled during | ||
375 | suspend and hibernation unless: | ||
376 | |||
377 | 5.1. The transmitter has to be enabled for some sort of functionality | ||
378 | like wake-on-wireless-packet or autonomous packed forwarding in a mesh | ||
379 | network, and that functionality is enabled for this suspend/hibernation | ||
380 | cycle. | ||
381 | |||
382 | AND | ||
383 | |||
384 | 5.2. The device was not on a user-requested BLOCKED state before | ||
385 | the suspend (i.e. the driver must NOT unblock a device, not even | ||
386 | to support wake-on-wireless-packet or remain in the mesh). | ||
387 | |||
388 | In other words, there is absolutely no allowed scenario where a driver can | ||
389 | automatically take action to unblock a rfkill controller (obviously, this deals | ||
390 | with scenarios where soft-blocking or both soft and hard blocking is happening. | ||
391 | Scenarios where hardware rfkill lines are the only ones blocking the | ||
392 | transmitter are outside of this rule, since the wireless device driver does not | ||
393 | control its input hardware rfkill lines in the first place). | ||
394 | |||
395 | 6. During resume, rfkill will try to restore its previous state. | ||
396 | |||
397 | 7. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio | ||
398 | until it is resumed. | ||
399 | |||
400 | |||
401 | Example of a WLAN wireless driver connected to the rfkill subsystem: | ||
402 | -------------------------------------------------------------------- | ||
403 | |||
404 | A certain WLAN card has one input pin that causes it to block the transmitter | ||
405 | and makes the status of that input pin available (only for reading!) to the | ||
406 | kernel driver. This is a hard rfkill input line (it cannot be overridden by | ||
407 | the kernel driver). | ||
408 | |||
409 | The card also has one PCI register that, if manipulated by the driver, causes | ||
410 | it to block the transmitter. This is a soft rfkill input line. | ||
411 | |||
412 | It has also a thermal protection circuitry that shuts down its transmitter if | ||
413 | the card overheats, and makes the status of that protection available (only for | ||
414 | reading!) to the kernel driver. This is also a hard rfkill input line. | ||
415 | |||
416 | If either one of these rfkill lines are active, the transmitter is blocked by | ||
417 | the hardware and forced offline. | ||
418 | |||
419 | The driver should allocate and attach to its struct device *ONE* instance of | ||
420 | the rfkill class (there is only one transmitter). | ||
421 | |||
422 | It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if | ||
423 | either one of its two hard rfkill input lines are active. If the two hard | ||
424 | rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft | ||
425 | rfkill input line is active. Only if none of the rfkill input lines are | ||
426 | active, will it return RFKILL_STATE_UNBLOCKED. | ||
427 | |||
428 | Since the device has a hardware rfkill line, it IS subject to state changes | ||
429 | external to rfkill. Therefore, the driver must make sure that it calls | ||
430 | rfkill_force_state() to keep the status always up-to-date, and it must do a | ||
431 | rfkill_force_state() on resume from sleep. | ||
432 | |||
433 | Every time the driver gets a notification from the card that one of its rfkill | ||
434 | lines changed state (polling might be needed on badly designed cards that don't | ||
435 | generate interrupts for such events), it recomputes the rfkill state as per | ||
436 | above, and calls rfkill_force_state() to update it. | ||
437 | |||
438 | The driver should implement the toggle_radio() hook, that: | ||
439 | |||
440 | 1. Returns an error if one of the hardware rfkill lines are active, and the | ||
441 | caller asked for RFKILL_STATE_UNBLOCKED. | ||
442 | |||
443 | 2. Activates the soft rfkill line if the caller asked for state | ||
444 | RFKILL_STATE_SOFT_BLOCKED. It should do this even if one of the hard rfkill | ||
445 | lines are active, effectively double-blocking the transmitter. | ||
446 | |||
447 | 3. Deactivates the soft rfkill line if none of the hardware rfkill lines are | ||
448 | active and the caller asked for RFKILL_STATE_UNBLOCKED. | ||
449 | |||
450 | =============================================================================== | ||
451 | 4: Kernel API | ||
452 | |||
453 | To build a driver with rfkill subsystem support, the driver should depend on | ||
454 | (or select) the Kconfig symbol RFKILL; it should _not_ depend on RKFILL_INPUT. | ||
455 | |||
456 | The hardware the driver talks to may be write-only (where the current state | ||
457 | of the hardware is unknown), or read-write (where the hardware can be queried | ||
458 | about its current state). | ||
459 | |||
460 | The rfkill class will call the get_state hook of a device every time it needs | ||
461 | to know the *real* current state of the hardware. This can happen often, but | ||
462 | it does not do any polling, so it is not enough on hardware that is subject | ||
463 | to state changes outside of the rfkill subsystem. | ||
464 | |||
465 | Therefore, calling rfkill_force_state() when a state change happens is | ||
466 | mandatory when the device has a hardware rfkill line, or when something else | ||
467 | like the firmware could cause its state to be changed without going through the | ||
468 | rfkill class. | ||
469 | |||
470 | Some hardware provides events when its status changes. In these cases, it is | ||
471 | best for the driver to not provide a get_state hook, and instead register the | ||
472 | rfkill class *already* with the correct status, and keep it updated using | ||
473 | rfkill_force_state() when it gets an event from the hardware. | ||
474 | |||
475 | rfkill_force_state() must be used on the device resume handlers to update the | ||
476 | rfkill status, should there be any chance of the device status changing during | ||
477 | the sleep. | ||
478 | |||
479 | There is no provision for a statically-allocated rfkill struct. You must | ||
480 | use rfkill_allocate() to allocate one. | ||
481 | |||
482 | You should: | ||
483 | - rfkill_allocate() | ||
484 | - modify rfkill fields (flags, name) | ||
485 | - modify state to the current hardware state (THIS IS THE ONLY TIME | ||
486 | YOU CAN ACCESS state DIRECTLY) | ||
487 | - rfkill_register() | ||
488 | |||
489 | The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through | ||
490 | a suitable return of get_state() or through rfkill_force_state(). | ||
491 | |||
492 | When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch | ||
493 | it to a different state is through a suitable return of get_state() or through | ||
494 | rfkill_force_state(). | ||
495 | |||
496 | If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED | ||
497 | when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should | ||
498 | not return an error. Instead, it should try to double-block the transmitter, | ||
499 | so that its state will change from RFKILL_STATE_HARD_BLOCKED to | ||
500 | RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease. | ||
501 | |||
502 | Please refer to the source for more documentation. | ||
503 | |||
504 | =============================================================================== | ||
505 | 5: Userspace support | ||
506 | |||
507 | rfkill devices issue uevents (with an action of "change"), with the following | ||
508 | environment variables set: | ||
509 | |||
510 | RFKILL_NAME | ||
511 | RFKILL_STATE | ||
512 | RFKILL_TYPE | ||
513 | |||
514 | The ABI for these variables is defined by the sysfs attributes. It is best | ||
515 | to take a quick look at the source to make sure of the possible values. | ||
516 | |||
517 | It is expected that HAL will trap those, and bridge them to DBUS, etc. These | ||
518 | events CAN and SHOULD be used to give feedback to the user about the rfkill | ||
519 | status of the system. | ||
520 | |||
521 | Input devices may issue events that are related to rfkill. These are the | ||
522 | various KEY_* events and SW_* events supported by rfkill-input.c. | ||
523 | |||
524 | ******IMPORTANT****** | ||
525 | When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL | ||
526 | SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it | ||
527 | has set to true the user_claim attribute for that particular switch. This rule | ||
528 | is *absolute*; do NOT violate it. | ||
529 | ******IMPORTANT****** | ||
530 | |||
531 | Userspace must not assume it is the only source of control for rfkill switches. | ||
532 | Their state CAN and WILL change due to firmware actions, direct user actions, | ||
533 | and the rfkill-input EPO override for *_RFKILL_ALL. | ||
534 | |||
535 | When rfkill-input is not active, userspace must initiate a rfkill status | ||
536 | change by writing to the "state" attribute in order for anything to happen. | ||
537 | |||
538 | Take particular care to implement EV_SW SW_RFKILL_ALL properly. When that | ||
539 | switch is set to OFF, *every* rfkill device *MUST* be immediately put into the | ||
540 | RFKILL_STATE_SOFT_BLOCKED state, no questions asked. | ||
541 | |||
542 | The following sysfs entries will be created: | ||
543 | 111 | ||
544 | name: Name assigned by driver to this key (interface or driver name). | 112 | name: Name assigned by driver to this key (interface or driver name). |
545 | type: Name of the key type ("wlan", "bluetooth", etc). | 113 | type: Driver type string ("wlan", "bluetooth", etc). |
114 | persistent: Whether the soft blocked state is initialised from | ||
115 | non-volatile storage at startup. | ||
546 | state: Current state of the transmitter | 116 | state: Current state of the transmitter |
547 | 0: RFKILL_STATE_SOFT_BLOCKED | 117 | 0: RFKILL_STATE_SOFT_BLOCKED |
548 | transmitter is forced off, but one can override it | 118 | transmitter is turned off by software |
549 | by a write to the state attribute; | ||
550 | 1: RFKILL_STATE_UNBLOCKED | 119 | 1: RFKILL_STATE_UNBLOCKED |
551 | transmiter is NOT forced off, and may operate if | 120 | transmitter is (potentially) active |
552 | all other conditions for such operation are met | ||
553 | (such as interface is up and configured, etc); | ||
554 | 2: RFKILL_STATE_HARD_BLOCKED | 121 | 2: RFKILL_STATE_HARD_BLOCKED |
555 | transmitter is forced off by something outside of | 122 | transmitter is forced off by something outside of |
556 | the driver's control. One cannot set a device to | 123 | the driver's control. |
557 | this state through writes to the state attribute; | 124 | This file is deprecated because it can only properly show |
558 | claim: 1: Userspace handles events, 0: Kernel handles events | 125 | three of the four possible states, soft-and-hard-blocked is |
559 | 126 | missing. | |
560 | Both the "state" and "claim" entries are also writable. For the "state" entry | 127 | claim: 0: Kernel handles events |
561 | this means that when 1 or 0 is written, the device rfkill state (if not yet in | 128 | This file is deprecated because there no longer is a way to |
562 | the requested state), will be will be toggled accordingly. | 129 | claim just control over a single rfkill instance. |
563 | 130 | ||
564 | For the "claim" entry writing 1 to it means that the kernel no longer handles | 131 | rfkill devices also issue uevents (with an action of "change"), with the |
565 | key events even though RFKILL_INPUT input was enabled. When "claim" has been | 132 | following environment variables set: |
566 | set to 0, userspace should make sure that it listens for the input events or | 133 | |
567 | check the sysfs "state" entry regularly to correctly perform the required tasks | 134 | RFKILL_NAME |
568 | when the rkfill key is pressed. | 135 | RFKILL_STATE |
569 | 136 | RFKILL_TYPE | |
570 | A note about input devices and EV_SW events: | 137 | |
571 | 138 | The contents of these variables corresponds to the "name", "state" and | |
572 | In order to know the current state of an input device switch (like | 139 | "type" sysfs files explained above. |
573 | SW_RFKILL_ALL), you will need to use an IOCTL. That information is not | ||
574 | available through sysfs in a generic way at this time, and it is not available | ||
575 | through the rfkill class AT ALL. | ||
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt index 535f69fab45f..fd1cd8aae4eb 100644 --- a/Documentation/robust-futex-ABI.txt +++ b/Documentation/robust-futex-ABI.txt | |||
@@ -135,7 +135,7 @@ manipulating this list), the user code must observe the following | |||
135 | protocol on 'lock entry' insertion and removal: | 135 | protocol on 'lock entry' insertion and removal: |
136 | 136 | ||
137 | On insertion: | 137 | On insertion: |
138 | 1) set the 'list_op_pending' word to the address of the 'lock word' | 138 | 1) set the 'list_op_pending' word to the address of the 'lock entry' |
139 | to be inserted, | 139 | to be inserted, |
140 | 2) acquire the futex lock, | 140 | 2) acquire the futex lock, |
141 | 3) add the lock entry, with its thread id (TID) in the bottom 29 bits | 141 | 3) add the lock entry, with its thread id (TID) in the bottom 29 bits |
@@ -143,7 +143,7 @@ On insertion: | |||
143 | 4) clear the 'list_op_pending' word. | 143 | 4) clear the 'list_op_pending' word. |
144 | 144 | ||
145 | On removal: | 145 | On removal: |
146 | 1) set the 'list_op_pending' word to the address of the 'lock word' | 146 | 1) set the 'list_op_pending' word to the address of the 'lock entry' |
147 | to be removed, | 147 | to be removed, |
148 | 2) remove the lock entry for this lock from the 'head' list, | 148 | 2) remove the lock entry for this lock from the 'head' list, |
149 | 2) release the futex lock, and | 149 | 2) release the futex lock, and |
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 10711d9f0788..1eb576a023bd 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt | |||
@@ -1984,7 +1984,7 @@ break *$pc | |||
1984 | 1984 | ||
1985 | break *0x400618 | 1985 | break *0x400618 |
1986 | 1986 | ||
1987 | heres a really useful one for large programs | 1987 | Here's a really useful one for large programs |
1988 | rbr | 1988 | rbr |
1989 | Set a breakpoint for all functions matching REGEXP | 1989 | Set a breakpoint for all functions matching REGEXP |
1990 | e.g. | 1990 | e.g. |
@@ -2211,7 +2211,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609. | |||
2211 | #5 0x51692c in readline_internal () at readline.c:521 | 2211 | #5 0x51692c in readline_internal () at readline.c:521 |
2212 | #6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") | 2212 | #6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") |
2213 | at readline.c:349 | 2213 | at readline.c:349 |
2214 | #7 0x4d7a8a in command_line_input (prrompt=0x564420 "(gdb) ", repeat=1, | 2214 | #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1, |
2215 | annotation_suffix=0x4d6b44 "prompt") at top.c:2091 | 2215 | annotation_suffix=0x4d6b44 "prompt") at top.c:2091 |
2216 | #8 0x4d6cf0 in command_loop () at top.c:1345 | 2216 | #8 0x4d6cf0 in command_loop () at top.c:1345 |
2217 | #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 | 2217 | #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 |
diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX index aabcc3a089ba..3c00c9c3219e 100644 --- a/Documentation/scheduler/00-INDEX +++ b/Documentation/scheduler/00-INDEX | |||
@@ -2,8 +2,6 @@ | |||
2 | - this file. | 2 | - this file. |
3 | sched-arch.txt | 3 | sched-arch.txt |
4 | - CPU Scheduler implementation hints for architecture specific code. | 4 | - CPU Scheduler implementation hints for architecture specific code. |
5 | sched-coding.txt | ||
6 | - reference for various scheduler-related methods in the O(1) scheduler. | ||
7 | sched-design-CFS.txt | 5 | sched-design-CFS.txt |
8 | - goals, design and implementation of the Complete Fair Scheduler. | 6 | - goals, design and implementation of the Complete Fair Scheduler. |
9 | sched-domains.txt | 7 | sched-domains.txt |
diff --git a/Documentation/scheduler/sched-coding.txt b/Documentation/scheduler/sched-coding.txt deleted file mode 100644 index cbd8db752acf..000000000000 --- a/Documentation/scheduler/sched-coding.txt +++ /dev/null | |||
@@ -1,126 +0,0 @@ | |||
1 | Reference for various scheduler-related methods in the O(1) scheduler | ||
2 | Robert Love <rml@tech9.net>, MontaVista Software | ||
3 | |||
4 | |||
5 | Note most of these methods are local to kernel/sched.c - this is by design. | ||
6 | The scheduler is meant to be self-contained and abstracted away. This document | ||
7 | is primarily for understanding the scheduler, not interfacing to it. Some of | ||
8 | the discussed interfaces, however, are general process/scheduling methods. | ||
9 | They are typically defined in include/linux/sched.h. | ||
10 | |||
11 | |||
12 | Main Scheduling Methods | ||
13 | ----------------------- | ||
14 | |||
15 | void load_balance(runqueue_t *this_rq, int idle) | ||
16 | Attempts to pull tasks from one cpu to another to balance cpu usage, | ||
17 | if needed. This method is called explicitly if the runqueues are | ||
18 | imbalanced or periodically by the timer tick. Prior to calling, | ||
19 | the current runqueue must be locked and interrupts disabled. | ||
20 | |||
21 | void schedule() | ||
22 | The main scheduling function. Upon return, the highest priority | ||
23 | process will be active. | ||
24 | |||
25 | |||
26 | Locking | ||
27 | ------- | ||
28 | |||
29 | Each runqueue has its own lock, rq->lock. When multiple runqueues need | ||
30 | to be locked, lock acquires must be ordered by ascending &runqueue value. | ||
31 | |||
32 | A specific runqueue is locked via | ||
33 | |||
34 | task_rq_lock(task_t pid, unsigned long *flags) | ||
35 | |||
36 | which disables preemption, disables interrupts, and locks the runqueue pid is | ||
37 | running on. Likewise, | ||
38 | |||
39 | task_rq_unlock(task_t pid, unsigned long *flags) | ||
40 | |||
41 | unlocks the runqueue pid is running on, restores interrupts to their previous | ||
42 | state, and reenables preemption. | ||
43 | |||
44 | The routines | ||
45 | |||
46 | double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) | ||
47 | |||
48 | and | ||
49 | |||
50 | double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) | ||
51 | |||
52 | safely lock and unlock, respectively, the two specified runqueues. They do | ||
53 | not, however, disable and restore interrupts. Users are required to do so | ||
54 | manually before and after calls. | ||
55 | |||
56 | |||
57 | Values | ||
58 | ------ | ||
59 | |||
60 | MAX_PRIO | ||
61 | The maximum priority of the system, stored in the task as task->prio. | ||
62 | Lower priorities are higher. Normal (non-RT) priorities range from | ||
63 | MAX_RT_PRIO to (MAX_PRIO - 1). | ||
64 | MAX_RT_PRIO | ||
65 | The maximum real-time priority of the system. Valid RT priorities | ||
66 | range from 0 to (MAX_RT_PRIO - 1). | ||
67 | MAX_USER_RT_PRIO | ||
68 | The maximum real-time priority that is exported to user-space. Should | ||
69 | always be equal to or less than MAX_RT_PRIO. Setting it less allows | ||
70 | kernel threads to have higher priorities than any user-space task. | ||
71 | MIN_TIMESLICE | ||
72 | MAX_TIMESLICE | ||
73 | Respectively, the minimum and maximum timeslices (quanta) of a process. | ||
74 | |||
75 | Data | ||
76 | ---- | ||
77 | |||
78 | struct runqueue | ||
79 | The main per-CPU runqueue data structure. | ||
80 | struct task_struct | ||
81 | The main per-process data structure. | ||
82 | |||
83 | |||
84 | General Methods | ||
85 | --------------- | ||
86 | |||
87 | cpu_rq(cpu) | ||
88 | Returns the runqueue of the specified cpu. | ||
89 | this_rq() | ||
90 | Returns the runqueue of the current cpu. | ||
91 | task_rq(pid) | ||
92 | Returns the runqueue which holds the specified pid. | ||
93 | cpu_curr(cpu) | ||
94 | Returns the task currently running on the given cpu. | ||
95 | rt_task(pid) | ||
96 | Returns true if pid is real-time, false if not. | ||
97 | |||
98 | |||
99 | Process Control Methods | ||
100 | ----------------------- | ||
101 | |||
102 | void set_user_nice(task_t *p, long nice) | ||
103 | Sets the "nice" value of task p to the given value. | ||
104 | int setscheduler(pid_t pid, int policy, struct sched_param *param) | ||
105 | Sets the scheduling policy and parameters for the given pid. | ||
106 | int set_cpus_allowed(task_t *p, unsigned long new_mask) | ||
107 | Sets a given task's CPU affinity and migrates it to a proper cpu. | ||
108 | Callers must have a valid reference to the task and assure the | ||
109 | task not exit prematurely. No locks can be held during the call. | ||
110 | set_task_state(tsk, state_value) | ||
111 | Sets the given task's state to the given value. | ||
112 | set_current_state(state_value) | ||
113 | Sets the current task's state to the given value. | ||
114 | void set_tsk_need_resched(struct task_struct *tsk) | ||
115 | Sets need_resched in the given task. | ||
116 | void clear_tsk_need_resched(struct task_struct *tsk) | ||
117 | Clears need_resched in the given task. | ||
118 | void set_need_resched() | ||
119 | Sets need_resched in the current task. | ||
120 | void clear_need_resched() | ||
121 | Clears need_resched in the current task. | ||
122 | int need_resched() | ||
123 | Returns true if need_resched is set in the current task, false | ||
124 | otherwise. | ||
125 | yield() | ||
126 | Place the current process at the end of the runqueue and call schedule. | ||
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt index e2bae5a577e3..3ac1e46d5365 100644 --- a/Documentation/scheduler/sched-nice-design.txt +++ b/Documentation/scheduler/sched-nice-design.txt | |||
@@ -55,7 +55,7 @@ To sum it up: we always wanted to make nice levels more consistent, but | |||
55 | within the constraints of HZ and jiffies and their nasty design level | 55 | within the constraints of HZ and jiffies and their nasty design level |
56 | coupling to timeslices and granularity it was not really viable. | 56 | coupling to timeslices and granularity it was not really viable. |
57 | 57 | ||
58 | The second (less frequent but still periodically occuring) complaint | 58 | The second (less frequent but still periodically occurring) complaint |
59 | about Linux's nice level support was its assymetry around the origo | 59 | about Linux's nice level support was its assymetry around the origo |
60 | (which you can see demonstrated in the picture above), or more | 60 | (which you can see demonstrated in the picture above), or more |
61 | accurately: the fact that nice level behavior depended on the _absolute_ | 61 | accurately: the fact that nice level behavior depended on the _absolute_ |
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 3ef339f491e0..1df7f9cdab05 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt | |||
@@ -4,6 +4,7 @@ | |||
4 | CONTENTS | 4 | CONTENTS |
5 | ======== | 5 | ======== |
6 | 6 | ||
7 | 0. WARNING | ||
7 | 1. Overview | 8 | 1. Overview |
8 | 1.1 The problem | 9 | 1.1 The problem |
9 | 1.2 The solution | 10 | 1.2 The solution |
@@ -14,6 +15,23 @@ CONTENTS | |||
14 | 3. Future plans | 15 | 3. Future plans |
15 | 16 | ||
16 | 17 | ||
18 | 0. WARNING | ||
19 | ========== | ||
20 | |||
21 | Fiddling with these settings can result in an unstable system, the knobs are | ||
22 | root only and assumes root knows what he is doing. | ||
23 | |||
24 | Most notable: | ||
25 | |||
26 | * very small values in sched_rt_period_us can result in an unstable | ||
27 | system when the period is smaller than either the available hrtimer | ||
28 | resolution, or the time it takes to handle the budget refresh itself. | ||
29 | |||
30 | * very small values in sched_rt_runtime_us can result in an unstable | ||
31 | system when the runtime is so small the system has difficulty making | ||
32 | forward progress (NOTE: the migration thread and kstopmachine both | ||
33 | are real-time processes). | ||
34 | |||
17 | 1. Overview | 35 | 1. Overview |
18 | =========== | 36 | =========== |
19 | 37 | ||
@@ -126,7 +144,7 @@ This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_u | |||
126 | to control the CPU time reserved for each control group instead. | 144 | to control the CPU time reserved for each control group instead. |
127 | 145 | ||
128 | For more information on working with control groups, you should read | 146 | For more information on working with control groups, you should read |
129 | Documentation/cgroups.txt as well. | 147 | Documentation/cgroups/cgroups.txt as well. |
130 | 148 | ||
131 | Group settings are checked against the following limits in order to keep the configuration | 149 | Group settings are checked against the following limits in order to keep the configuration |
132 | schedulable: | 150 | schedulable: |
@@ -169,7 +187,7 @@ get their allocated time. | |||
169 | 187 | ||
170 | Implementing SCHED_EDF might take a while to complete. Priority Inheritance is | 188 | Implementing SCHED_EDF might take a while to complete. Priority Inheritance is |
171 | the biggest challenge as the current linux PI infrastructure is geared towards | 189 | the biggest challenge as the current linux PI infrastructure is geared towards |
172 | the limited static priority levels 0-139. With deadline scheduling you need to | 190 | the limited static priority levels 0-99. With deadline scheduling you need to |
173 | do deadline inheritance (since priority is inversely proportional to the | 191 | do deadline inheritance (since priority is inversely proportional to the |
174 | deadline delta (deadline - now). | 192 | deadline delta (deadline - now). |
175 | 193 | ||
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index ddace3afc83b..30f643f611b2 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt | |||
@@ -60,17 +60,9 @@ Supported Cards/Chipsets | |||
60 | 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite) | 60 | 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite) |
61 | 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite) | 61 | 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite) |
62 | 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite) | 62 | 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite) |
63 | 9005:0285:9005:02d8 Adaptec 5405G (Voodoo40 PM) | 63 | 9005:0285:9005:02d8 Adaptec 5405Z (Voodoo40 BLBU) |
64 | 9005:0285:9005:02d9 Adaptec 5445G (Voodoo44 PM) | 64 | 9005:0285:9005:02d9 Adaptec 5445Z (Voodoo44 BLBU) |
65 | 9005:0285:9005:02da Adaptec 5805G (Voodoo80 PM) | 65 | 9005:0285:9005:02da Adaptec 5805Z (Voodoo80 BLBU) |
66 | 9005:0285:9005:02db Adaptec 5085G (Voodoo08 PM) | ||
67 | 9005:0285:9005:02dc Adaptec 51245G (Voodoo124 PM) | ||
68 | 9005:0285:9005:02dd Adaptec 51645G (Voodoo164 PM) | ||
69 | 9005:0285:9005:02de Adaptec 52445G (Voodoo244 PM) | ||
70 | 9005:0285:9005:02df Adaptec ASR-2045G (Voodoo04 Lite PM) | ||
71 | 9005:0285:9005:02e0 Adaptec ASR-2405G (Voodoo40 Lite PM) | ||
72 | 9005:0285:9005:02e1 Adaptec ASR-2445G (Voodoo44 Lite PM) | ||
73 | 9005:0285:9005:02e2 Adaptec ASR-2805G (Voodoo80 Lite PM) | ||
74 | 1011:0046:9005:0364 Adaptec 5400S (Mustang) | 66 | 1011:0046:9005:0364 Adaptec 5400S (Mustang) |
75 | 1011:0046:9005:0365 Adaptec 5400S (Mustang) | 67 | 1011:0046:9005:0365 Adaptec 5400S (Mustang) |
76 | 9005:0287:9005:0800 Adaptec Themisto (Jupiter) | 68 | 9005:0287:9005:0800 Adaptec Themisto (Jupiter) |
@@ -140,6 +132,7 @@ Deanna Bonds (non-DASD support, PAE fibs and 64 bit, | |||
140 | where fibs that go to the hardware are consistently called hw_fibs and | 132 | where fibs that go to the hardware are consistently called hw_fibs and |
141 | not just fibs like the name of the driver tracking structure) | 133 | not just fibs like the name of the driver tracking structure) |
142 | Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations. | 134 | Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations. |
135 | Achim Leubner <Achim_Leubner@adaptec.com> | ||
143 | 136 | ||
144 | Original Driver | 137 | Original Driver |
145 | ------------------------- | 138 | ------------------------- |
diff --git a/Documentation/scsi/aic79xx.txt b/Documentation/scsi/aic79xx.txt index 683ccae00ad4..c014eccaf19f 100644 --- a/Documentation/scsi/aic79xx.txt +++ b/Documentation/scsi/aic79xx.txt | |||
@@ -194,7 +194,7 @@ The following information is available in this file: | |||
194 | - Packetized SCSI Protocol at 160MB/s and 320MB/s | 194 | - Packetized SCSI Protocol at 160MB/s and 320MB/s |
195 | - Quick Arbitration Selection (QAS) | 195 | - Quick Arbitration Selection (QAS) |
196 | - Retained Training Information (Rev B. ASIC only) | 196 | - Retained Training Information (Rev B. ASIC only) |
197 | - Interrupt Coalessing | 197 | - Interrupt Coalescing |
198 | - Initiator Mode (target mode not currently | 198 | - Initiator Mode (target mode not currently |
199 | supported) | 199 | supported) |
200 | - Support for the PCI-X standard up to 133MHz | 200 | - Support for the PCI-X standard up to 133MHz |
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt index 230e30846ef2..08e2b4d04aab 100644 --- a/Documentation/scsi/ncr53c8xx.txt +++ b/Documentation/scsi/ncr53c8xx.txt | |||
@@ -206,7 +206,7 @@ of MOVE MEMORY instructions. | |||
206 | The 896 and the 895A allows handling of the phase mismatch context from | 206 | The 896 and the 895A allows handling of the phase mismatch context from |
207 | SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor | 207 | SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor |
208 | until the C code has saved the context of the transfer). | 208 | until the C code has saved the context of the transfer). |
209 | Implementing this without using LOAD/STORE instructions would be painfull | 209 | Implementing this without using LOAD/STORE instructions would be painful |
210 | and I didn't even want to try it. | 210 | and I didn't even want to try it. |
211 | 211 | ||
212 | The 896 chip supports 64 bit PCI transactions and addressing, while the | 212 | The 896 chip supports 64 bit PCI transactions and addressing, while the |
@@ -240,7 +240,7 @@ characteristics. This feature may also reduce average command latency. | |||
240 | In order to really gain advantage of this feature, devices must have | 240 | In order to really gain advantage of this feature, devices must have |
241 | a reasonable cache size (No miracle is to be expected for a low-end | 241 | a reasonable cache size (No miracle is to be expected for a low-end |
242 | hard disk with 128 KB or less). | 242 | hard disk with 128 KB or less). |
243 | Some kown SCSI devices do not properly support tagged command queuing. | 243 | Some known SCSI devices do not properly support tagged command queuing. |
244 | Generally, firmware revisions that fix this kind of problems are available | 244 | Generally, firmware revisions that fix this kind of problems are available |
245 | at respective vendor web/ftp sites. | 245 | at respective vendor web/ftp sites. |
246 | All I can say is that the hard disks I use on my machines behave well with | 246 | All I can say is that the hard disks I use on my machines behave well with |
diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt new file mode 100644 index 000000000000..da162f7fd5f5 --- /dev/null +++ b/Documentation/scsi/osd.txt | |||
@@ -0,0 +1,198 @@ | |||
1 | The OSD Standard | ||
2 | ================ | ||
3 | OSD (Object-Based Storage Device) is a T10 SCSI command set that is designed | ||
4 | to provide efficient operation of input/output logical units that manage the | ||
5 | allocation, placement, and accessing of variable-size data-storage containers, | ||
6 | called objects. Objects are intended to contain operating system and application | ||
7 | constructs. Each object has associated attributes attached to it, which are | ||
8 | integral part of the object and provide metadata about the object. The standard | ||
9 | defines some common obligatory attributes, but user attributes can be added as | ||
10 | needed. | ||
11 | |||
12 | See: http://www.t10.org/ftp/t10/drafts/osd2/ for the latest draft for OSD 2 | ||
13 | or search the web for "OSD SCSI" | ||
14 | |||
15 | OSD in the Linux Kernel | ||
16 | ======================= | ||
17 | osd-initiator: | ||
18 | The main component of OSD in Kernel is the osd-initiator library. Its main | ||
19 | user is intended to be the pNFS-over-objects layout driver, which uses objects | ||
20 | as its back-end data storage. Other clients are the other osd parts listed below. | ||
21 | |||
22 | osd-uld: | ||
23 | This is a SCSI ULD that registers for OSD type devices and provides a testing | ||
24 | platform, both for the in-kernel initiator as well as connected targets. It | ||
25 | currently has no useful user-mode API, though it could have if need be. | ||
26 | |||
27 | exofs: | ||
28 | Is an OSD based Linux file system. It uses the osd-initiator and osd-uld, | ||
29 | to export a usable file system for users. | ||
30 | See Documentation/filesystems/exofs.txt for more details | ||
31 | |||
32 | osd target: | ||
33 | There are no current plans for an OSD target implementation in kernel. For all | ||
34 | needs, a user-mode target that is based on the scsi tgt target framework is | ||
35 | available from Ohio Supercomputer Center (OSC) at: | ||
36 | http://www.open-osd.org/bin/view/Main/OscOsdProject | ||
37 | There are several other target implementations. See http://open-osd.org for more | ||
38 | links. | ||
39 | |||
40 | Files and Folders | ||
41 | ================= | ||
42 | This is the complete list of files included in this work: | ||
43 | include/scsi/ | ||
44 | osd_initiator.h Main API for the initiator library | ||
45 | osd_types.h Common OSD types | ||
46 | osd_sec.h Security Manager API | ||
47 | osd_protocol.h Wire definitions of the OSD standard protocol | ||
48 | osd_attributes.h Wire definitions of OSD attributes | ||
49 | |||
50 | drivers/scsi/osd/ | ||
51 | osd_initiator.c OSD-Initiator library implementation | ||
52 | osd_uld.c The OSD scsi ULD | ||
53 | osd_ktest.{h,c} In-kernel test suite (called by osd_uld) | ||
54 | osd_debug.h Some printk macros | ||
55 | Makefile For both in-tree and out-of-tree compilation | ||
56 | Kconfig Enables inclusion of the different pieces | ||
57 | osd_test.c User-mode application to call the kernel tests | ||
58 | |||
59 | The OSD-Initiator Library | ||
60 | ========================= | ||
61 | osd_initiator is a low level implementation of an osd initiator encoder. | ||
62 | But even though, it should be intuitive and easy to use. Perhaps over time an | ||
63 | higher lever will form that automates some of the more common recipes. | ||
64 | |||
65 | init/fini: | ||
66 | - osd_dev_init() associates a scsi_device with an osd_dev structure | ||
67 | and initializes some global pools. This should be done once per scsi_device | ||
68 | (OSD LUN). The osd_dev structure is needed for calling osd_start_request(). | ||
69 | |||
70 | - osd_dev_fini() cleans up before a osd_dev/scsi_device destruction. | ||
71 | |||
72 | OSD commands encoding, execution, and decoding of results: | ||
73 | |||
74 | struct osd_request's is used to iteratively encode an OSD command and carry | ||
75 | its state throughout execution. Each request goes through these stages: | ||
76 | |||
77 | a. osd_start_request() allocates the request. | ||
78 | |||
79 | b. Any of the osd_req_* methods is used to encode a request of the specified | ||
80 | type. | ||
81 | |||
82 | c. osd_req_add_{get,set}_attr_* may be called to add get/set attributes to the | ||
83 | CDB. "List" or "Page" mode can be used exclusively. The attribute-list API | ||
84 | can be called multiple times on the same request. However, only one | ||
85 | attribute-page can be read, as mandated by the OSD standard. | ||
86 | |||
87 | d. osd_finalize_request() computes offsets into the data-in and data-out buffers | ||
88 | and signs the request using the provided capability key and integrity- | ||
89 | check parameters. | ||
90 | |||
91 | e. osd_execute_request() may be called to execute the request via the block | ||
92 | layer and wait for its completion. The request can be executed | ||
93 | asynchronously by calling the block layer API directly. | ||
94 | |||
95 | f. After execution, osd_req_decode_sense() can be called to decode the request's | ||
96 | sense information. | ||
97 | |||
98 | g. osd_req_decode_get_attr() may be called to retrieve osd_add_get_attr_list() | ||
99 | values. | ||
100 | |||
101 | h. osd_end_request() must be called to deallocate the request and any resource | ||
102 | associated with it. Note that osd_end_request cleans up the request at any | ||
103 | stage and it must always be called after a successful osd_start_request(). | ||
104 | |||
105 | osd_request's structure: | ||
106 | |||
107 | The OSD standard defines a complex structure of IO segments pointed to by | ||
108 | members in the CDB. Up to 3 segments can be deployed in the IN-Buffer and up to | ||
109 | 4 in the OUT-Buffer. The ASCII illustration below depicts a secure-read with | ||
110 | associated get+set of attributes-lists. Other combinations very on the same | ||
111 | basic theme. From no-segments-used up to all-segments-used. | ||
112 | |||
113 | |________OSD-CDB__________| | ||
114 | | | | ||
115 | |read_len (offset=0) -|---------\ | ||
116 | | | | | ||
117 | |get_attrs_list_length | | | ||
118 | |get_attrs_list_offset -|----\ | | ||
119 | | | | | | ||
120 | |retrieved_attrs_alloc_len| | | | ||
121 | |retrieved_attrs_offset -|----|----|-\ | ||
122 | | | | | | | ||
123 | |set_attrs_list_length | | | | | ||
124 | |set_attrs_list_offset -|-\ | | | | ||
125 | | | | | | | | ||
126 | |in_data_integ_offset -|-|--|----|-|-\ | ||
127 | |out_data_integ_offset -|-|--|--\ | | | | ||
128 | \_________________________/ | | | | | | | ||
129 | | | | | | | | ||
130 | |_______OUT-BUFFER________| | | | | | | | ||
131 | | Set attr list |</ | | | | | | ||
132 | | | | | | | | | ||
133 | |-------------------------| | | | | | | ||
134 | | Get attr descriptors |<---/ | | | | | ||
135 | | | | | | | | ||
136 | |-------------------------| | | | | | ||
137 | | Out-data integrity |<------/ | | | | ||
138 | | | | | | | ||
139 | \_________________________/ | | | | ||
140 | | | | | ||
141 | |________IN-BUFFER________| | | | | ||
142 | | In-Data read |<--------/ | | | ||
143 | | | | | | ||
144 | |-------------------------| | | | ||
145 | | Get attr list |<----------/ | | ||
146 | | | | | ||
147 | |-------------------------| | | ||
148 | | In-data integrity |<------------/ | ||
149 | | | | ||
150 | \_________________________/ | ||
151 | |||
152 | A block device request can carry bidirectional payload by means of associating | ||
153 | a bidi_read request with a main write-request. Each in/out request is described | ||
154 | by a chain of BIOs associated with each request. | ||
155 | The CDB is of a SCSI VARLEN CDB format, as described by OSD standard. | ||
156 | The OSD standard also mandates alignment restrictions at start of each segment. | ||
157 | |||
158 | In the code, in struct osd_request, there are two _osd_io_info structures to | ||
159 | describe the IN/OUT buffers above, two BIOs for the data payload and up to five | ||
160 | _osd_req_data_segment structures to hold the different segments allocation and | ||
161 | information. | ||
162 | |||
163 | Important: We have chosen to disregard the assumption that a BIO-chain (and | ||
164 | the resulting sg-list) describes a linear memory buffer. Meaning only first and | ||
165 | last scatter chain can be incomplete and all the middle chains are of PAGE_SIZE. | ||
166 | For us, a scatter-gather-list, as its name implies and as used by the Networking | ||
167 | layer, is to describe a vector of buffers that will be transferred to/from the | ||
168 | wire. It works very well with current iSCSI transport. iSCSI is currently the | ||
169 | only deployed OSD transport. In the future we anticipate SAS and FC attached OSD | ||
170 | devices as well. | ||
171 | |||
172 | The OSD Testing ULD | ||
173 | =================== | ||
174 | TODO: More user-mode control on tests. | ||
175 | |||
176 | Authors, Mailing list | ||
177 | ===================== | ||
178 | Please communicate with us on any deployment of osd, whether using this code | ||
179 | or not. | ||
180 | |||
181 | Any problems, questions, bug reports, lonely OSD nights, please email: | ||
182 | OSD Dev List <osd-dev@open-osd.org> | ||
183 | |||
184 | More up-to-date information can be found on: | ||
185 | http://open-osd.org | ||
186 | |||
187 | Boaz Harrosh <bharrosh@panasas.com> | ||
188 | Benny Halevy <bhalevy@panasas.com> | ||
189 | |||
190 | References | ||
191 | ========== | ||
192 | Weber, R., "SCSI Object-Based Storage Device Commands", | ||
193 | T10/1355-D ANSI/INCITS 400-2004, | ||
194 | http://www.t10.org/ftp/t10/drafts/osd/osd-r10.pdf | ||
195 | |||
196 | Weber, R., "SCSI Object-Based Storage Device Commands -2 (OSD-2)" | ||
197 | T10/1729-D, Working Draft, rev. 3 | ||
198 | http://www.t10.org/ftp/t10/drafts/osd2/osd2r03.pdf | ||
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt index e5b071d46619..d7f181701dc2 100644 --- a/Documentation/scsi/scsi_fc_transport.txt +++ b/Documentation/scsi/scsi_fc_transport.txt | |||
@@ -1,10 +1,11 @@ | |||
1 | SCSI FC Tansport | 1 | SCSI FC Tansport |
2 | ============================================= | 2 | ============================================= |
3 | 3 | ||
4 | Date: 4/12/2007 | 4 | Date: 11/18/2008 |
5 | Kernel Revisions for features: | 5 | Kernel Revisions for features: |
6 | rports : <<TBS>> | 6 | rports : <<TBS>> |
7 | vports : 2.6.22 (? TBD) | 7 | vports : 2.6.22 |
8 | bsg support : 2.6.30 (?TBD?) | ||
8 | 9 | ||
9 | 10 | ||
10 | Introduction | 11 | Introduction |
@@ -15,6 +16,7 @@ The FC transport can be found at: | |||
15 | drivers/scsi/scsi_transport_fc.c | 16 | drivers/scsi/scsi_transport_fc.c |
16 | include/scsi/scsi_transport_fc.h | 17 | include/scsi/scsi_transport_fc.h |
17 | include/scsi/scsi_netlink_fc.h | 18 | include/scsi/scsi_netlink_fc.h |
19 | include/scsi/scsi_bsg_fc.h | ||
18 | 20 | ||
19 | This file is found at Documentation/scsi/scsi_fc_transport.txt | 21 | This file is found at Documentation/scsi/scsi_fc_transport.txt |
20 | 22 | ||
@@ -472,6 +474,14 @@ int | |||
472 | fc_vport_terminate(struct fc_vport *vport) | 474 | fc_vport_terminate(struct fc_vport *vport) |
473 | 475 | ||
474 | 476 | ||
477 | FC BSG support (CT & ELS passthru, and more) | ||
478 | ======================================================================== | ||
479 | << To Be Supplied >> | ||
480 | |||
481 | |||
482 | |||
483 | |||
484 | |||
475 | Credits | 485 | Credits |
476 | ======= | 486 | ======= |
477 | The following people have contributed to this document: | 487 | The following people have contributed to this document: |
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index a6d5354639b2..de67229251d8 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt | |||
@@ -1271,6 +1271,11 @@ of interest: | |||
1271 | hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size | 1271 | hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size |
1272 | is set by the second argument (named 'xtr_bytes') to | 1272 | is set by the second argument (named 'xtr_bytes') to |
1273 | scsi_host_alloc() or scsi_register(). | 1273 | scsi_host_alloc() or scsi_register(). |
1274 | vendor_id - a unique value that identifies the vendor supplying | ||
1275 | the LLD for the Scsi_Host. Used most often in validating | ||
1276 | vendor-specific message requests. Value consists of an | ||
1277 | identifier type and a vendor-specific value. | ||
1278 | See scsi_netlink.h for a description of valid formats. | ||
1274 | 1279 | ||
1275 | The scsi_host structure is defined in include/scsi/scsi_host.h | 1280 | The scsi_host structure is defined in include/scsi/scsi_host.h |
1276 | 1281 | ||
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt index 49ea5c58c6bc..eb9a7b905b64 100644 --- a/Documentation/scsi/sym53c8xx_2.txt +++ b/Documentation/scsi/sym53c8xx_2.txt | |||
@@ -206,7 +206,7 @@ characteristics. This feature may also reduce average command latency. | |||
206 | In order to really gain advantage of this feature, devices must have | 206 | In order to really gain advantage of this feature, devices must have |
207 | a reasonable cache size (No miracle is to be expected for a low-end | 207 | a reasonable cache size (No miracle is to be expected for a low-end |
208 | hard disk with 128 KB or less). | 208 | hard disk with 128 KB or less). |
209 | Some kown old SCSI devices do not properly support tagged command queuing. | 209 | Some known old SCSI devices do not properly support tagged command queuing. |
210 | Generally, firmware revisions that fix this kind of problems are available | 210 | Generally, firmware revisions that fix this kind of problems are available |
211 | at respective vendor web/ftp sites. | 211 | at respective vendor web/ftp sites. |
212 | All I can say is that I never have had problem with tagged queuing using | 212 | All I can say is that I never have had problem with tagged queuing using |
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt new file mode 100644 index 000000000000..ebc50f808ea4 --- /dev/null +++ b/Documentation/slow-work.txt | |||
@@ -0,0 +1,174 @@ | |||
1 | ==================================== | ||
2 | SLOW WORK ITEM EXECUTION THREAD POOL | ||
3 | ==================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | The slow work item execution thread pool is a pool of threads for performing | ||
8 | things that take a relatively long time, such as making mkdir calls. | ||
9 | Typically, when processing something, these items will spend a lot of time | ||
10 | blocking a thread on I/O, thus making that thread unavailable for doing other | ||
11 | work. | ||
12 | |||
13 | The standard workqueue model is unsuitable for this class of work item as that | ||
14 | limits the owner to a single thread or a single thread per CPU. For some | ||
15 | tasks, however, more threads - or fewer - are required. | ||
16 | |||
17 | There is just one pool per system. It contains no threads unless something | ||
18 | wants to use it - and that something must register its interest first. When | ||
19 | the pool is active, the number of threads it contains is dynamic, varying | ||
20 | between a maximum and minimum setting, depending on the load. | ||
21 | |||
22 | |||
23 | ==================== | ||
24 | CLASSES OF WORK ITEM | ||
25 | ==================== | ||
26 | |||
27 | This pool support two classes of work items: | ||
28 | |||
29 | (*) Slow work items. | ||
30 | |||
31 | (*) Very slow work items. | ||
32 | |||
33 | The former are expected to finish much quicker than the latter. | ||
34 | |||
35 | An operation of the very slow class may do a batch combination of several | ||
36 | lookups, mkdirs, and a create for instance. | ||
37 | |||
38 | An operation of the ordinarily slow class may, for example, write stuff or | ||
39 | expand files, provided the time taken to do so isn't too long. | ||
40 | |||
41 | Operations of both types may sleep during execution, thus tying up the thread | ||
42 | loaned to it. | ||
43 | |||
44 | |||
45 | THREAD-TO-CLASS ALLOCATION | ||
46 | -------------------------- | ||
47 | |||
48 | Not all the threads in the pool are available to work on very slow work items. | ||
49 | The number will be between one and one fewer than the number of active threads. | ||
50 | This is configurable (see the "Pool Configuration" section). | ||
51 | |||
52 | All the threads are available to work on ordinarily slow work items, but a | ||
53 | percentage of the threads will prefer to work on very slow work items. | ||
54 | |||
55 | The configuration ensures that at least one thread will be available to work on | ||
56 | very slow work items, and at least one thread will be available that won't work | ||
57 | on very slow work items at all. | ||
58 | |||
59 | |||
60 | ===================== | ||
61 | USING SLOW WORK ITEMS | ||
62 | ===================== | ||
63 | |||
64 | Firstly, a module or subsystem wanting to make use of slow work items must | ||
65 | register its interest: | ||
66 | |||
67 | int ret = slow_work_register_user(); | ||
68 | |||
69 | This will return 0 if successful, or a -ve error upon failure. | ||
70 | |||
71 | |||
72 | Slow work items may then be set up by: | ||
73 | |||
74 | (1) Declaring a slow_work struct type variable: | ||
75 | |||
76 | #include <linux/slow-work.h> | ||
77 | |||
78 | struct slow_work myitem; | ||
79 | |||
80 | (2) Declaring the operations to be used for this item: | ||
81 | |||
82 | struct slow_work_ops myitem_ops = { | ||
83 | .get_ref = myitem_get_ref, | ||
84 | .put_ref = myitem_put_ref, | ||
85 | .execute = myitem_execute, | ||
86 | }; | ||
87 | |||
88 | [*] For a description of the ops, see section "Item Operations". | ||
89 | |||
90 | (3) Initialising the item: | ||
91 | |||
92 | slow_work_init(&myitem, &myitem_ops); | ||
93 | |||
94 | or: | ||
95 | |||
96 | vslow_work_init(&myitem, &myitem_ops); | ||
97 | |||
98 | depending on its class. | ||
99 | |||
100 | A suitably set up work item can then be enqueued for processing: | ||
101 | |||
102 | int ret = slow_work_enqueue(&myitem); | ||
103 | |||
104 | This will return a -ve error if the thread pool is unable to gain a reference | ||
105 | on the item, 0 otherwise. | ||
106 | |||
107 | |||
108 | The items are reference counted, so there ought to be no need for a flush | ||
109 | operation. When all a module's slow work items have been processed, and the | ||
110 | module has no further interest in the facility, it should unregister its | ||
111 | interest: | ||
112 | |||
113 | slow_work_unregister_user(); | ||
114 | |||
115 | |||
116 | =============== | ||
117 | ITEM OPERATIONS | ||
118 | =============== | ||
119 | |||
120 | Each work item requires a table of operations of type struct slow_work_ops. | ||
121 | All members are required: | ||
122 | |||
123 | (*) Get a reference on an item: | ||
124 | |||
125 | int (*get_ref)(struct slow_work *work); | ||
126 | |||
127 | This allows the thread pool to attempt to pin an item by getting a | ||
128 | reference on it. This function should return 0 if the reference was | ||
129 | granted, or a -ve error otherwise. If an error is returned, | ||
130 | slow_work_enqueue() will fail. | ||
131 | |||
132 | The reference is held whilst the item is queued and whilst it is being | ||
133 | executed. The item may then be requeued with the same reference held, or | ||
134 | the reference will be released. | ||
135 | |||
136 | (*) Release a reference on an item: | ||
137 | |||
138 | void (*put_ref)(struct slow_work *work); | ||
139 | |||
140 | This allows the thread pool to unpin an item by releasing the reference on | ||
141 | it. The thread pool will not touch the item again once this has been | ||
142 | called. | ||
143 | |||
144 | (*) Execute an item: | ||
145 | |||
146 | void (*execute)(struct slow_work *work); | ||
147 | |||
148 | This should perform the work required of the item. It may sleep, it may | ||
149 | perform disk I/O and it may wait for locks. | ||
150 | |||
151 | |||
152 | ================== | ||
153 | POOL CONFIGURATION | ||
154 | ================== | ||
155 | |||
156 | The slow-work thread pool has a number of configurables: | ||
157 | |||
158 | (*) /proc/sys/kernel/slow-work/min-threads | ||
159 | |||
160 | The minimum number of threads that should be in the pool whilst it is in | ||
161 | use. This may be anywhere between 2 and max-threads. | ||
162 | |||
163 | (*) /proc/sys/kernel/slow-work/max-threads | ||
164 | |||
165 | The maximum number of threads that should in the pool. This may be | ||
166 | anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater. | ||
167 | |||
168 | (*) /proc/sys/kernel/slow-work/vslow-percentage | ||
169 | |||
170 | The percentage of active threads in the pool that may be used to execute | ||
171 | very slow work items. This may be between 1 and 99. The resultant number | ||
172 | is bounded to between 1 and one fewer than the number of active threads. | ||
173 | This ensures there is always at least one thread that can process very | ||
174 | slow work items, and always at least one thread that won't. | ||
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 841a9365d5fd..4252697a95d6 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -346,6 +346,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
346 | sbirq - IRQ # for CMI8330 chip (SB16) | 346 | sbirq - IRQ # for CMI8330 chip (SB16) |
347 | sbdma8 - 8bit DMA # for CMI8330 chip (SB16) | 347 | sbdma8 - 8bit DMA # for CMI8330 chip (SB16) |
348 | sbdma16 - 16bit DMA # for CMI8330 chip (SB16) | 348 | sbdma16 - 16bit DMA # for CMI8330 chip (SB16) |
349 | fmport - (optional) OPL3 I/O port | ||
350 | mpuport - (optional) MPU401 I/O port | ||
351 | mpuirq - (optional) MPU401 irq # | ||
349 | 352 | ||
350 | This module supports multiple cards and autoprobe. | 353 | This module supports multiple cards and autoprobe. |
351 | 354 | ||
@@ -388,34 +391,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
388 | 391 | ||
389 | The power-management is supported. | 392 | The power-management is supported. |
390 | 393 | ||
391 | Module snd-cs4232 | ||
392 | ----------------- | ||
393 | |||
394 | Module for sound cards based on CS4232/CS4232A ISA chips. | ||
395 | |||
396 | isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) | ||
397 | |||
398 | with isapnp=0, the following options are available: | ||
399 | |||
400 | port - port # for CS4232 chip (PnP setup - 0x534) | ||
401 | cport - control port # for CS4232 chip (PnP setup - 0x120,0x210,0xf00) | ||
402 | mpu_port - port # for MPU-401 UART (PnP setup - 0x300), -1 = disable | ||
403 | fm_port - FM port # for CS4232 chip (PnP setup - 0x388), -1 = disable | ||
404 | irq - IRQ # for CS4232 chip (5,7,9,11,12,15) | ||
405 | mpu_irq - IRQ # for MPU-401 UART (9,11,12,15) | ||
406 | dma1 - first DMA # for CS4232 chip (0,1,3) | ||
407 | dma2 - second DMA # for Yamaha CS4232 chip (0,1,3), -1 = disable | ||
408 | |||
409 | This module supports multiple cards. This module does not support autoprobe | ||
410 | (if ISA PnP is not used) thus main port must be specified!!! Other ports are | ||
411 | optional. | ||
412 | |||
413 | The power-management is supported. | ||
414 | |||
415 | Module snd-cs4236 | 394 | Module snd-cs4236 |
416 | ----------------- | 395 | ----------------- |
417 | 396 | ||
418 | Module for sound cards based on CS4235/CS4236/CS4236B/CS4237B/ | 397 | Module for sound cards based on CS4232/CS4232A, |
398 | CS4235/CS4236/CS4236B/CS4237B/ | ||
419 | CS4238B/CS4239 ISA chips. | 399 | CS4238B/CS4239 ISA chips. |
420 | 400 | ||
421 | isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) | 401 | isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) |
@@ -437,6 +417,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
437 | 417 | ||
438 | The power-management is supported. | 418 | The power-management is supported. |
439 | 419 | ||
420 | This module is aliased as snd-cs4232 since it provides the old | ||
421 | snd-cs4232 functionality, too. | ||
422 | |||
440 | Module snd-cs4281 | 423 | Module snd-cs4281 |
441 | ----------------- | 424 | ----------------- |
442 | 425 | ||
@@ -477,6 +460,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
477 | 460 | ||
478 | The power-management is supported. | 461 | The power-management is supported. |
479 | 462 | ||
463 | Module snd-ctxfi | ||
464 | ---------------- | ||
465 | |||
466 | Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips) | ||
467 | * Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series | ||
468 | * Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series | ||
469 | * Creative Sound Blaster X-Fi Titanium Professional Audio | ||
470 | * Creative Sound Blaster X-Fi Titanium | ||
471 | * Creative Sound Blaster X-Fi Elite Pro | ||
472 | * Creative Sound Blaster X-Fi Platinum | ||
473 | * Creative Sound Blaster X-Fi Fatal1ty | ||
474 | * Creative Sound Blaster X-Fi XtremeGamer | ||
475 | * Creative Sound Blaster X-Fi XtremeMusic | ||
476 | |||
477 | reference_rate - reference sample rate, 44100 or 48000 (default) | ||
478 | multiple - multiple to ref. sample rate, 1 or 2 (default) | ||
479 | |||
480 | This module supports multiple cards. | ||
481 | |||
480 | Module snd-darla20 | 482 | Module snd-darla20 |
481 | ------------------ | 483 | ------------------ |
482 | 484 | ||
@@ -606,6 +608,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
606 | Module for ESS AudioDrive ES-1688 and ES-688 sound cards. | 608 | Module for ESS AudioDrive ES-1688 and ES-688 sound cards. |
607 | 609 | ||
608 | port - port # for ES-1688 chip (0x220,0x240,0x260) | 610 | port - port # for ES-1688 chip (0x220,0x240,0x260) |
611 | fm_port - port # for OPL3 (option; share the same port as default) | ||
609 | mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default) | 612 | mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default) |
610 | irq - IRQ # for ES-1688 chip (5,7,9,10) | 613 | irq - IRQ # for ES-1688 chip (5,7,9,10) |
611 | mpu_irq - IRQ # for MPU-401 port (5,7,9,10) | 614 | mpu_irq - IRQ # for MPU-401 port (5,7,9,10) |
@@ -757,6 +760,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
757 | model - force the model name | 760 | model - force the model name |
758 | position_fix - Fix DMA pointer (0 = auto, 1 = use LPIB, 2 = POSBUF) | 761 | position_fix - Fix DMA pointer (0 = auto, 1 = use LPIB, 2 = POSBUF) |
759 | probe_mask - Bitmask to probe codecs (default = -1, meaning all slots) | 762 | probe_mask - Bitmask to probe codecs (default = -1, meaning all slots) |
763 | When the bit 8 (0x100) is set, the lower 8 bits are used | ||
764 | as the "fixed" codec slots; i.e. the driver probes the | ||
765 | slots regardless what hardware reports back | ||
760 | probe_only - Only probing and no codec initialization (default=off); | 766 | probe_only - Only probing and no codec initialization (default=off); |
761 | Useful to check the initial codec status for debugging | 767 | Useful to check the initial codec status for debugging |
762 | bdl_pos_adj - Specifies the DMA IRQ timing delay in samples. | 768 | bdl_pos_adj - Specifies the DMA IRQ timing delay in samples. |
@@ -767,7 +773,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
767 | single_cmd - Use single immediate commands to communicate with | 773 | single_cmd - Use single immediate commands to communicate with |
768 | codecs (for debugging only) | 774 | codecs (for debugging only) |
769 | enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) | 775 | enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) |
770 | power_save - Automatic power-saving timtout (in second, 0 = | 776 | power_save - Automatic power-saving timeout (in second, 0 = |
771 | disable) | 777 | disable) |
772 | power_save_controller - Reset HD-audio controller in power-saving mode | 778 | power_save_controller - Reset HD-audio controller in power-saving mode |
773 | (default = on) | 779 | (default = on) |
@@ -938,6 +944,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
938 | * Onkyo SE-90PCI | 944 | * Onkyo SE-90PCI |
939 | * Onkyo SE-200PCI | 945 | * Onkyo SE-200PCI |
940 | * ESI Juli@ | 946 | * ESI Juli@ |
947 | * ESI Maya44 | ||
941 | * Hercules Fortissimo IV | 948 | * Hercules Fortissimo IV |
942 | * EGO-SYS WaveTerminal 192M | 949 | * EGO-SYS WaveTerminal 192M |
943 | 950 | ||
@@ -946,7 +953,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
946 | prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192, | 953 | prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192, |
947 | juli, aureon51, aureon71, universe, ap192, k8x800, | 954 | juli, aureon51, aureon71, universe, ap192, k8x800, |
948 | phase22, phase28, ms300, av710, se200pci, se90pci, | 955 | phase22, phase28, ms300, av710, se200pci, se90pci, |
949 | fortissimo4, sn25p, WT192M | 956 | fortissimo4, sn25p, WT192M, maya44 |
950 | 957 | ||
951 | This module supports multiple cards and autoprobe. | 958 | This module supports multiple cards and autoprobe. |
952 | 959 | ||
@@ -1106,6 +1113,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1106 | This module supports multiple cards. | 1113 | This module supports multiple cards. |
1107 | The driver requires the firmware loader support on kernel. | 1114 | The driver requires the firmware loader support on kernel. |
1108 | 1115 | ||
1116 | Module snd-lx6464es | ||
1117 | ------------------- | ||
1118 | |||
1119 | Module for Digigram LX6464ES boards | ||
1120 | |||
1121 | This module supports multiple cards. | ||
1122 | |||
1109 | Module snd-maestro3 | 1123 | Module snd-maestro3 |
1110 | ------------------- | 1124 | ------------------- |
1111 | 1125 | ||
@@ -1185,6 +1199,54 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1185 | 1199 | ||
1186 | This module supports multiple devices and PnP. | 1200 | This module supports multiple devices and PnP. |
1187 | 1201 | ||
1202 | Module snd-msnd-classic | ||
1203 | ----------------------- | ||
1204 | |||
1205 | Module for Turtle Beach MultiSound Classic, Tahiti or Monterey | ||
1206 | soundcards. | ||
1207 | |||
1208 | io - Port # for msnd-classic card | ||
1209 | irq - IRQ # for msnd-classic card | ||
1210 | mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000, | ||
1211 | 0xe0000 or 0xe8000) | ||
1212 | write_ndelay - enable write ndelay (default = 1) | ||
1213 | calibrate_signal - calibrate signal (default = 0) | ||
1214 | isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) | ||
1215 | digital - Digital daughterboard present (default = 0) | ||
1216 | cfg - Config port (0x250, 0x260 or 0x270) default = PnP | ||
1217 | reset - Reset all devices | ||
1218 | mpu_io - MPU401 I/O port | ||
1219 | mpu_irq - MPU401 irq# | ||
1220 | ide_io0 - IDE port #0 | ||
1221 | ide_io1 - IDE port #1 | ||
1222 | ide_irq - IDE irq# | ||
1223 | joystick_io - Joystick I/O port | ||
1224 | |||
1225 | The driver requires firmware files "turtlebeach/msndinit.bin" and | ||
1226 | "turtlebeach/msndperm.bin" in the proper firmware directory. | ||
1227 | |||
1228 | See Documentation/sound/oss/MultiSound for important information | ||
1229 | about this driver. Note that it has been discontinued, but the | ||
1230 | Voyetra Turtle Beach knowledge base entry for it is still available | ||
1231 | at | ||
1232 | http://www.turtlebeach.com/site/kb_ftp/790.asp | ||
1233 | |||
1234 | Module snd-msnd-pinnacle | ||
1235 | ------------------------ | ||
1236 | |||
1237 | Module for Turtle Beach MultiSound Pinnacle/Fiji soundcards. | ||
1238 | |||
1239 | io - Port # for pinnacle/fiji card | ||
1240 | irq - IRQ # for pinnalce/fiji card | ||
1241 | mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000, | ||
1242 | 0xe0000 or 0xe8000) | ||
1243 | write_ndelay - enable write ndelay (default = 1) | ||
1244 | calibrate_signal - calibrate signal (default = 0) | ||
1245 | isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) | ||
1246 | |||
1247 | The driver requires firmware files "turtlebeach/pndspini.bin" and | ||
1248 | "turtlebeach/pndsperm.bin" in the proper firmware directory. | ||
1249 | |||
1188 | Module snd-mtpav | 1250 | Module snd-mtpav |
1189 | ---------------- | 1251 | ---------------- |
1190 | 1252 | ||
@@ -1508,13 +1570,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1508 | Module snd-sc6000 | 1570 | Module snd-sc6000 |
1509 | ----------------- | 1571 | ----------------- |
1510 | 1572 | ||
1511 | Module for Gallant SC-6000 soundcard. | 1573 | Module for Gallant SC-6000 soundcard and later models: SC-6600 |
1574 | and SC-7000. | ||
1512 | 1575 | ||
1513 | port - Port # (0x220 or 0x240) | 1576 | port - Port # (0x220 or 0x240) |
1514 | mss_port - MSS Port # (0x530 or 0xe80) | 1577 | mss_port - MSS Port # (0x530 or 0xe80) |
1515 | irq - IRQ # (5,7,9,10,11) | 1578 | irq - IRQ # (5,7,9,10,11) |
1516 | mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq | 1579 | mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq |
1517 | dma - DMA # (1,3,0) | 1580 | dma - DMA # (1,3,0) |
1581 | joystick - Enable gameport - 0 = disable (default), 1 = enable | ||
1518 | 1582 | ||
1519 | This module supports multiple cards. | 1583 | This module supports multiple cards. |
1520 | 1584 | ||
@@ -1824,7 +1888,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1824 | ------------------- | 1888 | ------------------- |
1825 | 1889 | ||
1826 | Module for sound cards based on the Asus AV100/AV200 chips, | 1890 | Module for sound cards based on the Asus AV100/AV200 chips, |
1827 | i.e., Xonar D1, DX, D2, D2X and HDAV1.3 (Deluxe). | 1891 | i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), Essence ST |
1892 | (Deluxe) and Essence STX. | ||
1828 | 1893 | ||
1829 | This module supports autoprobe and multiple cards. | 1894 | This module supports autoprobe and multiple cards. |
1830 | 1895 | ||
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 0f5d26bea80f..0d8d23581c44 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt | |||
@@ -36,6 +36,7 @@ ALC260 | |||
36 | acer Acer TravelMate | 36 | acer Acer TravelMate |
37 | will Will laptops (PB V7900) | 37 | will Will laptops (PB V7900) |
38 | replacer Replacer 672V | 38 | replacer Replacer 672V |
39 | favorit100 Maxdata Favorit 100XS | ||
39 | basic fixed pin assignment (old default model) | 40 | basic fixed pin assignment (old default model) |
40 | test for testing/debugging purpose, almost all controls can | 41 | test for testing/debugging purpose, almost all controls can |
41 | adjusted. Appearing only when compiled with | 42 | adjusted. Appearing only when compiled with |
@@ -56,6 +57,7 @@ ALC262 | |||
56 | sony-assamd Sony ASSAMD | 57 | sony-assamd Sony ASSAMD |
57 | toshiba-s06 Toshiba S06 | 58 | toshiba-s06 Toshiba S06 |
58 | toshiba-rx1 Toshiba RX1 | 59 | toshiba-rx1 Toshiba RX1 |
60 | tyan Tyan Thunder n6650W (S2915-E) | ||
59 | ultra Samsung Q1 Ultra Vista model | 61 | ultra Samsung Q1 Ultra Vista model |
60 | lenovo-3000 Lenovo 3000 y410 | 62 | lenovo-3000 Lenovo 3000 y410 |
61 | nec NEC Versa S9100 | 63 | nec NEC Versa S9100 |
@@ -84,10 +86,11 @@ ALC269 | |||
84 | eeepc-p703 ASUS Eeepc P703 P900A | 86 | eeepc-p703 ASUS Eeepc P703 P900A |
85 | eeepc-p901 ASUS Eeepc P901 S101 | 87 | eeepc-p901 ASUS Eeepc P901 S101 |
86 | fujitsu FSC Amilo | 88 | fujitsu FSC Amilo |
89 | lifebook Fujitsu Lifebook S6420 | ||
87 | auto auto-config reading BIOS (default) | 90 | auto auto-config reading BIOS (default) |
88 | 91 | ||
89 | ALC662/663 | 92 | ALC662/663/272 |
90 | ========== | 93 | ============== |
91 | 3stack-dig 3-stack (2-channel) with SPDIF | 94 | 3stack-dig 3-stack (2-channel) with SPDIF |
92 | 3stack-6ch 3-stack (6-channel) | 95 | 3stack-6ch 3-stack (6-channel) |
93 | 3stack-6ch-dig 3-stack (6-channel) with SPDIF | 96 | 3stack-6ch-dig 3-stack (6-channel) with SPDIF |
@@ -106,6 +109,9 @@ ALC662/663 | |||
106 | asus-mode4 ASUS | 109 | asus-mode4 ASUS |
107 | asus-mode5 ASUS | 110 | asus-mode5 ASUS |
108 | asus-mode6 ASUS | 111 | asus-mode6 ASUS |
112 | dell Dell with ALC272 | ||
113 | dell-zm1 Dell ZM1 with ALC272 | ||
114 | samsung-nc10 Samsung NC10 mini notebook | ||
109 | auto auto-config reading BIOS (default) | 115 | auto auto-config reading BIOS (default) |
110 | 116 | ||
111 | ALC882/885 | 117 | ALC882/885 |
@@ -117,6 +123,7 @@ ALC882/885 | |||
117 | asus-a7j ASUS A7J | 123 | asus-a7j ASUS A7J |
118 | asus-a7m ASUS A7M | 124 | asus-a7m ASUS A7M |
119 | macpro MacPro support | 125 | macpro MacPro support |
126 | mb5 Macbook 5,1 | ||
120 | mbp3 Macbook Pro rev3 | 127 | mbp3 Macbook Pro rev3 |
121 | imac24 iMac 24'' with jack detection | 128 | imac24 iMac 24'' with jack detection |
122 | w2jc ASUS W2JC | 129 | w2jc ASUS W2JC |
@@ -132,10 +139,13 @@ ALC883/888 | |||
132 | acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc) | 139 | acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc) |
133 | acer-aspire Acer Aspire 9810 | 140 | acer-aspire Acer Aspire 9810 |
134 | acer-aspire-4930g Acer Aspire 4930G | 141 | acer-aspire-4930g Acer Aspire 4930G |
142 | acer-aspire-6530g Acer Aspire 6530G | ||
143 | acer-aspire-8930g Acer Aspire 8930G | ||
135 | medion Medion Laptops | 144 | medion Medion Laptops |
136 | medion-md2 Medion MD2 | 145 | medion-md2 Medion MD2 |
137 | targa-dig Targa/MSI | 146 | targa-dig Targa/MSI |
138 | targa-2ch-dig Targs/MSI with 2-channel | 147 | targa-2ch-dig Targa/MSI with 2-channel |
148 | targa-8ch-dig Targa/MSI with 8-channel (MSI GX620) | ||
139 | laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE) | 149 | laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE) |
140 | lenovo-101e Lenovo 101E | 150 | lenovo-101e Lenovo 101E |
141 | lenovo-nb0763 Lenovo NB0763 | 151 | lenovo-nb0763 Lenovo NB0763 |
@@ -149,6 +159,9 @@ ALC883/888 | |||
149 | fujitsu-pi2515 Fujitsu AMILO Pi2515 | 159 | fujitsu-pi2515 Fujitsu AMILO Pi2515 |
150 | fujitsu-xa3530 Fujitsu AMILO XA3530 | 160 | fujitsu-xa3530 Fujitsu AMILO XA3530 |
151 | 3stack-6ch-intel Intel DG33* boards | 161 | 3stack-6ch-intel Intel DG33* boards |
162 | asus-p5q ASUS P5Q-EM boards | ||
163 | mb31 MacBook 3,1 | ||
164 | sony-vaio-tt Sony VAIO TT | ||
152 | auto auto-config reading BIOS (default) | 165 | auto auto-config reading BIOS (default) |
153 | 166 | ||
154 | ALC861/660 | 167 | ALC861/660 |
@@ -261,6 +274,8 @@ Conexant 5051 | |||
261 | ============= | 274 | ============= |
262 | laptop Basic Laptop config (default) | 275 | laptop Basic Laptop config (default) |
263 | hp HP Spartan laptop | 276 | hp HP Spartan laptop |
277 | hp-dv6736 HP dv6736 | ||
278 | lenovo-x200 Lenovo X200 laptop | ||
264 | 279 | ||
265 | STAC9200 | 280 | STAC9200 |
266 | ======== | 281 | ======== |
@@ -278,6 +293,7 @@ STAC9200 | |||
278 | gateway-m4 Gateway laptops with EAPD control | 293 | gateway-m4 Gateway laptops with EAPD control |
279 | gateway-m4-2 Gateway laptops with EAPD control | 294 | gateway-m4-2 Gateway laptops with EAPD control |
280 | panasonic Panasonic CF-74 | 295 | panasonic Panasonic CF-74 |
296 | auto BIOS setup (default) | ||
281 | 297 | ||
282 | STAC9205/9254 | 298 | STAC9205/9254 |
283 | ============= | 299 | ============= |
@@ -285,6 +301,8 @@ STAC9205/9254 | |||
285 | dell-m42 Dell (unknown) | 301 | dell-m42 Dell (unknown) |
286 | dell-m43 Dell Precision | 302 | dell-m43 Dell Precision |
287 | dell-m44 Dell Inspiron | 303 | dell-m44 Dell Inspiron |
304 | eapd Keep EAPD on (e.g. Gateway T1616) | ||
305 | auto BIOS setup (default) | ||
288 | 306 | ||
289 | STAC9220/9221 | 307 | STAC9220/9221 |
290 | ============= | 308 | ============= |
@@ -308,6 +326,7 @@ STAC9220/9221 | |||
308 | dell-d82 Dell (unknown) | 326 | dell-d82 Dell (unknown) |
309 | dell-m81 Dell (unknown) | 327 | dell-m81 Dell (unknown) |
310 | dell-m82 Dell XPS M1210 | 328 | dell-m82 Dell XPS M1210 |
329 | auto BIOS setup (default) | ||
311 | 330 | ||
312 | STAC9202/9250/9251 | 331 | STAC9202/9250/9251 |
313 | ================== | 332 | ================== |
@@ -319,6 +338,7 @@ STAC9202/9250/9251 | |||
319 | m3 Some Gateway MX series laptops | 338 | m3 Some Gateway MX series laptops |
320 | m5 Some Gateway MX series laptops (MP6954) | 339 | m5 Some Gateway MX series laptops (MP6954) |
321 | m6 Some Gateway NX series laptops | 340 | m6 Some Gateway NX series laptops |
341 | auto BIOS setup (default) | ||
322 | 342 | ||
323 | STAC9227/9228/9229/927x | 343 | STAC9227/9228/9229/927x |
324 | ======================= | 344 | ======================= |
@@ -326,8 +346,10 @@ STAC9227/9228/9229/927x | |||
326 | ref-no-jd Reference board without HP/Mic jack detection | 346 | ref-no-jd Reference board without HP/Mic jack detection |
327 | 3stack D965 3stack | 347 | 3stack D965 3stack |
328 | 5stack D965 5stack + SPDIF | 348 | 5stack D965 5stack + SPDIF |
349 | 5stack-no-fp D965 5stack without front panel | ||
329 | dell-3stack Dell Dimension E520 | 350 | dell-3stack Dell Dimension E520 |
330 | dell-bios Fixes with Dell BIOS setup | 351 | dell-bios Fixes with Dell BIOS setup |
352 | auto BIOS setup (default) | ||
331 | 353 | ||
332 | STAC92HD71B* | 354 | STAC92HD71B* |
333 | ============ | 355 | ============ |
@@ -335,7 +357,11 @@ STAC92HD71B* | |||
335 | dell-m4-1 Dell desktops | 357 | dell-m4-1 Dell desktops |
336 | dell-m4-2 Dell desktops | 358 | dell-m4-2 Dell desktops |
337 | dell-m4-3 Dell desktops | 359 | dell-m4-3 Dell desktops |
338 | hp-m4 HP dv laptops | 360 | hp-m4 HP mini 1000 |
361 | hp-dv5 HP dv series | ||
362 | hp-hdx HP HDX series | ||
363 | hp-dv4-1222nr HP dv4-1222nr (with LED support) | ||
364 | auto BIOS setup (default) | ||
339 | 365 | ||
340 | STAC92HD73* | 366 | STAC92HD73* |
341 | =========== | 367 | =========== |
@@ -345,13 +371,16 @@ STAC92HD73* | |||
345 | dell-m6-dmic Dell desktops/laptops with digital mics | 371 | dell-m6-dmic Dell desktops/laptops with digital mics |
346 | dell-m6 Dell desktops/laptops with both type of mics | 372 | dell-m6 Dell desktops/laptops with both type of mics |
347 | dell-eq Dell desktops/laptops | 373 | dell-eq Dell desktops/laptops |
374 | auto BIOS setup (default) | ||
348 | 375 | ||
349 | STAC92HD83* | 376 | STAC92HD83* |
350 | =========== | 377 | =========== |
351 | ref Reference board | 378 | ref Reference board |
352 | mic-ref Reference board with power managment for ports | 379 | mic-ref Reference board with power managment for ports |
380 | dell-s14 Dell laptop | ||
381 | auto BIOS setup (default) | ||
353 | 382 | ||
354 | STAC9872 | 383 | STAC9872 |
355 | ======== | 384 | ======== |
356 | vaio Setup for VAIO FE550G/SZ110 | 385 | vaio VAIO laptop without SPDIF |
357 | vaio-ar Setup for VAIO AR | 386 | auto BIOS setup (default) |
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt index 8d68fff71839..71ac995b1915 100644 --- a/Documentation/sound/alsa/HD-Audio.txt +++ b/Documentation/sound/alsa/HD-Audio.txt | |||
@@ -16,7 +16,7 @@ methods for the HD-audio hardware. | |||
16 | The HD-audio component consists of two parts: the controller chip and | 16 | The HD-audio component consists of two parts: the controller chip and |
17 | the codec chips on the HD-audio bus. Linux provides a single driver | 17 | the codec chips on the HD-audio bus. Linux provides a single driver |
18 | for all controllers, snd-hda-intel. Although the driver name contains | 18 | for all controllers, snd-hda-intel. Although the driver name contains |
19 | a word of a well-known harware vendor, it's not specific to it but for | 19 | a word of a well-known hardware vendor, it's not specific to it but for |
20 | all controller chips by other companies. Since the HD-audio | 20 | all controller chips by other companies. Since the HD-audio |
21 | controllers are supposed to be compatible, the single snd-hda-driver | 21 | controllers are supposed to be compatible, the single snd-hda-driver |
22 | should work in most cases. But, not surprisingly, there are known | 22 | should work in most cases. But, not surprisingly, there are known |
@@ -109,6 +109,13 @@ slot, pass `probe_mask=1`. For the first and the third slots, pass | |||
109 | Since 2.6.29 kernel, the driver has a more robust probing method, so | 109 | Since 2.6.29 kernel, the driver has a more robust probing method, so |
110 | this error might happen rarely, though. | 110 | this error might happen rarely, though. |
111 | 111 | ||
112 | On a machine with a broken BIOS, sometimes you need to force the | ||
113 | driver to probe the codec slots the hardware doesn't report for use. | ||
114 | In such a case, turn the bit 8 (0x100) of `probe_mask` option on. | ||
115 | Then the rest 8 bits are passed as the codec slots to probe | ||
116 | unconditionally. For example, `probe_mask=0x103` will force to probe | ||
117 | the codec slots 0 and 1 no matter what the hardware reports. | ||
118 | |||
112 | 119 | ||
113 | Interrupt Handling | 120 | Interrupt Handling |
114 | ~~~~~~~~~~~~~~~~~~ | 121 | ~~~~~~~~~~~~~~~~~~ |
@@ -162,7 +169,7 @@ PCI SSID look-up. | |||
162 | What `model` option values are available depends on the codec chip. | 169 | What `model` option values are available depends on the codec chip. |
163 | Check your codec chip from the codec proc file (see "Codec Proc-File" | 170 | Check your codec chip from the codec proc file (see "Codec Proc-File" |
164 | section below). It will show the vendor/product name of your codec | 171 | section below). It will show the vendor/product name of your codec |
165 | chip. Then, see Documentation/sound/alsa/HD-Audio-Modelstxt file, | 172 | chip. Then, see Documentation/sound/alsa/HD-Audio-Models.txt file, |
166 | the section of HD-audio driver. You can find a list of codecs | 173 | the section of HD-audio driver. You can find a list of codecs |
167 | and `model` options belonging to each codec. For example, for Realtek | 174 | and `model` options belonging to each codec. For example, for Realtek |
168 | ALC262 codec chip, pass `model=ultra` for devices that are compatible | 175 | ALC262 codec chip, pass `model=ultra` for devices that are compatible |
@@ -170,7 +177,7 @@ with Samsung Q1 Ultra. | |||
170 | 177 | ||
171 | Thus, the first thing you can do for any brand-new, unsupported and | 178 | Thus, the first thing you can do for any brand-new, unsupported and |
172 | non-working HD-audio hardware is to check HD-audio codec and several | 179 | non-working HD-audio hardware is to check HD-audio codec and several |
173 | different `model` option values. If you have a luck, some of them | 180 | different `model` option values. If you have any luck, some of them |
174 | might suit with your device well. | 181 | might suit with your device well. |
175 | 182 | ||
176 | Some codecs such as ALC880 have a special model option `model=test`. | 183 | Some codecs such as ALC880 have a special model option `model=test`. |
@@ -358,10 +365,26 @@ modelname:: | |||
358 | to this file. | 365 | to this file. |
359 | init_verbs:: | 366 | init_verbs:: |
360 | The extra verbs to execute at initialization. You can add a verb by | 367 | The extra verbs to execute at initialization. You can add a verb by |
361 | writing to this file. Pass tree numbers, nid, verb and parameter. | 368 | writing to this file. Pass three numbers: nid, verb and parameter |
369 | (separated with a space). | ||
362 | hints:: | 370 | hints:: |
363 | Shows hint strings for codec parsers for any use. Right now it's | 371 | Shows / stores hint strings for codec parsers for any use. |
364 | not used. | 372 | Its format is `key = value`. For example, passing `hp_detect = yes` |
373 | to IDT/STAC codec parser will result in the disablement of the | ||
374 | headphone detection. | ||
375 | init_pin_configs:: | ||
376 | Shows the initial pin default config values set by BIOS. | ||
377 | driver_pin_configs:: | ||
378 | Shows the pin default values set by the codec parser explicitly. | ||
379 | This doesn't show all pin values but only the changed values by | ||
380 | the parser. That is, if the parser doesn't change the pin default | ||
381 | config values by itself, this will contain nothing. | ||
382 | user_pin_configs:: | ||
383 | Shows the pin default config values to override the BIOS setup. | ||
384 | Writing this (with two numbers, NID and value) appends the new | ||
385 | value. The given will be used instead of the initial BIOS value at | ||
386 | the next reconfiguration time. Note that this config will override | ||
387 | even the driver pin configs, too. | ||
365 | reconfig:: | 388 | reconfig:: |
366 | Triggers the codec re-configuration. When any value is written to | 389 | Triggers the codec re-configuration. When any value is written to |
367 | this file, the driver re-initialize and parses the codec tree | 390 | this file, the driver re-initialize and parses the codec tree |
@@ -371,6 +394,14 @@ clear:: | |||
371 | Resets the codec, removes the mixer elements and PCM stuff of the | 394 | Resets the codec, removes the mixer elements and PCM stuff of the |
372 | specified codec, and clear all init verbs and hints. | 395 | specified codec, and clear all init verbs and hints. |
373 | 396 | ||
397 | For example, when you want to change the pin default configuration | ||
398 | value of the pin widget 0x14 to 0x9993013f, and let the driver | ||
399 | re-configure based on that state, run like below: | ||
400 | ------------------------------------------------------------------------ | ||
401 | # echo 0x14 0x9993013f > /sys/class/sound/hwC0D0/user_pin_configs | ||
402 | # echo 1 > /sys/class/sound/hwC0D0/reconfig | ||
403 | ------------------------------------------------------------------------ | ||
404 | |||
374 | 405 | ||
375 | Power-Saving | 406 | Power-Saving |
376 | ~~~~~~~~~~~~ | 407 | ~~~~~~~~~~~~ |
@@ -461,6 +492,16 @@ run with `--no-upload` option, and attach the generated file. | |||
461 | There are some other useful options. See `--help` option output for | 492 | There are some other useful options. See `--help` option output for |
462 | details. | 493 | details. |
463 | 494 | ||
495 | When a probe error occurs or when the driver obviously assigns a | ||
496 | mismatched model, it'd be helpful to load the driver with | ||
497 | `probe_only=1` option (at best after the cold reboot) and run | ||
498 | alsa-info at this state. With this option, the driver won't configure | ||
499 | the mixer and PCM but just tries to probe the codec slot. After | ||
500 | probing, the proc file is available, so you can get the raw codec | ||
501 | information before modified by the driver. Of course, the driver | ||
502 | isn't usable with `probe_only=1`. But you can continue the | ||
503 | configuration via hwdep sysfs file if hda-reconfig option is enabled. | ||
504 | |||
464 | 505 | ||
465 | hda-verb | 506 | hda-verb |
466 | ~~~~~~~~ | 507 | ~~~~~~~~ |
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt index bba2dbb79d81..381908d8ca42 100644 --- a/Documentation/sound/alsa/Procfile.txt +++ b/Documentation/sound/alsa/Procfile.txt | |||
@@ -88,21 +88,34 @@ card*/pcm*/info | |||
88 | substreams, etc. | 88 | substreams, etc. |
89 | 89 | ||
90 | card*/pcm*/xrun_debug | 90 | card*/pcm*/xrun_debug |
91 | This file appears when CONFIG_SND_DEBUG=y. | 91 | This file appears when CONFIG_SND_DEBUG=y and |
92 | This shows the status of xrun (= buffer overrun/xrun) debug of | 92 | CONFIG_PCM_XRUN_DEBUG=y. |
93 | ALSA PCM middle layer, as an integer from 0 to 2. The value | 93 | This shows the status of xrun (= buffer overrun/xrun) and |
94 | can be changed by writing to this file, such as | 94 | invalid PCM position debug/check of ALSA PCM middle layer. |
95 | 95 | It takes an integer value, can be changed by writing to this | |
96 | # cat 2 > /proc/asound/card0/pcm0p/xrun_debug | 96 | file, such as |
97 | 97 | ||
98 | When this value is greater than 0, the driver will show the | 98 | # cat 5 > /proc/asound/card0/pcm0p/xrun_debug |
99 | messages to kernel log when an xrun is detected. The debug | 99 | |
100 | message is shown also when the invalid H/W pointer is detected | 100 | The value consists of the following bit flags: |
101 | at the update of periods (usually called from the interrupt | 101 | bit 0 = Enable XRUN/jiffies debug messages |
102 | bit 1 = Show stack trace at XRUN / jiffies check | ||
103 | bit 2 = Enable additional jiffies check | ||
104 | |||
105 | When the bit 0 is set, the driver will show the messages to | ||
106 | kernel log when an xrun is detected. The debug message is | ||
107 | shown also when the invalid H/W pointer is detected at the | ||
108 | update of periods (usually called from the interrupt | ||
102 | handler). | 109 | handler). |
103 | 110 | ||
104 | When this value is greater than 1, the driver will show the | 111 | When the bit 1 is set, the driver will show the stack trace |
105 | stack trace additionally. This may help the debugging. | 112 | additionally. This may help the debugging. |
113 | |||
114 | Since 2.6.30, this option can enable the hwptr check using | ||
115 | jiffies. This detects spontaneous invalid pointer callback | ||
116 | values, but can be lead to too much corrections for a (mostly | ||
117 | buggy) hardware that doesn't give smooth pointer updates. | ||
118 | This feature is enabled via the bit 2. | ||
106 | 119 | ||
107 | card*/pcm*/sub*/info | 120 | card*/pcm*/sub*/info |
108 | The general information of this PCM sub-stream. | 121 | The general information of this PCM sub-stream. |
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44 new file mode 100644 index 000000000000..0e41576fa13e --- /dev/null +++ b/Documentation/sound/alsa/README.maya44 | |||
@@ -0,0 +1,163 @@ | |||
1 | NOTE: The following is the original document of Rainer's patch that the | ||
2 | current maya44 code based on. Some contents might be obsoleted, but I | ||
3 | keep here as reference -- tiwai | ||
4 | |||
5 | ---------------------------------------------------------------- | ||
6 | |||
7 | STATE OF DEVELOPMENT: | ||
8 | |||
9 | This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann. | ||
10 | Development is carried out by Rainer Zimmermann (mail@lightshed.de). | ||
11 | |||
12 | ESI provided a sample Maya44 card for the development work. | ||
13 | |||
14 | However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing. | ||
15 | |||
16 | This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008). | ||
17 | |||
18 | |||
19 | The following functions work, as tested by Rainer Zimmermann and Piotr Makowski: | ||
20 | |||
21 | - playback and capture at all sampling rates | ||
22 | - input/output level | ||
23 | - crossmixing | ||
24 | - line/mic switch | ||
25 | - phantom power switch | ||
26 | - analogue monitor a.k.a bypass | ||
27 | |||
28 | |||
29 | The following functions *should* work, but are not fully tested: | ||
30 | |||
31 | - Channel 3+4 analogue - S/PDIF input switching | ||
32 | - S/PDIF output | ||
33 | - all inputs/outputs on the M/IO/DIO extension card | ||
34 | - internal/external clock selection | ||
35 | |||
36 | |||
37 | *In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.* | ||
38 | |||
39 | |||
40 | Things that do not seem to work: | ||
41 | |||
42 | - The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code). | ||
43 | |||
44 | - Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down. | ||
45 | |||
46 | |||
47 | DRIVER DETAILS: | ||
48 | |||
49 | the following files were added: | ||
50 | |||
51 | pci/ice1724/maya44.c - Maya44 specific code | ||
52 | pci/ice1724/maya44.h | ||
53 | pci/ice1724/ice1724.patch | ||
54 | pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES) | ||
55 | i2c/other/wm8776.c - low-level access routines for Wolfson WM8776 codecs | ||
56 | include/wm8776.h | ||
57 | |||
58 | |||
59 | Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure. | ||
60 | This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately. | ||
61 | |||
62 | |||
63 | the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree: | ||
64 | |||
65 | wtm.h | ||
66 | vt1720_mobo.h | ||
67 | revo.h | ||
68 | prodigy192.h | ||
69 | pontis.h | ||
70 | phase.h | ||
71 | maya44.h | ||
72 | juli.h | ||
73 | aureon.h | ||
74 | amp.h | ||
75 | envy24ht.h | ||
76 | se.h | ||
77 | prodigy_hifi.h | ||
78 | |||
79 | |||
80 | *I hope this is the correct way to do things.* | ||
81 | |||
82 | |||
83 | SAMPLING RATES: | ||
84 | |||
85 | The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture. | ||
86 | |||
87 | As the ICE1724 chip only allows one global sampling rate, this is handled as follows: | ||
88 | |||
89 | * setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels. | ||
90 | |||
91 | * In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices. | ||
92 | |||
93 | *AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality. | ||
94 | |||
95 | |||
96 | I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic. | ||
97 | |||
98 | The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712). | ||
99 | |||
100 | |||
101 | SOUND DEVICES: | ||
102 | |||
103 | PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0): | ||
104 | |||
105 | hw:0,0 input - stereo, analog input 1+2 | ||
106 | hw:0,0 output - stereo, analog output 1+2 | ||
107 | hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input | ||
108 | hw:0,1 output - stereo, analog output 3+4 (and SPDIF out) | ||
109 | |||
110 | |||
111 | NAMING OF MIXER CONTROLS: | ||
112 | |||
113 | (for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software). | ||
114 | |||
115 | |||
116 | PCM: (digital) output level for channel 1+2 | ||
117 | PCM 1: same for channel 3+4 | ||
118 | |||
119 | Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2. | ||
120 | Make sure this is not turned on while any other source is connected to input 1/2. | ||
121 | It might damage the source and/or the maya44 card. | ||
122 | |||
123 | Mic/Line input: if switch is is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo). | ||
124 | |||
125 | Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver. | ||
126 | Bypass 1: same for channel 3+4. | ||
127 | |||
128 | Crossmix: cross-mixer from channels 1+2 to channels 3+4 | ||
129 | Crossmix 1: cross-mixer from channels 3+4 to channels 1+2 | ||
130 | |||
131 | IEC958 Output: switch for S/PDIF output. | ||
132 | This is not supported by the ESI windows driver. | ||
133 | S/PDIF should output the same signal as channel 3+4. [untested!] | ||
134 | |||
135 | |||
136 | Digitial output selectors: | ||
137 | |||
138 | These switches allow a direct digital routing from the ADCs to the DACs. | ||
139 | Each switch determines where the digital input data to one of the DACs comes from. | ||
140 | They are not supported by the ESI windows driver. | ||
141 | For normal operation, they should all be set to "PCM out". | ||
142 | |||
143 | H/W: Output source channel 1 | ||
144 | H/W 1: Output source channel 2 | ||
145 | H/W 2: Output source channel 3 | ||
146 | H/W 3: Output source channel 4 | ||
147 | |||
148 | H/W 4 ... H/W 9: unknown function, left in to enable testing. | ||
149 | Possibly some of these control S/PDIF output(s). | ||
150 | If these turn out to be unused, they will go away in later driver versions. | ||
151 | |||
152 | Selectable values for each of the digital output selectors are: | ||
153 | "PCM out" -> DAC output of the corresponding channel (default setting) | ||
154 | "Input 1"... | ||
155 | "Input 4" -> direct routing from ADC output of the selected input channel | ||
156 | |||
157 | |||
158 | -------- | ||
159 | |||
160 | Feb 14, 2008 | ||
161 | Rainer Zimmermann | ||
162 | mail@lightshed.de | ||
163 | |||
diff --git a/Documentation/sound/alsa/hda_codec.txt b/Documentation/sound/alsa/hda_codec.txt index 34e87ec1379c..de8efbc7e4bd 100644 --- a/Documentation/sound/alsa/hda_codec.txt +++ b/Documentation/sound/alsa/hda_codec.txt | |||
@@ -114,7 +114,7 @@ For writing a sequence of verbs, use snd_hda_sequence_write(). | |||
114 | 114 | ||
115 | There are variants of cached read/write, snd_hda_codec_write_cache(), | 115 | There are variants of cached read/write, snd_hda_codec_write_cache(), |
116 | snd_hda_sequence_write_cache(). These are used for recording the | 116 | snd_hda_sequence_write_cache(). These are used for recording the |
117 | register states for the power-mangement resume. When no PM is needed, | 117 | register states for the power-management resume. When no PM is needed, |
118 | these are equivalent with non-cached version. | 118 | these are equivalent with non-cached version. |
119 | 119 | ||
120 | To retrieve the number of sub nodes connected to the given node, use | 120 | To retrieve the number of sub nodes connected to the given node, use |
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt index 46f9684d0b29..9ac842be9b4f 100644 --- a/Documentation/sound/alsa/soc/dapm.txt +++ b/Documentation/sound/alsa/soc/dapm.txt | |||
@@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:- | |||
62 | o Mic - Mic (and optional Jack) | 62 | o Mic - Mic (and optional Jack) |
63 | o Line - Line Input/Output (and optional Jack) | 63 | o Line - Line Input/Output (and optional Jack) |
64 | o Speaker - Speaker | 64 | o Speaker - Speaker |
65 | o Supply - Power or clock supply widget used by other widgets. | ||
65 | o Pre - Special PRE widget (exec before all others) | 66 | o Pre - Special PRE widget (exec before all others) |
66 | o Post - Special POST widget (exec after all others) | 67 | o Post - Special POST widget (exec after all others) |
67 | 68 | ||
@@ -116,6 +117,9 @@ SOC_DAPM_SINGLE("HiFi Playback Switch", WM8731_APANA, 4, 1, 0), | |||
116 | SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls, | 117 | SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls, |
117 | ARRAY_SIZE(wm8731_output_mixer_controls)), | 118 | ARRAY_SIZE(wm8731_output_mixer_controls)), |
118 | 119 | ||
120 | If you dont want the mixer elements prefixed with the name of the mixer widget, | ||
121 | you can use SND_SOC_DAPM_MIXER_NAMED_CTL instead. the parameters are the same | ||
122 | as for SND_SOC_DAPM_MIXER. | ||
119 | 123 | ||
120 | 2.3 Platform/Machine domain Widgets | 124 | 2.3 Platform/Machine domain Widgets |
121 | ----------------------------------- | 125 | ----------------------------------- |
diff --git a/Documentation/sound/alsa/soc/jack.txt b/Documentation/sound/alsa/soc/jack.txt new file mode 100644 index 000000000000..fcf82a417293 --- /dev/null +++ b/Documentation/sound/alsa/soc/jack.txt | |||
@@ -0,0 +1,71 @@ | |||
1 | ASoC jack detection | ||
2 | =================== | ||
3 | |||
4 | ALSA has a standard API for representing physical jacks to user space, | ||
5 | the kernel side of which can be seen in include/sound/jack.h. ASoC | ||
6 | provides a version of this API adding two additional features: | ||
7 | |||
8 | - It allows more than one jack detection method to work together on one | ||
9 | user visible jack. In embedded systems it is common for multiple | ||
10 | to be present on a single jack but handled by separate bits of | ||
11 | hardware. | ||
12 | |||
13 | - Integration with DAPM, allowing DAPM endpoints to be updated | ||
14 | automatically based on the detected jack status (eg, turning off the | ||
15 | headphone outputs if no headphones are present). | ||
16 | |||
17 | This is done by splitting the jacks up into three things working | ||
18 | together: the jack itself represented by a struct snd_soc_jack, sets of | ||
19 | snd_soc_jack_pins representing DAPM endpoints to update and blocks of | ||
20 | code providing jack reporting mechanisms. | ||
21 | |||
22 | For example, a system may have a stereo headset jack with two reporting | ||
23 | mechanisms, one for the headphone and one for the microphone. Some | ||
24 | systems won't be able to use their speaker output while a headphone is | ||
25 | connected and so will want to make sure to update both speaker and | ||
26 | headphone when the headphone jack status changes. | ||
27 | |||
28 | The jack - struct snd_soc_jack | ||
29 | ============================== | ||
30 | |||
31 | This represents a physical jack on the system and is what is visible to | ||
32 | user space. The jack itself is completely passive, it is set up by the | ||
33 | machine driver and updated by jack detection methods. | ||
34 | |||
35 | Jacks are created by the machine driver calling snd_soc_jack_new(). | ||
36 | |||
37 | snd_soc_jack_pin | ||
38 | ================ | ||
39 | |||
40 | These represent a DAPM pin to update depending on some of the status | ||
41 | bits supported by the jack. Each snd_soc_jack has zero or more of these | ||
42 | which are updated automatically. They are created by the machine driver | ||
43 | and associated with the jack using snd_soc_jack_add_pins(). The status | ||
44 | of the endpoint may configured to be the opposite of the jack status if | ||
45 | required (eg, enabling a built in microphone if a microphone is not | ||
46 | connected via a jack). | ||
47 | |||
48 | Jack detection methods | ||
49 | ====================== | ||
50 | |||
51 | Actual jack detection is done by code which is able to monitor some | ||
52 | input to the system and update a jack by calling snd_soc_jack_report(), | ||
53 | specifying a subset of bits to update. The jack detection code should | ||
54 | be set up by the machine driver, taking configuration for the jack to | ||
55 | update and the set of things to report when the jack is connected. | ||
56 | |||
57 | Often this is done based on the status of a GPIO - a handler for this is | ||
58 | provided by the snd_soc_jack_add_gpio() function. Other methods are | ||
59 | also available, for example integrated into CODECs. One example of | ||
60 | CODEC integrated jack detection can be see in the WM8350 driver. | ||
61 | |||
62 | Each jack may have multiple reporting mechanisms, though it will need at | ||
63 | least one to be useful. | ||
64 | |||
65 | Machine drivers | ||
66 | =============== | ||
67 | |||
68 | These are all hooked together by the machine driver depending on the | ||
69 | system hardware. The machine driver will set up the snd_soc_jack and | ||
70 | the list of pins to update then set up one or more jack detection | ||
71 | mechanisms to update that jack based on their current status. | ||
diff --git a/Documentation/sound/oss/CS4232 b/Documentation/sound/oss/CS4232 deleted file mode 100644 index 7d6af7a5c1c2..000000000000 --- a/Documentation/sound/oss/CS4232 +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | To configure the Crystal CS423x sound chip and activate its DSP functions, | ||
2 | modules may be loaded in this order: | ||
3 | |||
4 | modprobe sound | ||
5 | insmod ad1848 | ||
6 | insmod uart401 | ||
7 | insmod cs4232 io=* irq=* dma=* dma2=* | ||
8 | |||
9 | This is the meaning of the parameters: | ||
10 | |||
11 | io--I/O address of the Windows Sound System (normally 0x534) | ||
12 | irq--IRQ of this device | ||
13 | dma and dma2--DMA channels (DMA2 may be 0) | ||
14 | |||
15 | On some cards, the board attempts to do non-PnP setup, and fails. If you | ||
16 | have problems, use Linux' PnP facilities. | ||
17 | |||
18 | To get MIDI facilities add | ||
19 | |||
20 | insmod opl3 io=* | ||
21 | |||
22 | where "io" is the I/O address of the OPL3 synthesizer. This will be shown | ||
23 | in /proc/sys/pnp and is normally 0x388. | ||
diff --git a/Documentation/sound/oss/Introduction b/Documentation/sound/oss/Introduction index f04ba6bb7395..75d967ff9266 100644 --- a/Documentation/sound/oss/Introduction +++ b/Documentation/sound/oss/Introduction | |||
@@ -80,7 +80,7 @@ Notes: | |||
80 | additional features. | 80 | additional features. |
81 | 81 | ||
82 | 2. The commercial OSS driver may be obtained from the site: | 82 | 2. The commercial OSS driver may be obtained from the site: |
83 | http://www/opensound.com. This may be used for cards that | 83 | http://www.opensound.com. This may be used for cards that |
84 | are unsupported by the kernel driver, or may be used | 84 | are unsupported by the kernel driver, or may be used |
85 | by other operating systems. | 85 | by other operating systems. |
86 | 86 | ||
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt index 42f43fa59f24..34c76a55bc04 100644 --- a/Documentation/sparse.txt +++ b/Documentation/sparse.txt | |||
@@ -42,6 +42,14 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian | |||
42 | vs cpu-endian vs whatever), and there the constant "0" really _is_ | 42 | vs cpu-endian vs whatever), and there the constant "0" really _is_ |
43 | special. | 43 | special. |
44 | 44 | ||
45 | __bitwise__ - to be used for relatively compact stuff (gfp_t, etc.) that | ||
46 | is mostly warning-free and is supposed to stay that way. Warnings will | ||
47 | be generated without __CHECK_ENDIAN__. | ||
48 | |||
49 | __bitwise - noisy stuff; in particular, __le*/__be* are that. We really | ||
50 | don't want to drown in noise unless we'd explicitly asked for it. | ||
51 | |||
52 | |||
45 | Getting sparse | 53 | Getting sparse |
46 | ~~~~~~~~~~~~~~ | 54 | ~~~~~~~~~~~~~~ |
47 | 55 | ||
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index 0f5122eb282b..4a02d2508bc8 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary | |||
@@ -511,10 +511,16 @@ SPI MASTER METHODS | |||
511 | This sets up the device clock rate, SPI mode, and word sizes. | 511 | This sets up the device clock rate, SPI mode, and word sizes. |
512 | Drivers may change the defaults provided by board_info, and then | 512 | Drivers may change the defaults provided by board_info, and then |
513 | call spi_setup(spi) to invoke this routine. It may sleep. | 513 | call spi_setup(spi) to invoke this routine. It may sleep. |
514 | |||
514 | Unless each SPI slave has its own configuration registers, don't | 515 | Unless each SPI slave has its own configuration registers, don't |
515 | change them right away ... otherwise drivers could corrupt I/O | 516 | change them right away ... otherwise drivers could corrupt I/O |
516 | that's in progress for other SPI devices. | 517 | that's in progress for other SPI devices. |
517 | 518 | ||
519 | ** BUG ALERT: for some reason the first version of | ||
520 | ** many spi_master drivers seems to get this wrong. | ||
521 | ** When you code setup(), ASSUME that the controller | ||
522 | ** is actively processing transfers for another device. | ||
523 | |||
518 | master->transfer(struct spi_device *spi, struct spi_message *message) | 524 | master->transfer(struct spi_device *spi, struct spi_message *message) |
519 | This must not sleep. Its responsibility is arrange that the | 525 | This must not sleep. Its responsibility is arrange that the |
520 | transfer happens and its complete() callback is issued. The two | 526 | transfer happens and its complete() callback is issued. The two |
diff --git a/Documentation/sysctl/00-INDEX b/Documentation/sysctl/00-INDEX index a20a9066dc4c..1286f455992f 100644 --- a/Documentation/sysctl/00-INDEX +++ b/Documentation/sysctl/00-INDEX | |||
@@ -10,6 +10,8 @@ fs.txt | |||
10 | - documentation for /proc/sys/fs/*. | 10 | - documentation for /proc/sys/fs/*. |
11 | kernel.txt | 11 | kernel.txt |
12 | - documentation for /proc/sys/kernel/*. | 12 | - documentation for /proc/sys/kernel/*. |
13 | net.txt | ||
14 | - documentation for /proc/sys/net/*. | ||
13 | sunrpc.txt | 15 | sunrpc.txt |
14 | - documentation for /proc/sys/sunrpc/*. | 16 | - documentation for /proc/sys/sunrpc/*. |
15 | vm.txt | 17 | vm.txt |
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index f99254327ae5..1458448436cc 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -1,5 +1,6 @@ | |||
1 | Documentation for /proc/sys/fs/* kernel version 2.2.10 | 1 | Documentation for /proc/sys/fs/* kernel version 2.2.10 |
2 | (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> | 2 | (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> |
3 | (c) 2009, Shen Feng<shen@cn.fujitsu.com> | ||
3 | 4 | ||
4 | For general info and legal blurb, please look in README. | 5 | For general info and legal blurb, please look in README. |
5 | 6 | ||
@@ -14,7 +15,12 @@ kernel. Since some of the files _can_ be used to screw up your | |||
14 | system, it is advisable to read both documentation and source | 15 | system, it is advisable to read both documentation and source |
15 | before actually making adjustments. | 16 | before actually making adjustments. |
16 | 17 | ||
18 | 1. /proc/sys/fs | ||
19 | ---------------------------------------------------------- | ||
20 | |||
17 | Currently, these files are in /proc/sys/fs: | 21 | Currently, these files are in /proc/sys/fs: |
22 | - aio-max-nr | ||
23 | - aio-nr | ||
18 | - dentry-state | 24 | - dentry-state |
19 | - dquot-max | 25 | - dquot-max |
20 | - dquot-nr | 26 | - dquot-nr |
@@ -30,8 +36,15 @@ Currently, these files are in /proc/sys/fs: | |||
30 | - super-max | 36 | - super-max |
31 | - super-nr | 37 | - super-nr |
32 | 38 | ||
33 | Documentation for the files in /proc/sys/fs/binfmt_misc is | 39 | ============================================================== |
34 | in Documentation/binfmt_misc.txt. | 40 | |
41 | aio-nr & aio-max-nr: | ||
42 | |||
43 | aio-nr is the running total of the number of events specified on the | ||
44 | io_setup system call for all currently active aio contexts. If aio-nr | ||
45 | reaches aio-max-nr then io_setup will fail with EAGAIN. Note that | ||
46 | raising aio-max-nr does not result in the pre-allocation or re-sizing | ||
47 | of any kernel data structures. | ||
35 | 48 | ||
36 | ============================================================== | 49 | ============================================================== |
37 | 50 | ||
@@ -178,3 +191,60 @@ requests. aio-max-nr allows you to change the maximum value | |||
178 | aio-nr can grow to. | 191 | aio-nr can grow to. |
179 | 192 | ||
180 | ============================================================== | 193 | ============================================================== |
194 | |||
195 | |||
196 | 2. /proc/sys/fs/binfmt_misc | ||
197 | ---------------------------------------------------------- | ||
198 | |||
199 | Documentation for the files in /proc/sys/fs/binfmt_misc is | ||
200 | in Documentation/binfmt_misc.txt. | ||
201 | |||
202 | |||
203 | 3. /proc/sys/fs/mqueue - POSIX message queues filesystem | ||
204 | ---------------------------------------------------------- | ||
205 | |||
206 | The "mqueue" filesystem provides the necessary kernel features to enable the | ||
207 | creation of a user space library that implements the POSIX message queues | ||
208 | API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System | ||
209 | Interfaces specification.) | ||
210 | |||
211 | The "mqueue" filesystem contains values for determining/setting the amount of | ||
212 | resources used by the file system. | ||
213 | |||
214 | /proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the | ||
215 | maximum number of message queues allowed on the system. | ||
216 | |||
217 | /proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the | ||
218 | maximum number of messages in a queue value. In fact it is the limiting value | ||
219 | for another (user) limit which is set in mq_open invocation. This attribute of | ||
220 | a queue must be less or equal then msg_max. | ||
221 | |||
222 | /proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the | ||
223 | maximum message size value (it is every message queue's attribute set during | ||
224 | its creation). | ||
225 | |||
226 | |||
227 | 4. /proc/sys/fs/epoll - Configuration options for the epoll interface | ||
228 | -------------------------------------------------------- | ||
229 | |||
230 | This directory contains configuration options for the epoll(7) interface. | ||
231 | |||
232 | max_user_instances | ||
233 | ------------------ | ||
234 | |||
235 | This is the maximum number of epoll file descriptors that a single user can | ||
236 | have open at a given time. The default value is 128, and should be enough | ||
237 | for normal users. | ||
238 | |||
239 | max_user_watches | ||
240 | ---------------- | ||
241 | |||
242 | Every epoll file descriptor can store a number of files to be monitored | ||
243 | for event readiness. Each one of these monitored files constitutes a "watch". | ||
244 | This configuration option sets the maximum number of "watches" that are | ||
245 | allowed for each user. | ||
246 | Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes | ||
247 | on a 64bit one. | ||
248 | The current default value for max_user_watches is the 1/32 of the available | ||
249 | low memory, divided for the "watch" cost in bytes. | ||
250 | |||
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a4ccdd1981cf..322a00bb99d9 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -1,5 +1,6 @@ | |||
1 | Documentation for /proc/sys/kernel/* kernel version 2.2.10 | 1 | Documentation for /proc/sys/kernel/* kernel version 2.2.10 |
2 | (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> | 2 | (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> |
3 | (c) 2009, Shen Feng<shen@cn.fujitsu.com> | ||
3 | 4 | ||
4 | For general info and legal blurb, please look in README. | 5 | For general info and legal blurb, please look in README. |
5 | 6 | ||
@@ -18,6 +19,7 @@ Currently, these files might (depending on your configuration) | |||
18 | show up in /proc/sys/kernel: | 19 | show up in /proc/sys/kernel: |
19 | - acpi_video_flags | 20 | - acpi_video_flags |
20 | - acct | 21 | - acct |
22 | - auto_msgmni | ||
21 | - core_pattern | 23 | - core_pattern |
22 | - core_uses_pid | 24 | - core_uses_pid |
23 | - ctrl-alt-del | 25 | - ctrl-alt-del |
@@ -30,9 +32,11 @@ show up in /proc/sys/kernel: | |||
30 | - kstack_depth_to_print [ X86 only ] | 32 | - kstack_depth_to_print [ X86 only ] |
31 | - l2cr [ PPC only ] | 33 | - l2cr [ PPC only ] |
32 | - modprobe ==> Documentation/debugging-modules.txt | 34 | - modprobe ==> Documentation/debugging-modules.txt |
35 | - modules_disabled | ||
33 | - msgmax | 36 | - msgmax |
34 | - msgmnb | 37 | - msgmnb |
35 | - msgmni | 38 | - msgmni |
39 | - nmi_watchdog | ||
36 | - osrelease | 40 | - osrelease |
37 | - ostype | 41 | - ostype |
38 | - overflowgid | 42 | - overflowgid |
@@ -40,6 +44,7 @@ show up in /proc/sys/kernel: | |||
40 | - panic | 44 | - panic |
41 | - pid_max | 45 | - pid_max |
42 | - powersave-nap [ PPC only ] | 46 | - powersave-nap [ PPC only ] |
47 | - panic_on_unrecovered_nmi | ||
43 | - printk | 48 | - printk |
44 | - randomize_va_space | 49 | - randomize_va_space |
45 | - real-root-dev ==> Documentation/initrd.txt | 50 | - real-root-dev ==> Documentation/initrd.txt |
@@ -55,6 +60,7 @@ show up in /proc/sys/kernel: | |||
55 | - sysrq ==> Documentation/sysrq.txt | 60 | - sysrq ==> Documentation/sysrq.txt |
56 | - tainted | 61 | - tainted |
57 | - threads-max | 62 | - threads-max |
63 | - unknown_nmi_panic | ||
58 | - version | 64 | - version |
59 | 65 | ||
60 | ============================================================== | 66 | ============================================================== |
@@ -179,6 +185,16 @@ kernel stack. | |||
179 | 185 | ||
180 | ============================================================== | 186 | ============================================================== |
181 | 187 | ||
188 | modules_disabled: | ||
189 | |||
190 | A toggle value indicating if modules are allowed to be loaded | ||
191 | in an otherwise modular kernel. This toggle defaults to off | ||
192 | (0), but can be set true (1). Once true, modules can be | ||
193 | neither loaded nor unloaded, and the toggle cannot be set back | ||
194 | to false. | ||
195 | |||
196 | ============================================================== | ||
197 | |||
182 | osrelease, ostype & version: | 198 | osrelease, ostype & version: |
183 | 199 | ||
184 | # cat osrelease | 200 | # cat osrelease |
@@ -381,3 +397,51 @@ can be ORed together: | |||
381 | 512 - A kernel warning has occurred. | 397 | 512 - A kernel warning has occurred. |
382 | 1024 - A module from drivers/staging was loaded. | 398 | 1024 - A module from drivers/staging was loaded. |
383 | 399 | ||
400 | ============================================================== | ||
401 | |||
402 | auto_msgmni: | ||
403 | |||
404 | Enables/Disables automatic recomputing of msgmni upon memory add/remove or | ||
405 | upon ipc namespace creation/removal (see the msgmni description above). | ||
406 | Echoing "1" into this file enables msgmni automatic recomputing. | ||
407 | Echoing "0" turns it off. | ||
408 | auto_msgmni default value is 1. | ||
409 | |||
410 | ============================================================== | ||
411 | |||
412 | nmi_watchdog: | ||
413 | |||
414 | Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero | ||
415 | the NMI watchdog is enabled and will continuously test all online cpus to | ||
416 | determine whether or not they are still functioning properly. Currently, | ||
417 | passing "nmi_watchdog=" parameter at boot time is required for this function | ||
418 | to work. | ||
419 | |||
420 | If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the | ||
421 | NMI watchdog shares registers with oprofile. By disabling the NMI watchdog, | ||
422 | oprofile may have more registers to utilize. | ||
423 | |||
424 | ============================================================== | ||
425 | |||
426 | unknown_nmi_panic: | ||
427 | |||
428 | The value in this file affects behavior of handling NMI. When the value is | ||
429 | non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel | ||
430 | debugging information is displayed on console. | ||
431 | |||
432 | NMI switch that most IA32 servers have fires unknown NMI up, for example. | ||
433 | If a system hangs up, try pressing the NMI switch. | ||
434 | |||
435 | ============================================================== | ||
436 | |||
437 | panic_on_unrecovered_nmi: | ||
438 | |||
439 | The default Linux behaviour on an NMI of either memory or unknown is to continue | ||
440 | operation. For many environments such as scientific computing it is preferable | ||
441 | that the box is taken out and the error dealt with than an uncorrected | ||
442 | parity/ECC error get propogated. | ||
443 | |||
444 | A small number of systems do generate NMI's for bizarre random reasons such as | ||
445 | power management so the default is off. That sysctl works like the existing | ||
446 | panic controls already in that directory. | ||
447 | |||
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt new file mode 100644 index 000000000000..df38ef046f8d --- /dev/null +++ b/Documentation/sysctl/net.txt | |||
@@ -0,0 +1,175 @@ | |||
1 | Documentation for /proc/sys/net/* kernel version 2.4.0-test11-pre4 | ||
2 | (c) 1999 Terrehon Bowden <terrehon@pacbell.net> | ||
3 | Bodo Bauer <bb@ricochet.net> | ||
4 | (c) 2000 Jorge Nerin <comandante@zaralinux.com> | ||
5 | (c) 2009 Shen Feng <shen@cn.fujitsu.com> | ||
6 | |||
7 | For general info and legal blurb, please look in README. | ||
8 | |||
9 | ============================================================== | ||
10 | |||
11 | This file contains the documentation for the sysctl files in | ||
12 | /proc/sys/net and is valid for Linux kernel version 2.4.0-test11-pre4. | ||
13 | |||
14 | The interface to the networking parts of the kernel is located in | ||
15 | /proc/sys/net. The following table shows all possible subdirectories.You may | ||
16 | see only some of them, depending on your kernel's configuration. | ||
17 | |||
18 | |||
19 | Table : Subdirectories in /proc/sys/net | ||
20 | .............................................................................. | ||
21 | Directory Content Directory Content | ||
22 | core General parameter appletalk Appletalk protocol | ||
23 | unix Unix domain sockets netrom NET/ROM | ||
24 | 802 E802 protocol ax25 AX25 | ||
25 | ethernet Ethernet protocol rose X.25 PLP layer | ||
26 | ipv4 IP version 4 x25 X.25 protocol | ||
27 | ipx IPX token-ring IBM token ring | ||
28 | bridge Bridging decnet DEC net | ||
29 | ipv6 IP version 6 | ||
30 | .............................................................................. | ||
31 | |||
32 | 1. /proc/sys/net/core - Network core options | ||
33 | ------------------------------------------------------- | ||
34 | |||
35 | rmem_default | ||
36 | ------------ | ||
37 | |||
38 | The default setting of the socket receive buffer in bytes. | ||
39 | |||
40 | rmem_max | ||
41 | -------- | ||
42 | |||
43 | The maximum receive socket buffer size in bytes. | ||
44 | |||
45 | wmem_default | ||
46 | ------------ | ||
47 | |||
48 | The default setting (in bytes) of the socket send buffer. | ||
49 | |||
50 | wmem_max | ||
51 | -------- | ||
52 | |||
53 | The maximum send socket buffer size in bytes. | ||
54 | |||
55 | message_burst and message_cost | ||
56 | ------------------------------ | ||
57 | |||
58 | These parameters are used to limit the warning messages written to the kernel | ||
59 | log from the networking code. They enforce a rate limit to make a | ||
60 | denial-of-service attack impossible. A higher message_cost factor, results in | ||
61 | fewer messages that will be written. Message_burst controls when messages will | ||
62 | be dropped. The default settings limit warning messages to one every five | ||
63 | seconds. | ||
64 | |||
65 | warnings | ||
66 | -------- | ||
67 | |||
68 | This controls console messages from the networking stack that can occur because | ||
69 | of problems on the network like duplicate address or bad checksums. Normally, | ||
70 | this should be enabled, but if the problem persists the messages can be | ||
71 | disabled. | ||
72 | |||
73 | netdev_budget | ||
74 | ------------- | ||
75 | |||
76 | Maximum number of packets taken from all interfaces in one polling cycle (NAPI | ||
77 | poll). In one polling cycle interfaces which are registered to polling are | ||
78 | probed in a round-robin manner. The limit of packets in one such probe can be | ||
79 | set per-device via sysfs class/net/<device>/weight . | ||
80 | |||
81 | netdev_max_backlog | ||
82 | ------------------ | ||
83 | |||
84 | Maximum number of packets, queued on the INPUT side, when the interface | ||
85 | receives packets faster than kernel can process them. | ||
86 | |||
87 | optmem_max | ||
88 | ---------- | ||
89 | |||
90 | Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence | ||
91 | of struct cmsghdr structures with appended data. | ||
92 | |||
93 | 2. /proc/sys/net/unix - Parameters for Unix domain sockets | ||
94 | ------------------------------------------------------- | ||
95 | |||
96 | There is only one file in this directory. | ||
97 | unix_dgram_qlen limits the max number of datagrams queued in Unix domain | ||
98 | socket's buffer. It will not take effect unless PF_UNIX flag is specified. | ||
99 | |||
100 | |||
101 | 3. /proc/sys/net/ipv4 - IPV4 settings | ||
102 | ------------------------------------------------------- | ||
103 | Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for | ||
104 | descriptions of these entries. | ||
105 | |||
106 | |||
107 | 4. Appletalk | ||
108 | ------------------------------------------------------- | ||
109 | |||
110 | The /proc/sys/net/appletalk directory holds the Appletalk configuration data | ||
111 | when Appletalk is loaded. The configurable parameters are: | ||
112 | |||
113 | aarp-expiry-time | ||
114 | ---------------- | ||
115 | |||
116 | The amount of time we keep an ARP entry before expiring it. Used to age out | ||
117 | old hosts. | ||
118 | |||
119 | aarp-resolve-time | ||
120 | ----------------- | ||
121 | |||
122 | The amount of time we will spend trying to resolve an Appletalk address. | ||
123 | |||
124 | aarp-retransmit-limit | ||
125 | --------------------- | ||
126 | |||
127 | The number of times we will retransmit a query before giving up. | ||
128 | |||
129 | aarp-tick-time | ||
130 | -------------- | ||
131 | |||
132 | Controls the rate at which expires are checked. | ||
133 | |||
134 | The directory /proc/net/appletalk holds the list of active Appletalk sockets | ||
135 | on a machine. | ||
136 | |||
137 | The fields indicate the DDP type, the local address (in network:node format) | ||
138 | the remote address, the size of the transmit pending queue, the size of the | ||
139 | received queue (bytes waiting for applications to read) the state and the uid | ||
140 | owning the socket. | ||
141 | |||
142 | /proc/net/atalk_iface lists all the interfaces configured for appletalk.It | ||
143 | shows the name of the interface, its Appletalk address, the network range on | ||
144 | that address (or network number for phase 1 networks), and the status of the | ||
145 | interface. | ||
146 | |||
147 | /proc/net/atalk_route lists each known network route. It lists the target | ||
148 | (network) that the route leads to, the router (may be directly connected), the | ||
149 | route flags, and the device the route is using. | ||
150 | |||
151 | |||
152 | 5. IPX | ||
153 | ------------------------------------------------------- | ||
154 | |||
155 | The IPX protocol has no tunable values in proc/sys/net. | ||
156 | |||
157 | The IPX protocol does, however, provide proc/net/ipx. This lists each IPX | ||
158 | socket giving the local and remote addresses in Novell format (that is | ||
159 | network:node:port). In accordance with the strange Novell tradition, | ||
160 | everything but the port is in hex. Not_Connected is displayed for sockets that | ||
161 | are not tied to a specific remote address. The Tx and Rx queue sizes indicate | ||
162 | the number of bytes pending for transmission and reception. The state | ||
163 | indicates the state the socket is in and the uid is the owning uid of the | ||
164 | socket. | ||
165 | |||
166 | The /proc/net/ipx_interface file lists all IPX interfaces. For each interface | ||
167 | it gives the network number, the node number, and indicates if the network is | ||
168 | the primary network. It also indicates which device it is bound to (or | ||
169 | Internal for internal networks) and the Frame Type if appropriate. Linux | ||
170 | supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for | ||
171 | IPX. | ||
172 | |||
173 | The /proc/net/ipx_route table holds a list of IPX routes. For each route it | ||
174 | gives the destination network, the router node (or Directly) and the network | ||
175 | address of the router (or Connected) for internal networks. | ||
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 3197fc83bc51..c4de6359d440 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -88,6 +88,10 @@ will itself start writeback. | |||
88 | If dirty_bytes is written, dirty_ratio becomes a function of its value | 88 | If dirty_bytes is written, dirty_ratio becomes a function of its value |
89 | (dirty_bytes / the amount of dirtyable system memory). | 89 | (dirty_bytes / the amount of dirtyable system memory). |
90 | 90 | ||
91 | Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any | ||
92 | value lower than this limit will be ignored and the old configuration will be | ||
93 | retained. | ||
94 | |||
91 | ============================================================== | 95 | ============================================================== |
92 | 96 | ||
93 | dirty_expire_centisecs | 97 | dirty_expire_centisecs |
@@ -229,8 +233,8 @@ These protections are added to score to judge whether this zone should be used | |||
229 | for page allocation or should be reclaimed. | 233 | for page allocation or should be reclaimed. |
230 | 234 | ||
231 | In this example, if normal pages (index=2) are required to this DMA zone and | 235 | In this example, if normal pages (index=2) are required to this DMA zone and |
232 | pages_high is used for watermark, the kernel judges this zone should not be | 236 | watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should |
233 | used because pages_free(1355) is smaller than watermark + protection[2] | 237 | not be used because pages_free(1355) is smaller than watermark + protection[2] |
234 | (4 + 2004 = 2008). If this protection value is 0, this zone would be used for | 238 | (4 + 2004 = 2008). If this protection value is 0, this zone would be used for |
235 | normal page requirement. If requirement is DMA zone(index=0), protection[0] | 239 | normal page requirement. If requirement is DMA zone(index=0), protection[0] |
236 | (=0) is used. | 240 | (=0) is used. |
@@ -276,9 +280,10 @@ The default value is 65536. | |||
276 | min_free_kbytes: | 280 | min_free_kbytes: |
277 | 281 | ||
278 | This is used to force the Linux VM to keep a minimum number | 282 | This is used to force the Linux VM to keep a minimum number |
279 | of kilobytes free. The VM uses this number to compute a pages_min | 283 | of kilobytes free. The VM uses this number to compute a |
280 | value for each lowmem zone in the system. Each lowmem zone gets | 284 | watermark[WMARK_MIN] value for each lowmem zone in the system. |
281 | a number of reserved free pages based proportionally on its size. | 285 | Each lowmem zone gets a number of reserved free pages based |
286 | proportionally on its size. | ||
282 | 287 | ||
283 | Some minimal amount of memory is needed to satisfy PF_MEMALLOC | 288 | Some minimal amount of memory is needed to satisfy PF_MEMALLOC |
284 | allocations; if you set this to lower than 1024KB, your system will | 289 | allocations; if you set this to lower than 1024KB, your system will |
@@ -310,10 +315,14 @@ min_unmapped_ratio: | |||
310 | 315 | ||
311 | This is available only on NUMA kernels. | 316 | This is available only on NUMA kernels. |
312 | 317 | ||
313 | A percentage of the total pages in each zone. Zone reclaim will only | 318 | This is a percentage of the total pages in each zone. Zone reclaim will |
314 | occur if more than this percentage of pages are file backed and unmapped. | 319 | only occur if more than this percentage of pages are in a state that |
315 | This is to insure that a minimal amount of local pages is still available for | 320 | zone_reclaim_mode allows to be reclaimed. |
316 | file I/O even if the node is overallocated. | 321 | |
322 | If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared | ||
323 | against all file-backed unmapped pages including swapcache pages and tmpfs | ||
324 | files. Otherwise, only unmapped pages backed by normal files but not tmpfs | ||
325 | files and similar are considered. | ||
317 | 326 | ||
318 | The default is 1 percent. | 327 | The default is 1 percent. |
319 | 328 | ||
@@ -354,7 +363,7 @@ nr_pdflush_threads | |||
354 | The current number of pdflush threads. This value is read-only. | 363 | The current number of pdflush threads. This value is read-only. |
355 | The value changes according to the number of dirty pages in the system. | 364 | The value changes according to the number of dirty pages in the system. |
356 | 365 | ||
357 | When neccessary, additional pdflush threads are created, one per second, up to | 366 | When necessary, additional pdflush threads are created, one per second, up to |
358 | nr_pdflush_threads_max. | 367 | nr_pdflush_threads_max. |
359 | 368 | ||
360 | ============================================================== | 369 | ============================================================== |
@@ -561,7 +570,7 @@ swappiness | |||
561 | 570 | ||
562 | This control is used to define how aggressive the kernel will swap | 571 | This control is used to define how aggressive the kernel will swap |
563 | memory pages. Higher values will increase agressiveness, lower values | 572 | memory pages. Higher values will increase agressiveness, lower values |
564 | descrease the amount of swap. | 573 | decrease the amount of swap. |
565 | 574 | ||
566 | The default value is 60. | 575 | The default value is 60. |
567 | 576 | ||
diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt index 6049a2a84dda..5d8bc2cd250c 100644 --- a/Documentation/sysfs-rules.txt +++ b/Documentation/sysfs-rules.txt | |||
@@ -113,7 +113,7 @@ versions of the sysfs interface. | |||
113 | "devices" directory at /sys/subsystem/<name>/devices. | 113 | "devices" directory at /sys/subsystem/<name>/devices. |
114 | 114 | ||
115 | If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be | 115 | If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be |
116 | ignored. If it does not exist, you have always to scan all three | 116 | ignored. If it does not exist, you always have to scan all three |
117 | places, as the kernel is free to move a subsystem from one place to | 117 | places, as the kernel is free to move a subsystem from one place to |
118 | the other, as long as the devices are still reachable by the same | 118 | the other, as long as the devices are still reachable by the same |
119 | subsystem name. | 119 | subsystem name. |
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 9e592c718afb..cf42b820ff9d 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt | |||
@@ -81,6 +81,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: | |||
81 | 81 | ||
82 | 'i' - Send a SIGKILL to all processes, except for init. | 82 | 'i' - Send a SIGKILL to all processes, except for init. |
83 | 83 | ||
84 | 'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl. | ||
85 | |||
84 | 'k' - Secure Access Key (SAK) Kills all programs on the current virtual | 86 | 'k' - Secure Access Key (SAK) Kills all programs on the current virtual |
85 | console. NOTE: See important comments below in SAK section. | 87 | console. NOTE: See important comments below in SAK section. |
86 | 88 | ||
@@ -113,6 +115,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: | |||
113 | 115 | ||
114 | 'x' - Used by xmon interface on ppc/powerpc platforms. | 116 | 'x' - Used by xmon interface on ppc/powerpc platforms. |
115 | 117 | ||
118 | 'z' - Dump the ftrace buffer | ||
119 | |||
116 | '0'-'9' - Sets the console log level, controlling which kernel messages | 120 | '0'-'9' - Sets the console log level, controlling which kernel messages |
117 | will be printed to your console. ('0', for example would make | 121 | will be printed to your console. ('0', for example would make |
118 | it so that only emergency messages like PANICs or OOPSes would | 122 | it so that only emergency messages like PANICs or OOPSes would |
@@ -160,6 +164,9 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you | |||
160 | are unable to kill any other way, especially if it's spawning other | 164 | are unable to kill any other way, especially if it's spawning other |
161 | processes. | 165 | processes. |
162 | 166 | ||
167 | "'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen | ||
168 | (probably root) filesystem via the FIFREEZE ioctl. | ||
169 | |||
163 | * Sometimes SysRq seems to get 'stuck' after using it, what can I do? | 170 | * Sometimes SysRq seems to get 'stuck' after using it, what can I do? |
164 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 171 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
165 | That happens to me, also. I've found that tapping shift, alt, and control | 172 | That happens to me, also. I've found that tapping shift, alt, and control |
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt index e7c09abcfab4..04763a325520 100644 --- a/Documentation/timers/hpet.txt +++ b/Documentation/timers/hpet.txt | |||
@@ -7,7 +7,7 @@ by Intel and Microsoft which can be found at | |||
7 | 7 | ||
8 | Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") | 8 | Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") |
9 | and up to 32 comparators. Normally three or more comparators are provided, | 9 | and up to 32 comparators. Normally three or more comparators are provided, |
10 | each of which can generate oneshot interupts and at least one of which has | 10 | each of which can generate oneshot interrupts and at least one of which has |
11 | additional hardware to support periodic interrupts. The comparators are | 11 | additional hardware to support periodic interrupts. The comparators are |
12 | also called "timers", which can be misleading since usually timers are | 12 | also called "timers", which can be misleading since usually timers are |
13 | independent of each other ... these share a counter, complicating resets. | 13 | independent of each other ... these share a counter, complicating resets. |
diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt index 20d368c59814..9bd00fc2e823 100644 --- a/Documentation/timers/timer_stats.txt +++ b/Documentation/timers/timer_stats.txt | |||
@@ -62,7 +62,7 @@ Timerstats sample period: 3.888770 s | |||
62 | 62 | ||
63 | The first column is the number of events, the second column the pid, the third | 63 | The first column is the number of events, the second column the pid, the third |
64 | column is the name of the process. The forth column shows the function which | 64 | column is the name of the process. The forth column shows the function which |
65 | initialized the timer and in parantheses the callback function which was | 65 | initialized the timer and in parenthesis the callback function which was |
66 | executed on expiry. | 66 | executed on expiry. |
67 | 67 | ||
68 | Thomas, Ingo | 68 | Thomas, Ingo |
diff --git a/Documentation/tomoyo.txt b/Documentation/tomoyo.txt new file mode 100644 index 000000000000..b3a232cae7f8 --- /dev/null +++ b/Documentation/tomoyo.txt | |||
@@ -0,0 +1,55 @@ | |||
1 | --- What is TOMOYO? --- | ||
2 | |||
3 | TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel. | ||
4 | |||
5 | LiveCD-based tutorials are available at | ||
6 | http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/ | ||
7 | http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ . | ||
8 | Though these tutorials use non-LSM version of TOMOYO, they are useful for you | ||
9 | to know what TOMOYO is. | ||
10 | |||
11 | --- How to enable TOMOYO? --- | ||
12 | |||
13 | Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on | ||
14 | kernel's command line. | ||
15 | |||
16 | Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details. | ||
17 | |||
18 | --- Where is documentation? --- | ||
19 | |||
20 | User <-> Kernel interface documentation is available at | ||
21 | http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html . | ||
22 | |||
23 | Materials we prepared for seminars and symposiums are available at | ||
24 | http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 . | ||
25 | Below lists are chosen from three aspects. | ||
26 | |||
27 | What is TOMOYO? | ||
28 | TOMOYO Linux Overview | ||
29 | http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf | ||
30 | TOMOYO Linux: pragmatic and manageable security for Linux | ||
31 | http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf | ||
32 | TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box | ||
33 | http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf | ||
34 | |||
35 | What can TOMOYO do? | ||
36 | Deep inside TOMOYO Linux | ||
37 | http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf | ||
38 | The role of "pathname based access control" in security. | ||
39 | http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf | ||
40 | |||
41 | History of TOMOYO? | ||
42 | Realities of Mainlining | ||
43 | http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf | ||
44 | |||
45 | --- What is future plan? --- | ||
46 | |||
47 | We believe that inode based security and name based security are complementary | ||
48 | and both should be used together. But unfortunately, so far, we cannot enable | ||
49 | multiple LSM modules at the same time. We feel sorry that you have to give up | ||
50 | SELinux/SMACK/AppArmor etc. when you want to use TOMOYO. | ||
51 | |||
52 | We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM | ||
53 | version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ . | ||
54 | LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning | ||
55 | to port non-LSM version's functionalities to LSM versions. | ||
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt new file mode 100644 index 000000000000..f157d7594ea7 --- /dev/null +++ b/Documentation/trace/events.txt | |||
@@ -0,0 +1,90 @@ | |||
1 | Event Tracing | ||
2 | |||
3 | Documentation written by Theodore Ts'o | ||
4 | Updated by Li Zefan | ||
5 | |||
6 | 1. Introduction | ||
7 | =============== | ||
8 | |||
9 | Tracepoints (see Documentation/trace/tracepoints.txt) can be used | ||
10 | without creating custom kernel modules to register probe functions | ||
11 | using the event tracing infrastructure. | ||
12 | |||
13 | Not all tracepoints can be traced using the event tracing system; | ||
14 | the kernel developer must provide code snippets which define how the | ||
15 | tracing information is saved into the tracing buffer, and how the | ||
16 | tracing information should be printed. | ||
17 | |||
18 | 2. Using Event Tracing | ||
19 | ====================== | ||
20 | |||
21 | 2.1 Via the 'set_event' interface | ||
22 | --------------------------------- | ||
23 | |||
24 | The events which are available for tracing can be found in the file | ||
25 | /debug/tracing/available_events. | ||
26 | |||
27 | To enable a particular event, such as 'sched_wakeup', simply echo it | ||
28 | to /debug/tracing/set_event. For example: | ||
29 | |||
30 | # echo sched_wakeup >> /debug/tracing/set_event | ||
31 | |||
32 | [ Note: '>>' is necessary, otherwise it will firstly disable | ||
33 | all the events. ] | ||
34 | |||
35 | To disable an event, echo the event name to the set_event file prefixed | ||
36 | with an exclamation point: | ||
37 | |||
38 | # echo '!sched_wakeup' >> /debug/tracing/set_event | ||
39 | |||
40 | To disable all events, echo an empty line to the set_event file: | ||
41 | |||
42 | # echo > /debug/tracing/set_event | ||
43 | |||
44 | To enable all events, echo '*:*' or '*:' to the set_event file: | ||
45 | |||
46 | # echo *:* > /debug/tracing/set_event | ||
47 | |||
48 | The events are organized into subsystems, such as ext4, irq, sched, | ||
49 | etc., and a full event name looks like this: <subsystem>:<event>. The | ||
50 | subsystem name is optional, but it is displayed in the available_events | ||
51 | file. All of the events in a subsystem can be specified via the syntax | ||
52 | "<subsystem>:*"; for example, to enable all irq events, you can use the | ||
53 | command: | ||
54 | |||
55 | # echo 'irq:*' > /debug/tracing/set_event | ||
56 | |||
57 | 2.2 Via the 'enable' toggle | ||
58 | --------------------------- | ||
59 | |||
60 | The events available are also listed in /debug/tracing/events/ hierarchy | ||
61 | of directories. | ||
62 | |||
63 | To enable event 'sched_wakeup': | ||
64 | |||
65 | # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable | ||
66 | |||
67 | To disable it: | ||
68 | |||
69 | # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable | ||
70 | |||
71 | To enable all events in sched subsystem: | ||
72 | |||
73 | # echo 1 > /debug/tracing/events/sched/enable | ||
74 | |||
75 | To eanble all events: | ||
76 | |||
77 | # echo 1 > /debug/tracing/events/enable | ||
78 | |||
79 | When reading one of these enable files, there are four results: | ||
80 | |||
81 | 0 - all events this file affects are disabled | ||
82 | 1 - all events this file affects are enabled | ||
83 | X - there is a mixture of events enabled and disabled | ||
84 | ? - this file does not affect any event | ||
85 | |||
86 | 3. Defining an event-enabled tracepoint | ||
87 | ======================================= | ||
88 | |||
89 | See The example provided in samples/trace_events | ||
90 | |||
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt new file mode 100644 index 000000000000..a39b3c749de5 --- /dev/null +++ b/Documentation/trace/ftrace.txt | |||
@@ -0,0 +1,1888 @@ | |||
1 | ftrace - Function Tracer | ||
2 | ======================== | ||
3 | |||
4 | Copyright 2008 Red Hat Inc. | ||
5 | Author: Steven Rostedt <srostedt@redhat.com> | ||
6 | License: The GNU Free Documentation License, Version 1.2 | ||
7 | (dual licensed under the GPL v2) | ||
8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | ||
9 | John Kacur, and David Teigland. | ||
10 | Written for: 2.6.28-rc2 | ||
11 | |||
12 | Introduction | ||
13 | ------------ | ||
14 | |||
15 | Ftrace is an internal tracer designed to help out developers and | ||
16 | designers of systems to find what is going on inside the kernel. | ||
17 | It can be used for debugging or analyzing latencies and | ||
18 | performance issues that take place outside of user-space. | ||
19 | |||
20 | Although ftrace is the function tracer, it also includes an | ||
21 | infrastructure that allows for other types of tracing. Some of | ||
22 | the tracers that are currently in ftrace include a tracer to | ||
23 | trace context switches, the time it takes for a high priority | ||
24 | task to run after it was woken up, the time interrupts are | ||
25 | disabled, and more (ftrace allows for tracer plugins, which | ||
26 | means that the list of tracers can always grow). | ||
27 | |||
28 | |||
29 | The File System | ||
30 | --------------- | ||
31 | |||
32 | Ftrace uses the debugfs file system to hold the control files as | ||
33 | well as the files to display output. | ||
34 | |||
35 | When debugfs is configured into the kernel (which selecting any ftrace | ||
36 | option will do) the directory /sys/kernel/debug will be created. To mount | ||
37 | this directory, you can add to your /etc/fstab file: | ||
38 | |||
39 | debugfs /sys/kernel/debug debugfs defaults 0 0 | ||
40 | |||
41 | Or you can mount it at run time with: | ||
42 | |||
43 | mount -t debugfs nodev /sys/kernel/debug | ||
44 | |||
45 | For quicker access to that directory you may want to make a soft link to | ||
46 | it: | ||
47 | |||
48 | ln -s /sys/kernel/debug /debug | ||
49 | |||
50 | Any selected ftrace option will also create a directory called tracing | ||
51 | within the debugfs. The rest of the document will assume that you are in | ||
52 | the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate | ||
53 | on the files within that directory and not distract from the content with | ||
54 | the extended "/sys/kernel/debug/tracing" path name. | ||
55 | |||
56 | That's it! (assuming that you have ftrace configured into your kernel) | ||
57 | |||
58 | After mounting the debugfs, you can see a directory called | ||
59 | "tracing". This directory contains the control and output files | ||
60 | of ftrace. Here is a list of some of the key files: | ||
61 | |||
62 | |||
63 | Note: all time values are in microseconds. | ||
64 | |||
65 | current_tracer: | ||
66 | |||
67 | This is used to set or display the current tracer | ||
68 | that is configured. | ||
69 | |||
70 | available_tracers: | ||
71 | |||
72 | This holds the different types of tracers that | ||
73 | have been compiled into the kernel. The | ||
74 | tracers listed here can be configured by | ||
75 | echoing their name into current_tracer. | ||
76 | |||
77 | tracing_enabled: | ||
78 | |||
79 | This sets or displays whether the current_tracer | ||
80 | is activated and tracing or not. Echo 0 into this | ||
81 | file to disable the tracer or 1 to enable it. | ||
82 | |||
83 | trace: | ||
84 | |||
85 | This file holds the output of the trace in a human | ||
86 | readable format (described below). | ||
87 | |||
88 | latency_trace: | ||
89 | |||
90 | This file shows the same trace but the information | ||
91 | is organized more to display possible latencies | ||
92 | in the system (described below). | ||
93 | |||
94 | trace_pipe: | ||
95 | |||
96 | The output is the same as the "trace" file but this | ||
97 | file is meant to be streamed with live tracing. | ||
98 | Reads from this file will block until new data | ||
99 | is retrieved. Unlike the "trace" and "latency_trace" | ||
100 | files, this file is a consumer. This means reading | ||
101 | from this file causes sequential reads to display | ||
102 | more current data. Once data is read from this | ||
103 | file, it is consumed, and will not be read | ||
104 | again with a sequential read. The "trace" and | ||
105 | "latency_trace" files are static, and if the | ||
106 | tracer is not adding more data, they will display | ||
107 | the same information every time they are read. | ||
108 | |||
109 | trace_options: | ||
110 | |||
111 | This file lets the user control the amount of data | ||
112 | that is displayed in one of the above output | ||
113 | files. | ||
114 | |||
115 | tracing_max_latency: | ||
116 | |||
117 | Some of the tracers record the max latency. | ||
118 | For example, the time interrupts are disabled. | ||
119 | This time is saved in this file. The max trace | ||
120 | will also be stored, and displayed by either | ||
121 | "trace" or "latency_trace". A new max trace will | ||
122 | only be recorded if the latency is greater than | ||
123 | the value in this file. (in microseconds) | ||
124 | |||
125 | buffer_size_kb: | ||
126 | |||
127 | This sets or displays the number of kilobytes each CPU | ||
128 | buffer can hold. The tracer buffers are the same size | ||
129 | for each CPU. The displayed number is the size of the | ||
130 | CPU buffer and not total size of all buffers. The | ||
131 | trace buffers are allocated in pages (blocks of memory | ||
132 | that the kernel uses for allocation, usually 4 KB in size). | ||
133 | If the last page allocated has room for more bytes | ||
134 | than requested, the rest of the page will be used, | ||
135 | making the actual allocation bigger than requested. | ||
136 | ( Note, the size may not be a multiple of the page size | ||
137 | due to buffer managment overhead. ) | ||
138 | |||
139 | This can only be updated when the current_tracer | ||
140 | is set to "nop". | ||
141 | |||
142 | tracing_cpumask: | ||
143 | |||
144 | This is a mask that lets the user only trace | ||
145 | on specified CPUS. The format is a hex string | ||
146 | representing the CPUS. | ||
147 | |||
148 | set_ftrace_filter: | ||
149 | |||
150 | When dynamic ftrace is configured in (see the | ||
151 | section below "dynamic ftrace"), the code is dynamically | ||
152 | modified (code text rewrite) to disable calling of the | ||
153 | function profiler (mcount). This lets tracing be configured | ||
154 | in with practically no overhead in performance. This also | ||
155 | has a side effect of enabling or disabling specific functions | ||
156 | to be traced. Echoing names of functions into this file | ||
157 | will limit the trace to only those functions. | ||
158 | |||
159 | set_ftrace_notrace: | ||
160 | |||
161 | This has an effect opposite to that of | ||
162 | set_ftrace_filter. Any function that is added here will not | ||
163 | be traced. If a function exists in both set_ftrace_filter | ||
164 | and set_ftrace_notrace, the function will _not_ be traced. | ||
165 | |||
166 | set_ftrace_pid: | ||
167 | |||
168 | Have the function tracer only trace a single thread. | ||
169 | |||
170 | set_graph_function: | ||
171 | |||
172 | Set a "trigger" function where tracing should start | ||
173 | with the function graph tracer (See the section | ||
174 | "dynamic ftrace" for more details). | ||
175 | |||
176 | available_filter_functions: | ||
177 | |||
178 | This lists the functions that ftrace | ||
179 | has processed and can trace. These are the function | ||
180 | names that you can pass to "set_ftrace_filter" or | ||
181 | "set_ftrace_notrace". (See the section "dynamic ftrace" | ||
182 | below for more details.) | ||
183 | |||
184 | |||
185 | The Tracers | ||
186 | ----------- | ||
187 | |||
188 | Here is the list of current tracers that may be configured. | ||
189 | |||
190 | "function" | ||
191 | |||
192 | Function call tracer to trace all kernel functions. | ||
193 | |||
194 | "function_graph" | ||
195 | |||
196 | Similar to the function tracer except that the | ||
197 | function tracer probes the functions on their entry | ||
198 | whereas the function graph tracer traces on both entry | ||
199 | and exit of the functions. It then provides the ability | ||
200 | to draw a graph of function calls similar to C code | ||
201 | source. | ||
202 | |||
203 | "sched_switch" | ||
204 | |||
205 | Traces the context switches and wakeups between tasks. | ||
206 | |||
207 | "irqsoff" | ||
208 | |||
209 | Traces the areas that disable interrupts and saves | ||
210 | the trace with the longest max latency. | ||
211 | See tracing_max_latency. When a new max is recorded, | ||
212 | it replaces the old trace. It is best to view this | ||
213 | trace via the latency_trace file. | ||
214 | |||
215 | "preemptoff" | ||
216 | |||
217 | Similar to irqsoff but traces and records the amount of | ||
218 | time for which preemption is disabled. | ||
219 | |||
220 | "preemptirqsoff" | ||
221 | |||
222 | Similar to irqsoff and preemptoff, but traces and | ||
223 | records the largest time for which irqs and/or preemption | ||
224 | is disabled. | ||
225 | |||
226 | "wakeup" | ||
227 | |||
228 | Traces and records the max latency that it takes for | ||
229 | the highest priority task to get scheduled after | ||
230 | it has been woken up. | ||
231 | |||
232 | "hw-branch-tracer" | ||
233 | |||
234 | Uses the BTS CPU feature on x86 CPUs to traces all | ||
235 | branches executed. | ||
236 | |||
237 | "nop" | ||
238 | |||
239 | This is the "trace nothing" tracer. To remove all | ||
240 | tracers from tracing simply echo "nop" into | ||
241 | current_tracer. | ||
242 | |||
243 | |||
244 | Examples of using the tracer | ||
245 | ---------------------------- | ||
246 | |||
247 | Here are typical examples of using the tracers when controlling | ||
248 | them only with the debugfs interface (without using any | ||
249 | user-land utilities). | ||
250 | |||
251 | Output format: | ||
252 | -------------- | ||
253 | |||
254 | Here is an example of the output format of the file "trace" | ||
255 | |||
256 | -------- | ||
257 | # tracer: function | ||
258 | # | ||
259 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
260 | # | | | | | | ||
261 | bash-4251 [01] 10152.583854: path_put <-path_walk | ||
262 | bash-4251 [01] 10152.583855: dput <-path_put | ||
263 | bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | ||
264 | -------- | ||
265 | |||
266 | A header is printed with the tracer name that is represented by | ||
267 | the trace. In this case the tracer is "function". Then a header | ||
268 | showing the format. Task name "bash", the task PID "4251", the | ||
269 | CPU that it was running on "01", the timestamp in <secs>.<usecs> | ||
270 | format, the function name that was traced "path_put" and the | ||
271 | parent function that called this function "path_walk". The | ||
272 | timestamp is the time at which the function was entered. | ||
273 | |||
274 | The sched_switch tracer also includes tracing of task wakeups | ||
275 | and context switches. | ||
276 | |||
277 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S | ||
278 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S | ||
279 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R | ||
280 | events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R | ||
281 | kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R | ||
282 | ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R | ||
283 | |||
284 | Wake ups are represented by a "+" and the context switches are | ||
285 | shown as "==>". The format is: | ||
286 | |||
287 | Context switches: | ||
288 | |||
289 | Previous task Next Task | ||
290 | |||
291 | <pid>:<prio>:<state> ==> <pid>:<prio>:<state> | ||
292 | |||
293 | Wake ups: | ||
294 | |||
295 | Current task Task waking up | ||
296 | |||
297 | <pid>:<prio>:<state> + <pid>:<prio>:<state> | ||
298 | |||
299 | The prio is the internal kernel priority, which is the inverse | ||
300 | of the priority that is usually displayed by user-space tools. | ||
301 | Zero represents the highest priority (99). Prio 100 starts the | ||
302 | "nice" priorities with 100 being equal to nice -20 and 139 being | ||
303 | nice 19. The prio "140" is reserved for the idle task which is | ||
304 | the lowest priority thread (pid 0). | ||
305 | |||
306 | |||
307 | Latency trace format | ||
308 | -------------------- | ||
309 | |||
310 | For traces that display latency times, the latency_trace file | ||
311 | gives somewhat more information to see why a latency happened. | ||
312 | Here is a typical trace. | ||
313 | |||
314 | # tracer: irqsoff | ||
315 | # | ||
316 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
317 | -------------------------------------------------------------------- | ||
318 | latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
319 | ----------------- | ||
320 | | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | ||
321 | ----------------- | ||
322 | => started at: apic_timer_interrupt | ||
323 | => ended at: do_softirq | ||
324 | |||
325 | # _------=> CPU# | ||
326 | # / _-----=> irqs-off | ||
327 | # | / _----=> need-resched | ||
328 | # || / _---=> hardirq/softirq | ||
329 | # ||| / _--=> preempt-depth | ||
330 | # |||| / | ||
331 | # ||||| delay | ||
332 | # cmd pid ||||| time | caller | ||
333 | # \ / ||||| \ | / | ||
334 | <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
335 | <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | ||
336 | <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | ||
337 | |||
338 | |||
339 | This shows that the current tracer is "irqsoff" tracing the time | ||
340 | for which interrupts were disabled. It gives the trace version | ||
341 | and the version of the kernel upon which this was executed on | ||
342 | (2.6.26-rc8). Then it displays the max latency in microsecs (97 | ||
343 | us). The number of trace entries displayed and the total number | ||
344 | recorded (both are three: #3/3). The type of preemption that was | ||
345 | used (PREEMPT). VP, KP, SP, and HP are always zero and are | ||
346 | reserved for later use. #P is the number of online CPUS (#P:2). | ||
347 | |||
348 | The task is the process that was running when the latency | ||
349 | occurred. (swapper pid: 0). | ||
350 | |||
351 | The start and stop (the functions in which the interrupts were | ||
352 | disabled and enabled respectively) that caused the latencies: | ||
353 | |||
354 | apic_timer_interrupt is where the interrupts were disabled. | ||
355 | do_softirq is where they were enabled again. | ||
356 | |||
357 | The next lines after the header are the trace itself. The header | ||
358 | explains which is which. | ||
359 | |||
360 | cmd: The name of the process in the trace. | ||
361 | |||
362 | pid: The PID of that process. | ||
363 | |||
364 | CPU#: The CPU which the process was running on. | ||
365 | |||
366 | irqs-off: 'd' interrupts are disabled. '.' otherwise. | ||
367 | Note: If the architecture does not support a way to | ||
368 | read the irq flags variable, an 'X' will always | ||
369 | be printed here. | ||
370 | |||
371 | need-resched: 'N' task need_resched is set, '.' otherwise. | ||
372 | |||
373 | hardirq/softirq: | ||
374 | 'H' - hard irq occurred inside a softirq. | ||
375 | 'h' - hard irq is running | ||
376 | 's' - soft irq is running | ||
377 | '.' - normal context. | ||
378 | |||
379 | preempt-depth: The level of preempt_disabled | ||
380 | |||
381 | The above is mostly meaningful for kernel developers. | ||
382 | |||
383 | time: This differs from the trace file output. The trace file output | ||
384 | includes an absolute timestamp. The timestamp used by the | ||
385 | latency_trace file is relative to the start of the trace. | ||
386 | |||
387 | delay: This is just to help catch your eye a bit better. And | ||
388 | needs to be fixed to be only relative to the same CPU. | ||
389 | The marks are determined by the difference between this | ||
390 | current trace and the next trace. | ||
391 | '!' - greater than preempt_mark_thresh (default 100) | ||
392 | '+' - greater than 1 microsecond | ||
393 | ' ' - less than or equal to 1 microsecond. | ||
394 | |||
395 | The rest is the same as the 'trace' file. | ||
396 | |||
397 | |||
398 | trace_options | ||
399 | ------------- | ||
400 | |||
401 | The trace_options file is used to control what gets printed in | ||
402 | the trace output. To see what is available, simply cat the file: | ||
403 | |||
404 | cat trace_options | ||
405 | print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | ||
406 | noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj | ||
407 | |||
408 | To disable one of the options, echo in the option prepended with | ||
409 | "no". | ||
410 | |||
411 | echo noprint-parent > trace_options | ||
412 | |||
413 | To enable an option, leave off the "no". | ||
414 | |||
415 | echo sym-offset > trace_options | ||
416 | |||
417 | Here are the available options: | ||
418 | |||
419 | print-parent - On function traces, display the calling (parent) | ||
420 | function as well as the function being traced. | ||
421 | |||
422 | print-parent: | ||
423 | bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul | ||
424 | |||
425 | noprint-parent: | ||
426 | bash-4000 [01] 1477.606694: simple_strtoul | ||
427 | |||
428 | |||
429 | sym-offset - Display not only the function name, but also the | ||
430 | offset in the function. For example, instead of | ||
431 | seeing just "ktime_get", you will see | ||
432 | "ktime_get+0xb/0x20". | ||
433 | |||
434 | sym-offset: | ||
435 | bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 | ||
436 | |||
437 | sym-addr - this will also display the function address as well | ||
438 | as the function name. | ||
439 | |||
440 | sym-addr: | ||
441 | bash-4000 [01] 1477.606694: simple_strtoul <c0339346> | ||
442 | |||
443 | verbose - This deals with the latency_trace file. | ||
444 | |||
445 | bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ | ||
446 | (+0.000ms): simple_strtoul (strict_strtoul) | ||
447 | |||
448 | raw - This will display raw numbers. This option is best for | ||
449 | use with user applications that can translate the raw | ||
450 | numbers better than having it done in the kernel. | ||
451 | |||
452 | hex - Similar to raw, but the numbers will be in a hexadecimal | ||
453 | format. | ||
454 | |||
455 | bin - This will print out the formats in raw binary. | ||
456 | |||
457 | block - TBD (needs update) | ||
458 | |||
459 | stacktrace - This is one of the options that changes the trace | ||
460 | itself. When a trace is recorded, so is the stack | ||
461 | of functions. This allows for back traces of | ||
462 | trace sites. | ||
463 | |||
464 | userstacktrace - This option changes the trace. It records a | ||
465 | stacktrace of the current userspace thread. | ||
466 | |||
467 | sym-userobj - when user stacktrace are enabled, look up which | ||
468 | object the address belongs to, and print a | ||
469 | relative address. This is especially useful when | ||
470 | ASLR is on, otherwise you don't get a chance to | ||
471 | resolve the address to object/file/line after | ||
472 | the app is no longer running | ||
473 | |||
474 | The lookup is performed when you read | ||
475 | trace,trace_pipe,latency_trace. Example: | ||
476 | |||
477 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 | ||
478 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | ||
479 | |||
480 | sched-tree - trace all tasks that are on the runqueue, at | ||
481 | every scheduling event. Will add overhead if | ||
482 | there's a lot of tasks running at once. | ||
483 | |||
484 | |||
485 | sched_switch | ||
486 | ------------ | ||
487 | |||
488 | This tracer simply records schedule switches. Here is an example | ||
489 | of how to use it. | ||
490 | |||
491 | # echo sched_switch > current_tracer | ||
492 | # echo 1 > tracing_enabled | ||
493 | # sleep 1 | ||
494 | # echo 0 > tracing_enabled | ||
495 | # cat trace | ||
496 | |||
497 | # tracer: sched_switch | ||
498 | # | ||
499 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
500 | # | | | | | | ||
501 | bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R | ||
502 | bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R | ||
503 | sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R | ||
504 | bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S | ||
505 | bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R | ||
506 | sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R | ||
507 | bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D | ||
508 | bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R | ||
509 | <idle>-0 [00] 240.132589: 0:140:R + 4:115:S | ||
510 | <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R | ||
511 | ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R | ||
512 | <idle>-0 [00] 240.132598: 0:140:R + 4:115:S | ||
513 | <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R | ||
514 | ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R | ||
515 | sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R | ||
516 | [...] | ||
517 | |||
518 | |||
519 | As we have discussed previously about this format, the header | ||
520 | shows the name of the trace and points to the options. The | ||
521 | "FUNCTION" is a misnomer since here it represents the wake ups | ||
522 | and context switches. | ||
523 | |||
524 | The sched_switch file only lists the wake ups (represented with | ||
525 | '+') and context switches ('==>') with the previous task or | ||
526 | current task first followed by the next task or task waking up. | ||
527 | The format for both of these is PID:KERNEL-PRIO:TASK-STATE. | ||
528 | Remember that the KERNEL-PRIO is the inverse of the actual | ||
529 | priority with zero (0) being the highest priority and the nice | ||
530 | values starting at 100 (nice -20). Below is a quick chart to map | ||
531 | the kernel priority to user land priorities. | ||
532 | |||
533 | Kernel Space User Space | ||
534 | =============================================================== | ||
535 | 0(high) to 98(low) user RT priority 99(high) to 1(low) | ||
536 | with SCHED_RR or SCHED_FIFO | ||
537 | --------------------------------------------------------------- | ||
538 | 99 sched_priority is not used in scheduling | ||
539 | decisions(it must be specified as 0) | ||
540 | --------------------------------------------------------------- | ||
541 | 100(high) to 139(low) user nice -20(high) to 19(low) | ||
542 | --------------------------------------------------------------- | ||
543 | 140 idle task priority | ||
544 | --------------------------------------------------------------- | ||
545 | |||
546 | The task states are: | ||
547 | |||
548 | R - running : wants to run, may not actually be running | ||
549 | S - sleep : process is waiting to be woken up (handles signals) | ||
550 | D - disk sleep (uninterruptible sleep) : process must be woken up | ||
551 | (ignores signals) | ||
552 | T - stopped : process suspended | ||
553 | t - traced : process is being traced (with something like gdb) | ||
554 | Z - zombie : process waiting to be cleaned up | ||
555 | X - unknown | ||
556 | |||
557 | |||
558 | ftrace_enabled | ||
559 | -------------- | ||
560 | |||
561 | The following tracers (listed below) give different output | ||
562 | depending on whether or not the sysctl ftrace_enabled is set. To | ||
563 | set ftrace_enabled, one can either use the sysctl function or | ||
564 | set it via the proc file system interface. | ||
565 | |||
566 | sysctl kernel.ftrace_enabled=1 | ||
567 | |||
568 | or | ||
569 | |||
570 | echo 1 > /proc/sys/kernel/ftrace_enabled | ||
571 | |||
572 | To disable ftrace_enabled simply replace the '1' with '0' in the | ||
573 | above commands. | ||
574 | |||
575 | When ftrace_enabled is set the tracers will also record the | ||
576 | functions that are within the trace. The descriptions of the | ||
577 | tracers will also show an example with ftrace enabled. | ||
578 | |||
579 | |||
580 | irqsoff | ||
581 | ------- | ||
582 | |||
583 | When interrupts are disabled, the CPU can not react to any other | ||
584 | external event (besides NMIs and SMIs). This prevents the timer | ||
585 | interrupt from triggering or the mouse interrupt from letting | ||
586 | the kernel know of a new mouse event. The result is a latency | ||
587 | with the reaction time. | ||
588 | |||
589 | The irqsoff tracer tracks the time for which interrupts are | ||
590 | disabled. When a new maximum latency is hit, the tracer saves | ||
591 | the trace leading up to that latency point so that every time a | ||
592 | new maximum is reached, the old saved trace is discarded and the | ||
593 | new trace is saved. | ||
594 | |||
595 | To reset the maximum, echo 0 into tracing_max_latency. Here is | ||
596 | an example: | ||
597 | |||
598 | # echo irqsoff > current_tracer | ||
599 | # echo 0 > tracing_max_latency | ||
600 | # echo 1 > tracing_enabled | ||
601 | # ls -ltr | ||
602 | [...] | ||
603 | # echo 0 > tracing_enabled | ||
604 | # cat latency_trace | ||
605 | # tracer: irqsoff | ||
606 | # | ||
607 | irqsoff latency trace v1.1.5 on 2.6.26 | ||
608 | -------------------------------------------------------------------- | ||
609 | latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
610 | ----------------- | ||
611 | | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) | ||
612 | ----------------- | ||
613 | => started at: sys_setpgid | ||
614 | => ended at: sys_setpgid | ||
615 | |||
616 | # _------=> CPU# | ||
617 | # / _-----=> irqs-off | ||
618 | # | / _----=> need-resched | ||
619 | # || / _---=> hardirq/softirq | ||
620 | # ||| / _--=> preempt-depth | ||
621 | # |||| / | ||
622 | # ||||| delay | ||
623 | # cmd pid ||||| time | caller | ||
624 | # \ / ||||| \ | / | ||
625 | bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) | ||
626 | bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) | ||
627 | bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) | ||
628 | |||
629 | |||
630 | Here we see that that we had a latency of 12 microsecs (which is | ||
631 | very good). The _write_lock_irq in sys_setpgid disabled | ||
632 | interrupts. The difference between the 12 and the displayed | ||
633 | timestamp 14us occurred because the clock was incremented | ||
634 | between the time of recording the max latency and the time of | ||
635 | recording the function that had that latency. | ||
636 | |||
637 | Note the above example had ftrace_enabled not set. If we set the | ||
638 | ftrace_enabled, we get a much larger output: | ||
639 | |||
640 | # tracer: irqsoff | ||
641 | # | ||
642 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
643 | -------------------------------------------------------------------- | ||
644 | latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
645 | ----------------- | ||
646 | | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | ||
647 | ----------------- | ||
648 | => started at: __alloc_pages_internal | ||
649 | => ended at: __alloc_pages_internal | ||
650 | |||
651 | # _------=> CPU# | ||
652 | # / _-----=> irqs-off | ||
653 | # | / _----=> need-resched | ||
654 | # || / _---=> hardirq/softirq | ||
655 | # ||| / _--=> preempt-depth | ||
656 | # |||| / | ||
657 | # ||||| delay | ||
658 | # cmd pid ||||| time | caller | ||
659 | # \ / ||||| \ | / | ||
660 | ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | ||
661 | ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | ||
662 | ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | ||
663 | ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | ||
664 | ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | ||
665 | ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | ||
666 | ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | ||
667 | ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | ||
668 | ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | ||
669 | ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
670 | ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
671 | ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
672 | [...] | ||
673 | ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | ||
674 | ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | ||
675 | ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | ||
676 | ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | ||
677 | ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | ||
678 | ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | ||
679 | ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | ||
680 | ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | ||
681 | ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | ||
682 | |||
683 | |||
684 | |||
685 | Here we traced a 50 microsecond latency. But we also see all the | ||
686 | functions that were called during that time. Note that by | ||
687 | enabling function tracing, we incur an added overhead. This | ||
688 | overhead may extend the latency times. But nevertheless, this | ||
689 | trace has provided some very helpful debugging information. | ||
690 | |||
691 | |||
692 | preemptoff | ||
693 | ---------- | ||
694 | |||
695 | When preemption is disabled, we may be able to receive | ||
696 | interrupts but the task cannot be preempted and a higher | ||
697 | priority task must wait for preemption to be enabled again | ||
698 | before it can preempt a lower priority task. | ||
699 | |||
700 | The preemptoff tracer traces the places that disable preemption. | ||
701 | Like the irqsoff tracer, it records the maximum latency for | ||
702 | which preemption was disabled. The control of preemptoff tracer | ||
703 | is much like the irqsoff tracer. | ||
704 | |||
705 | # echo preemptoff > current_tracer | ||
706 | # echo 0 > tracing_max_latency | ||
707 | # echo 1 > tracing_enabled | ||
708 | # ls -ltr | ||
709 | [...] | ||
710 | # echo 0 > tracing_enabled | ||
711 | # cat latency_trace | ||
712 | # tracer: preemptoff | ||
713 | # | ||
714 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
715 | -------------------------------------------------------------------- | ||
716 | latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
717 | ----------------- | ||
718 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
719 | ----------------- | ||
720 | => started at: do_IRQ | ||
721 | => ended at: __do_softirq | ||
722 | |||
723 | # _------=> CPU# | ||
724 | # / _-----=> irqs-off | ||
725 | # | / _----=> need-resched | ||
726 | # || / _---=> hardirq/softirq | ||
727 | # ||| / _--=> preempt-depth | ||
728 | # |||| / | ||
729 | # ||||| delay | ||
730 | # cmd pid ||||| time | caller | ||
731 | # \ / ||||| \ | / | ||
732 | sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | ||
733 | sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | ||
734 | sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | ||
735 | |||
736 | |||
737 | This has some more changes. Preemption was disabled when an | ||
738 | interrupt came in (notice the 'h'), and was enabled while doing | ||
739 | a softirq. (notice the 's'). But we also see that interrupts | ||
740 | have been disabled when entering the preempt off section and | ||
741 | leaving it (the 'd'). We do not know if interrupts were enabled | ||
742 | in the mean time. | ||
743 | |||
744 | # tracer: preemptoff | ||
745 | # | ||
746 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
747 | -------------------------------------------------------------------- | ||
748 | latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
749 | ----------------- | ||
750 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
751 | ----------------- | ||
752 | => started at: remove_wait_queue | ||
753 | => ended at: __do_softirq | ||
754 | |||
755 | # _------=> CPU# | ||
756 | # / _-----=> irqs-off | ||
757 | # | / _----=> need-resched | ||
758 | # || / _---=> hardirq/softirq | ||
759 | # ||| / _--=> preempt-depth | ||
760 | # |||| / | ||
761 | # ||||| delay | ||
762 | # cmd pid ||||| time | caller | ||
763 | # \ / ||||| \ | / | ||
764 | sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | ||
765 | sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | ||
766 | sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | ||
767 | sshd-4261 0d..1 2us : irq_enter (do_IRQ) | ||
768 | sshd-4261 0d..1 2us : idle_cpu (irq_enter) | ||
769 | sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
770 | sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
771 | sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
772 | [...] | ||
773 | sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | ||
774 | sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | ||
775 | sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | ||
776 | sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | ||
777 | sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | ||
778 | sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | ||
779 | sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | ||
780 | sshd-4261 0d..2 15us : do_softirq (irq_exit) | ||
781 | sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
782 | sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
783 | sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
784 | sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
785 | sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
786 | sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
787 | [...] | ||
788 | sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | ||
789 | sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | ||
790 | sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | ||
791 | sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | ||
792 | sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | ||
793 | sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | ||
794 | sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | ||
795 | sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | ||
796 | [...] | ||
797 | sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | ||
798 | sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | ||
799 | |||
800 | |||
801 | The above is an example of the preemptoff trace with | ||
802 | ftrace_enabled set. Here we see that interrupts were disabled | ||
803 | the entire time. The irq_enter code lets us know that we entered | ||
804 | an interrupt 'h'. Before that, the functions being traced still | ||
805 | show that it is not in an interrupt, but we can see from the | ||
806 | functions themselves that this is not the case. | ||
807 | |||
808 | Notice that __do_softirq when called does not have a | ||
809 | preempt_count. It may seem that we missed a preempt enabling. | ||
810 | What really happened is that the preempt count is held on the | ||
811 | thread's stack and we switched to the softirq stack (4K stacks | ||
812 | in effect). The code does not copy the preempt count, but | ||
813 | because interrupts are disabled, we do not need to worry about | ||
814 | it. Having a tracer like this is good for letting people know | ||
815 | what really happens inside the kernel. | ||
816 | |||
817 | |||
818 | preemptirqsoff | ||
819 | -------------- | ||
820 | |||
821 | Knowing the locations that have interrupts disabled or | ||
822 | preemption disabled for the longest times is helpful. But | ||
823 | sometimes we would like to know when either preemption and/or | ||
824 | interrupts are disabled. | ||
825 | |||
826 | Consider the following code: | ||
827 | |||
828 | local_irq_disable(); | ||
829 | call_function_with_irqs_off(); | ||
830 | preempt_disable(); | ||
831 | call_function_with_irqs_and_preemption_off(); | ||
832 | local_irq_enable(); | ||
833 | call_function_with_preemption_off(); | ||
834 | preempt_enable(); | ||
835 | |||
836 | The irqsoff tracer will record the total length of | ||
837 | call_function_with_irqs_off() and | ||
838 | call_function_with_irqs_and_preemption_off(). | ||
839 | |||
840 | The preemptoff tracer will record the total length of | ||
841 | call_function_with_irqs_and_preemption_off() and | ||
842 | call_function_with_preemption_off(). | ||
843 | |||
844 | But neither will trace the time that interrupts and/or | ||
845 | preemption is disabled. This total time is the time that we can | ||
846 | not schedule. To record this time, use the preemptirqsoff | ||
847 | tracer. | ||
848 | |||
849 | Again, using this trace is much like the irqsoff and preemptoff | ||
850 | tracers. | ||
851 | |||
852 | # echo preemptirqsoff > current_tracer | ||
853 | # echo 0 > tracing_max_latency | ||
854 | # echo 1 > tracing_enabled | ||
855 | # ls -ltr | ||
856 | [...] | ||
857 | # echo 0 > tracing_enabled | ||
858 | # cat latency_trace | ||
859 | # tracer: preemptirqsoff | ||
860 | # | ||
861 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
862 | -------------------------------------------------------------------- | ||
863 | latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
864 | ----------------- | ||
865 | | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | ||
866 | ----------------- | ||
867 | => started at: apic_timer_interrupt | ||
868 | => ended at: __do_softirq | ||
869 | |||
870 | # _------=> CPU# | ||
871 | # / _-----=> irqs-off | ||
872 | # | / _----=> need-resched | ||
873 | # || / _---=> hardirq/softirq | ||
874 | # ||| / _--=> preempt-depth | ||
875 | # |||| / | ||
876 | # ||||| delay | ||
877 | # cmd pid ||||| time | caller | ||
878 | # \ / ||||| \ | / | ||
879 | ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
880 | ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | ||
881 | ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | ||
882 | |||
883 | |||
884 | |||
885 | The trace_hardirqs_off_thunk is called from assembly on x86 when | ||
886 | interrupts are disabled in the assembly code. Without the | ||
887 | function tracing, we do not know if interrupts were enabled | ||
888 | within the preemption points. We do see that it started with | ||
889 | preemption enabled. | ||
890 | |||
891 | Here is a trace with ftrace_enabled set: | ||
892 | |||
893 | |||
894 | # tracer: preemptirqsoff | ||
895 | # | ||
896 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
897 | -------------------------------------------------------------------- | ||
898 | latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
899 | ----------------- | ||
900 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
901 | ----------------- | ||
902 | => started at: write_chan | ||
903 | => ended at: __do_softirq | ||
904 | |||
905 | # _------=> CPU# | ||
906 | # / _-----=> irqs-off | ||
907 | # | / _----=> need-resched | ||
908 | # || / _---=> hardirq/softirq | ||
909 | # ||| / _--=> preempt-depth | ||
910 | # |||| / | ||
911 | # ||||| delay | ||
912 | # cmd pid ||||| time | caller | ||
913 | # \ / ||||| \ | / | ||
914 | ls-4473 0.N.. 0us : preempt_schedule (write_chan) | ||
915 | ls-4473 0dN.1 1us : _spin_lock (schedule) | ||
916 | ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | ||
917 | ls-4473 0d..2 2us : put_prev_task_fair (schedule) | ||
918 | [...] | ||
919 | ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | ||
920 | ls-4473 0d..2 13us : __switch_to (schedule) | ||
921 | sshd-4261 0d..2 14us : finish_task_switch (schedule) | ||
922 | sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | ||
923 | sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | ||
924 | sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | ||
925 | sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | ||
926 | sshd-4261 0d..2 17us : irq_enter (do_IRQ) | ||
927 | sshd-4261 0d..2 17us : idle_cpu (irq_enter) | ||
928 | sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | ||
929 | sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | ||
930 | sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | ||
931 | sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | ||
932 | sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | ||
933 | sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | ||
934 | sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | ||
935 | [...] | ||
936 | sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | ||
937 | sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | ||
938 | sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | ||
939 | sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | ||
940 | sshd-4261 0d..3 30us : do_softirq (irq_exit) | ||
941 | sshd-4261 0d... 30us : __do_softirq (do_softirq) | ||
942 | sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | ||
943 | sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | ||
944 | sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | ||
945 | [...] | ||
946 | sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | ||
947 | sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | ||
948 | sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | ||
949 | sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | ||
950 | sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | ||
951 | sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | ||
952 | sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | ||
953 | sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | ||
954 | sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | ||
955 | [...] | ||
956 | sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | ||
957 | sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | ||
958 | sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | ||
959 | sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | ||
960 | sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | ||
961 | sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | ||
962 | sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
963 | sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
964 | sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
965 | sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
966 | sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
967 | [...] | ||
968 | sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | ||
969 | sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | ||
970 | sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | ||
971 | sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | ||
972 | sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | ||
973 | sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | ||
974 | sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | ||
975 | |||
976 | |||
977 | This is a very interesting trace. It started with the preemption | ||
978 | of the ls task. We see that the task had the "need_resched" bit | ||
979 | set via the 'N' in the trace. Interrupts were disabled before | ||
980 | the spin_lock at the beginning of the trace. We see that a | ||
981 | schedule took place to run sshd. When the interrupts were | ||
982 | enabled, we took an interrupt. On return from the interrupt | ||
983 | handler, the softirq ran. We took another interrupt while | ||
984 | running the softirq as we see from the capital 'H'. | ||
985 | |||
986 | |||
987 | wakeup | ||
988 | ------ | ||
989 | |||
990 | In a Real-Time environment it is very important to know the | ||
991 | wakeup time it takes for the highest priority task that is woken | ||
992 | up to the time that it executes. This is also known as "schedule | ||
993 | latency". I stress the point that this is about RT tasks. It is | ||
994 | also important to know the scheduling latency of non-RT tasks, | ||
995 | but the average schedule latency is better for non-RT tasks. | ||
996 | Tools like LatencyTop are more appropriate for such | ||
997 | measurements. | ||
998 | |||
999 | Real-Time environments are interested in the worst case latency. | ||
1000 | That is the longest latency it takes for something to happen, | ||
1001 | and not the average. We can have a very fast scheduler that may | ||
1002 | only have a large latency once in a while, but that would not | ||
1003 | work well with Real-Time tasks. The wakeup tracer was designed | ||
1004 | to record the worst case wakeups of RT tasks. Non-RT tasks are | ||
1005 | not recorded because the tracer only records one worst case and | ||
1006 | tracing non-RT tasks that are unpredictable will overwrite the | ||
1007 | worst case latency of RT tasks. | ||
1008 | |||
1009 | Since this tracer only deals with RT tasks, we will run this | ||
1010 | slightly differently than we did with the previous tracers. | ||
1011 | Instead of performing an 'ls', we will run 'sleep 1' under | ||
1012 | 'chrt' which changes the priority of the task. | ||
1013 | |||
1014 | # echo wakeup > current_tracer | ||
1015 | # echo 0 > tracing_max_latency | ||
1016 | # echo 1 > tracing_enabled | ||
1017 | # chrt -f 5 sleep 1 | ||
1018 | # echo 0 > tracing_enabled | ||
1019 | # cat latency_trace | ||
1020 | # tracer: wakeup | ||
1021 | # | ||
1022 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
1023 | -------------------------------------------------------------------- | ||
1024 | latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
1025 | ----------------- | ||
1026 | | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | ||
1027 | ----------------- | ||
1028 | |||
1029 | # _------=> CPU# | ||
1030 | # / _-----=> irqs-off | ||
1031 | # | / _----=> need-resched | ||
1032 | # || / _---=> hardirq/softirq | ||
1033 | # ||| / _--=> preempt-depth | ||
1034 | # |||| / | ||
1035 | # ||||| delay | ||
1036 | # cmd pid ||||| time | caller | ||
1037 | # \ / ||||| \ | / | ||
1038 | <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | ||
1039 | <idle>-0 1d..4 4us : schedule (cpu_idle) | ||
1040 | |||
1041 | |||
1042 | Running this on an idle system, we see that it only took 4 | ||
1043 | microseconds to perform the task switch. Note, since the trace | ||
1044 | marker in the schedule is before the actual "switch", we stop | ||
1045 | the tracing when the recorded task is about to schedule in. This | ||
1046 | may change if we add a new marker at the end of the scheduler. | ||
1047 | |||
1048 | Notice that the recorded task is 'sleep' with the PID of 4901 | ||
1049 | and it has an rt_prio of 5. This priority is user-space priority | ||
1050 | and not the internal kernel priority. The policy is 1 for | ||
1051 | SCHED_FIFO and 2 for SCHED_RR. | ||
1052 | |||
1053 | Doing the same with chrt -r 5 and ftrace_enabled set. | ||
1054 | |||
1055 | # tracer: wakeup | ||
1056 | # | ||
1057 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
1058 | -------------------------------------------------------------------- | ||
1059 | latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
1060 | ----------------- | ||
1061 | | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | ||
1062 | ----------------- | ||
1063 | |||
1064 | # _------=> CPU# | ||
1065 | # / _-----=> irqs-off | ||
1066 | # | / _----=> need-resched | ||
1067 | # || / _---=> hardirq/softirq | ||
1068 | # ||| / _--=> preempt-depth | ||
1069 | # |||| / | ||
1070 | # ||||| delay | ||
1071 | # cmd pid ||||| time | caller | ||
1072 | # \ / ||||| \ | / | ||
1073 | ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | ||
1074 | ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | ||
1075 | ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | ||
1076 | ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | ||
1077 | ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | ||
1078 | ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | ||
1079 | ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | ||
1080 | ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | ||
1081 | [...] | ||
1082 | ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | ||
1083 | ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | ||
1084 | ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | ||
1085 | ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | ||
1086 | [...] | ||
1087 | ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | ||
1088 | ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | ||
1089 | ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | ||
1090 | ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | ||
1091 | ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | ||
1092 | ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | ||
1093 | ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | ||
1094 | ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | ||
1095 | ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | ||
1096 | ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | ||
1097 | ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | ||
1098 | ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | ||
1099 | ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | ||
1100 | ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | ||
1101 | [...] | ||
1102 | ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | ||
1103 | ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | ||
1104 | ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | ||
1105 | ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | ||
1106 | ksoftirq-7 1d..4 50us : schedule (__cond_resched) | ||
1107 | |||
1108 | The interrupt went off while running ksoftirqd. This task runs | ||
1109 | at SCHED_OTHER. Why did not we see the 'N' set early? This may | ||
1110 | be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K | ||
1111 | stacks configured, the interrupt and softirq run with their own | ||
1112 | stack. Some information is held on the top of the task's stack | ||
1113 | (need_resched and preempt_count are both stored there). The | ||
1114 | setting of the NEED_RESCHED bit is done directly to the task's | ||
1115 | stack, but the reading of the NEED_RESCHED is done by looking at | ||
1116 | the current stack, which in this case is the stack for the hard | ||
1117 | interrupt. This hides the fact that NEED_RESCHED has been set. | ||
1118 | We do not see the 'N' until we switch back to the task's | ||
1119 | assigned stack. | ||
1120 | |||
1121 | function | ||
1122 | -------- | ||
1123 | |||
1124 | This tracer is the function tracer. Enabling the function tracer | ||
1125 | can be done from the debug file system. Make sure the | ||
1126 | ftrace_enabled is set; otherwise this tracer is a nop. | ||
1127 | |||
1128 | # sysctl kernel.ftrace_enabled=1 | ||
1129 | # echo function > current_tracer | ||
1130 | # echo 1 > tracing_enabled | ||
1131 | # usleep 1 | ||
1132 | # echo 0 > tracing_enabled | ||
1133 | # cat trace | ||
1134 | # tracer: function | ||
1135 | # | ||
1136 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1137 | # | | | | | | ||
1138 | bash-4003 [00] 123.638713: finish_task_switch <-schedule | ||
1139 | bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | ||
1140 | bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | ||
1141 | bash-4003 [00] 123.638715: hrtick_set <-schedule | ||
1142 | bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | ||
1143 | bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | ||
1144 | bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | ||
1145 | bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | ||
1146 | bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | ||
1147 | bash-4003 [00] 123.638718: sub_preempt_count <-schedule | ||
1148 | bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | ||
1149 | bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | ||
1150 | bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | ||
1151 | bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | ||
1152 | bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | ||
1153 | [...] | ||
1154 | |||
1155 | |||
1156 | Note: function tracer uses ring buffers to store the above | ||
1157 | entries. The newest data may overwrite the oldest data. | ||
1158 | Sometimes using echo to stop the trace is not sufficient because | ||
1159 | the tracing could have overwritten the data that you wanted to | ||
1160 | record. For this reason, it is sometimes better to disable | ||
1161 | tracing directly from a program. This allows you to stop the | ||
1162 | tracing at the point that you hit the part that you are | ||
1163 | interested in. To disable the tracing directly from a C program, | ||
1164 | something like following code snippet can be used: | ||
1165 | |||
1166 | int trace_fd; | ||
1167 | [...] | ||
1168 | int main(int argc, char *argv[]) { | ||
1169 | [...] | ||
1170 | trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); | ||
1171 | [...] | ||
1172 | if (condition_hit()) { | ||
1173 | write(trace_fd, "0", 1); | ||
1174 | } | ||
1175 | [...] | ||
1176 | } | ||
1177 | |||
1178 | |||
1179 | Single thread tracing | ||
1180 | --------------------- | ||
1181 | |||
1182 | By writing into set_ftrace_pid you can trace a | ||
1183 | single thread. For example: | ||
1184 | |||
1185 | # cat set_ftrace_pid | ||
1186 | no pid | ||
1187 | # echo 3111 > set_ftrace_pid | ||
1188 | # cat set_ftrace_pid | ||
1189 | 3111 | ||
1190 | # echo function > current_tracer | ||
1191 | # cat trace | head | ||
1192 | # tracer: function | ||
1193 | # | ||
1194 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1195 | # | | | | | | ||
1196 | yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return | ||
1197 | yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range | ||
1198 | yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1199 | yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel | ||
1200 | yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll | ||
1201 | yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll | ||
1202 | # echo -1 > set_ftrace_pid | ||
1203 | # cat trace |head | ||
1204 | # tracer: function | ||
1205 | # | ||
1206 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1207 | # | | | | | | ||
1208 | ##### CPU 3 buffer started #### | ||
1209 | yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait | ||
1210 | yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry | ||
1211 | yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry | ||
1212 | yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit | ||
1213 | yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit | ||
1214 | |||
1215 | If you want to trace a function when executing, you could use | ||
1216 | something like this simple program: | ||
1217 | |||
1218 | #include <stdio.h> | ||
1219 | #include <stdlib.h> | ||
1220 | #include <sys/types.h> | ||
1221 | #include <sys/stat.h> | ||
1222 | #include <fcntl.h> | ||
1223 | #include <unistd.h> | ||
1224 | |||
1225 | #define _STR(x) #x | ||
1226 | #define STR(x) _STR(x) | ||
1227 | #define MAX_PATH 256 | ||
1228 | |||
1229 | const char *find_debugfs(void) | ||
1230 | { | ||
1231 | static char debugfs[MAX_PATH+1]; | ||
1232 | static int debugfs_found; | ||
1233 | char type[100]; | ||
1234 | FILE *fp; | ||
1235 | |||
1236 | if (debugfs_found) | ||
1237 | return debugfs; | ||
1238 | |||
1239 | if ((fp = fopen("/proc/mounts","r")) == NULL) { | ||
1240 | perror("/proc/mounts"); | ||
1241 | return NULL; | ||
1242 | } | ||
1243 | |||
1244 | while (fscanf(fp, "%*s %" | ||
1245 | STR(MAX_PATH) | ||
1246 | "s %99s %*s %*d %*d\n", | ||
1247 | debugfs, type) == 2) { | ||
1248 | if (strcmp(type, "debugfs") == 0) | ||
1249 | break; | ||
1250 | } | ||
1251 | fclose(fp); | ||
1252 | |||
1253 | if (strcmp(type, "debugfs") != 0) { | ||
1254 | fprintf(stderr, "debugfs not mounted"); | ||
1255 | return NULL; | ||
1256 | } | ||
1257 | |||
1258 | debugfs_found = 1; | ||
1259 | |||
1260 | return debugfs; | ||
1261 | } | ||
1262 | |||
1263 | const char *tracing_file(const char *file_name) | ||
1264 | { | ||
1265 | static char trace_file[MAX_PATH+1]; | ||
1266 | snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name); | ||
1267 | return trace_file; | ||
1268 | } | ||
1269 | |||
1270 | int main (int argc, char **argv) | ||
1271 | { | ||
1272 | if (argc < 1) | ||
1273 | exit(-1); | ||
1274 | |||
1275 | if (fork() > 0) { | ||
1276 | int fd, ffd; | ||
1277 | char line[64]; | ||
1278 | int s; | ||
1279 | |||
1280 | ffd = open(tracing_file("current_tracer"), O_WRONLY); | ||
1281 | if (ffd < 0) | ||
1282 | exit(-1); | ||
1283 | write(ffd, "nop", 3); | ||
1284 | |||
1285 | fd = open(tracing_file("set_ftrace_pid"), O_WRONLY); | ||
1286 | s = sprintf(line, "%d\n", getpid()); | ||
1287 | write(fd, line, s); | ||
1288 | |||
1289 | write(ffd, "function", 8); | ||
1290 | |||
1291 | close(fd); | ||
1292 | close(ffd); | ||
1293 | |||
1294 | execvp(argv[1], argv+1); | ||
1295 | } | ||
1296 | |||
1297 | return 0; | ||
1298 | } | ||
1299 | |||
1300 | |||
1301 | hw-branch-tracer (x86 only) | ||
1302 | --------------------------- | ||
1303 | |||
1304 | This tracer uses the x86 last branch tracing hardware feature to | ||
1305 | collect a branch trace on all cpus with relatively low overhead. | ||
1306 | |||
1307 | The tracer uses a fixed-size circular buffer per cpu and only | ||
1308 | traces ring 0 branches. The trace file dumps that buffer in the | ||
1309 | following format: | ||
1310 | |||
1311 | # tracer: hw-branch-tracer | ||
1312 | # | ||
1313 | # CPU# TO <- FROM | ||
1314 | 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6 | ||
1315 | 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a | ||
1316 | 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf | ||
1317 | 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf | ||
1318 | 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a | ||
1319 | 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf | ||
1320 | |||
1321 | |||
1322 | The tracer may be used to dump the trace for the oops'ing cpu on | ||
1323 | a kernel oops into the system log. To enable this, | ||
1324 | ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one | ||
1325 | can either use the sysctl function or set it via the proc system | ||
1326 | interface. | ||
1327 | |||
1328 | sysctl kernel.ftrace_dump_on_oops=1 | ||
1329 | |||
1330 | or | ||
1331 | |||
1332 | echo 1 > /proc/sys/kernel/ftrace_dump_on_oops | ||
1333 | |||
1334 | |||
1335 | Here's an example of such a dump after a null pointer | ||
1336 | dereference in a kernel module: | ||
1337 | |||
1338 | [57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 | ||
1339 | [57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1340 | [57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0 | ||
1341 | [57848.106019] Oops: 0002 [#1] SMP | ||
1342 | [57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus | ||
1343 | [57848.106019] Dumping ftrace buffer: | ||
1344 | [57848.106019] --------------------------------- | ||
1345 | [...] | ||
1346 | [57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24 | ||
1347 | [57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165 | ||
1348 | [57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165 | ||
1349 | [57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165 | ||
1350 | [57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165 | ||
1351 | [57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops] | ||
1352 | [57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30 | ||
1353 | [57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b | ||
1354 | [57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31 | ||
1355 | [57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1 | ||
1356 | [57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30 | ||
1357 | [...] | ||
1358 | [57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2 | ||
1359 | [57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881 | ||
1360 | [57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881 | ||
1361 | [57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96 | ||
1362 | [...] | ||
1363 | [57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3 | ||
1364 | [57848.106019] --------------------------------- | ||
1365 | [57848.106019] CPU 0 | ||
1366 | [57848.106019] Modules linked in: oops | ||
1367 | [57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23 | ||
1368 | [57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1369 | [57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246 | ||
1370 | [...] | ||
1371 | |||
1372 | |||
1373 | function graph tracer | ||
1374 | --------------------------- | ||
1375 | |||
1376 | This tracer is similar to the function tracer except that it | ||
1377 | probes a function on its entry and its exit. This is done by | ||
1378 | using a dynamically allocated stack of return addresses in each | ||
1379 | task_struct. On function entry the tracer overwrites the return | ||
1380 | address of each function traced to set a custom probe. Thus the | ||
1381 | original return address is stored on the stack of return address | ||
1382 | in the task_struct. | ||
1383 | |||
1384 | Probing on both ends of a function leads to special features | ||
1385 | such as: | ||
1386 | |||
1387 | - measure of a function's time execution | ||
1388 | - having a reliable call stack to draw function calls graph | ||
1389 | |||
1390 | This tracer is useful in several situations: | ||
1391 | |||
1392 | - you want to find the reason of a strange kernel behavior and | ||
1393 | need to see what happens in detail on any areas (or specific | ||
1394 | ones). | ||
1395 | |||
1396 | - you are experiencing weird latencies but it's difficult to | ||
1397 | find its origin. | ||
1398 | |||
1399 | - you want to find quickly which path is taken by a specific | ||
1400 | function | ||
1401 | |||
1402 | - you just want to peek inside a working kernel and want to see | ||
1403 | what happens there. | ||
1404 | |||
1405 | # tracer: function_graph | ||
1406 | # | ||
1407 | # CPU DURATION FUNCTION CALLS | ||
1408 | # | | | | | | | | ||
1409 | |||
1410 | 0) | sys_open() { | ||
1411 | 0) | do_sys_open() { | ||
1412 | 0) | getname() { | ||
1413 | 0) | kmem_cache_alloc() { | ||
1414 | 0) 1.382 us | __might_sleep(); | ||
1415 | 0) 2.478 us | } | ||
1416 | 0) | strncpy_from_user() { | ||
1417 | 0) | might_fault() { | ||
1418 | 0) 1.389 us | __might_sleep(); | ||
1419 | 0) 2.553 us | } | ||
1420 | 0) 3.807 us | } | ||
1421 | 0) 7.876 us | } | ||
1422 | 0) | alloc_fd() { | ||
1423 | 0) 0.668 us | _spin_lock(); | ||
1424 | 0) 0.570 us | expand_files(); | ||
1425 | 0) 0.586 us | _spin_unlock(); | ||
1426 | |||
1427 | |||
1428 | There are several columns that can be dynamically | ||
1429 | enabled/disabled. You can use every combination of options you | ||
1430 | want, depending on your needs. | ||
1431 | |||
1432 | - The cpu number on which the function executed is default | ||
1433 | enabled. It is sometimes better to only trace one cpu (see | ||
1434 | tracing_cpu_mask file) or you might sometimes see unordered | ||
1435 | function calls while cpu tracing switch. | ||
1436 | |||
1437 | hide: echo nofuncgraph-cpu > trace_options | ||
1438 | show: echo funcgraph-cpu > trace_options | ||
1439 | |||
1440 | - The duration (function's time of execution) is displayed on | ||
1441 | the closing bracket line of a function or on the same line | ||
1442 | than the current function in case of a leaf one. It is default | ||
1443 | enabled. | ||
1444 | |||
1445 | hide: echo nofuncgraph-duration > trace_options | ||
1446 | show: echo funcgraph-duration > trace_options | ||
1447 | |||
1448 | - The overhead field precedes the duration field in case of | ||
1449 | reached duration thresholds. | ||
1450 | |||
1451 | hide: echo nofuncgraph-overhead > trace_options | ||
1452 | show: echo funcgraph-overhead > trace_options | ||
1453 | depends on: funcgraph-duration | ||
1454 | |||
1455 | ie: | ||
1456 | |||
1457 | 0) | up_write() { | ||
1458 | 0) 0.646 us | _spin_lock_irqsave(); | ||
1459 | 0) 0.684 us | _spin_unlock_irqrestore(); | ||
1460 | 0) 3.123 us | } | ||
1461 | 0) 0.548 us | fput(); | ||
1462 | 0) + 58.628 us | } | ||
1463 | |||
1464 | [...] | ||
1465 | |||
1466 | 0) | putname() { | ||
1467 | 0) | kmem_cache_free() { | ||
1468 | 0) 0.518 us | __phys_addr(); | ||
1469 | 0) 1.757 us | } | ||
1470 | 0) 2.861 us | } | ||
1471 | 0) ! 115.305 us | } | ||
1472 | 0) ! 116.402 us | } | ||
1473 | |||
1474 | + means that the function exceeded 10 usecs. | ||
1475 | ! means that the function exceeded 100 usecs. | ||
1476 | |||
1477 | |||
1478 | - The task/pid field displays the thread cmdline and pid which | ||
1479 | executed the function. It is default disabled. | ||
1480 | |||
1481 | hide: echo nofuncgraph-proc > trace_options | ||
1482 | show: echo funcgraph-proc > trace_options | ||
1483 | |||
1484 | ie: | ||
1485 | |||
1486 | # tracer: function_graph | ||
1487 | # | ||
1488 | # CPU TASK/PID DURATION FUNCTION CALLS | ||
1489 | # | | | | | | | | | | ||
1490 | 0) sh-4802 | | d_free() { | ||
1491 | 0) sh-4802 | | call_rcu() { | ||
1492 | 0) sh-4802 | | __call_rcu() { | ||
1493 | 0) sh-4802 | 0.616 us | rcu_process_gp_end(); | ||
1494 | 0) sh-4802 | 0.586 us | check_for_new_grace_period(); | ||
1495 | 0) sh-4802 | 2.899 us | } | ||
1496 | 0) sh-4802 | 4.040 us | } | ||
1497 | 0) sh-4802 | 5.151 us | } | ||
1498 | 0) sh-4802 | + 49.370 us | } | ||
1499 | |||
1500 | |||
1501 | - The absolute time field is an absolute timestamp given by the | ||
1502 | system clock since it started. A snapshot of this time is | ||
1503 | given on each entry/exit of functions | ||
1504 | |||
1505 | hide: echo nofuncgraph-abstime > trace_options | ||
1506 | show: echo funcgraph-abstime > trace_options | ||
1507 | |||
1508 | ie: | ||
1509 | |||
1510 | # | ||
1511 | # TIME CPU DURATION FUNCTION CALLS | ||
1512 | # | | | | | | | | | ||
1513 | 360.774522 | 1) 0.541 us | } | ||
1514 | 360.774522 | 1) 4.663 us | } | ||
1515 | 360.774523 | 1) 0.541 us | __wake_up_bit(); | ||
1516 | 360.774524 | 1) 6.796 us | } | ||
1517 | 360.774524 | 1) 7.952 us | } | ||
1518 | 360.774525 | 1) 9.063 us | } | ||
1519 | 360.774525 | 1) 0.615 us | journal_mark_dirty(); | ||
1520 | 360.774527 | 1) 0.578 us | __brelse(); | ||
1521 | 360.774528 | 1) | reiserfs_prepare_for_journal() { | ||
1522 | 360.774528 | 1) | unlock_buffer() { | ||
1523 | 360.774529 | 1) | wake_up_bit() { | ||
1524 | 360.774529 | 1) | bit_waitqueue() { | ||
1525 | 360.774530 | 1) 0.594 us | __phys_addr(); | ||
1526 | |||
1527 | |||
1528 | You can put some comments on specific functions by using | ||
1529 | trace_printk() For example, if you want to put a comment inside | ||
1530 | the __might_sleep() function, you just have to include | ||
1531 | <linux/ftrace.h> and call trace_printk() inside __might_sleep() | ||
1532 | |||
1533 | trace_printk("I'm a comment!\n") | ||
1534 | |||
1535 | will produce: | ||
1536 | |||
1537 | 1) | __might_sleep() { | ||
1538 | 1) | /* I'm a comment! */ | ||
1539 | 1) 1.449 us | } | ||
1540 | |||
1541 | |||
1542 | You might find other useful features for this tracer in the | ||
1543 | following "dynamic ftrace" section such as tracing only specific | ||
1544 | functions or tasks. | ||
1545 | |||
1546 | dynamic ftrace | ||
1547 | -------------- | ||
1548 | |||
1549 | If CONFIG_DYNAMIC_FTRACE is set, the system will run with | ||
1550 | virtually no overhead when function tracing is disabled. The way | ||
1551 | this works is the mcount function call (placed at the start of | ||
1552 | every kernel function, produced by the -pg switch in gcc), | ||
1553 | starts of pointing to a simple return. (Enabling FTRACE will | ||
1554 | include the -pg switch in the compiling of the kernel.) | ||
1555 | |||
1556 | At compile time every C file object is run through the | ||
1557 | recordmcount.pl script (located in the scripts directory). This | ||
1558 | script will process the C object using objdump to find all the | ||
1559 | locations in the .text section that call mcount. (Note, only the | ||
1560 | .text section is processed, since processing other sections like | ||
1561 | .init.text may cause races due to those sections being freed). | ||
1562 | |||
1563 | A new section called "__mcount_loc" is created that holds | ||
1564 | references to all the mcount call sites in the .text section. | ||
1565 | This section is compiled back into the original object. The | ||
1566 | final linker will add all these references into a single table. | ||
1567 | |||
1568 | On boot up, before SMP is initialized, the dynamic ftrace code | ||
1569 | scans this table and updates all the locations into nops. It | ||
1570 | also records the locations, which are added to the | ||
1571 | available_filter_functions list. Modules are processed as they | ||
1572 | are loaded and before they are executed. When a module is | ||
1573 | unloaded, it also removes its functions from the ftrace function | ||
1574 | list. This is automatic in the module unload code, and the | ||
1575 | module author does not need to worry about it. | ||
1576 | |||
1577 | When tracing is enabled, kstop_machine is called to prevent | ||
1578 | races with the CPUS executing code being modified (which can | ||
1579 | cause the CPU to do undesireable things), and the nops are | ||
1580 | patched back to calls. But this time, they do not call mcount | ||
1581 | (which is just a function stub). They now call into the ftrace | ||
1582 | infrastructure. | ||
1583 | |||
1584 | One special side-effect to the recording of the functions being | ||
1585 | traced is that we can now selectively choose which functions we | ||
1586 | wish to trace and which ones we want the mcount calls to remain | ||
1587 | as nops. | ||
1588 | |||
1589 | Two files are used, one for enabling and one for disabling the | ||
1590 | tracing of specified functions. They are: | ||
1591 | |||
1592 | set_ftrace_filter | ||
1593 | |||
1594 | and | ||
1595 | |||
1596 | set_ftrace_notrace | ||
1597 | |||
1598 | A list of available functions that you can add to these files is | ||
1599 | listed in: | ||
1600 | |||
1601 | available_filter_functions | ||
1602 | |||
1603 | # cat available_filter_functions | ||
1604 | put_prev_task_idle | ||
1605 | kmem_cache_create | ||
1606 | pick_next_task_rt | ||
1607 | get_online_cpus | ||
1608 | pick_next_task_fair | ||
1609 | mutex_lock | ||
1610 | [...] | ||
1611 | |||
1612 | If I am only interested in sys_nanosleep and hrtimer_interrupt: | ||
1613 | |||
1614 | # echo sys_nanosleep hrtimer_interrupt \ | ||
1615 | > set_ftrace_filter | ||
1616 | # echo ftrace > current_tracer | ||
1617 | # echo 1 > tracing_enabled | ||
1618 | # usleep 1 | ||
1619 | # echo 0 > tracing_enabled | ||
1620 | # cat trace | ||
1621 | # tracer: ftrace | ||
1622 | # | ||
1623 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1624 | # | | | | | | ||
1625 | usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1626 | usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | ||
1627 | <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1628 | |||
1629 | To see which functions are being traced, you can cat the file: | ||
1630 | |||
1631 | # cat set_ftrace_filter | ||
1632 | hrtimer_interrupt | ||
1633 | sys_nanosleep | ||
1634 | |||
1635 | |||
1636 | Perhaps this is not enough. The filters also allow simple wild | ||
1637 | cards. Only the following are currently available | ||
1638 | |||
1639 | <match>* - will match functions that begin with <match> | ||
1640 | *<match> - will match functions that end with <match> | ||
1641 | *<match>* - will match functions that have <match> in it | ||
1642 | |||
1643 | These are the only wild cards which are supported. | ||
1644 | |||
1645 | <match>*<match> will not work. | ||
1646 | |||
1647 | Note: It is better to use quotes to enclose the wild cards, | ||
1648 | otherwise the shell may expand the parameters into names | ||
1649 | of files in the local directory. | ||
1650 | |||
1651 | # echo 'hrtimer_*' > set_ftrace_filter | ||
1652 | |||
1653 | Produces: | ||
1654 | |||
1655 | # tracer: ftrace | ||
1656 | # | ||
1657 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1658 | # | | | | | | ||
1659 | bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | ||
1660 | bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | ||
1661 | bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | ||
1662 | bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1663 | <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1664 | <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1665 | <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1666 | <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1667 | <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1668 | |||
1669 | |||
1670 | Notice that we lost the sys_nanosleep. | ||
1671 | |||
1672 | # cat set_ftrace_filter | ||
1673 | hrtimer_run_queues | ||
1674 | hrtimer_run_pending | ||
1675 | hrtimer_init | ||
1676 | hrtimer_cancel | ||
1677 | hrtimer_try_to_cancel | ||
1678 | hrtimer_forward | ||
1679 | hrtimer_start | ||
1680 | hrtimer_reprogram | ||
1681 | hrtimer_force_reprogram | ||
1682 | hrtimer_get_next_event | ||
1683 | hrtimer_interrupt | ||
1684 | hrtimer_nanosleep | ||
1685 | hrtimer_wakeup | ||
1686 | hrtimer_get_remaining | ||
1687 | hrtimer_get_res | ||
1688 | hrtimer_init_sleeper | ||
1689 | |||
1690 | |||
1691 | This is because the '>' and '>>' act just like they do in bash. | ||
1692 | To rewrite the filters, use '>' | ||
1693 | To append to the filters, use '>>' | ||
1694 | |||
1695 | To clear out a filter so that all functions will be recorded | ||
1696 | again: | ||
1697 | |||
1698 | # echo > set_ftrace_filter | ||
1699 | # cat set_ftrace_filter | ||
1700 | # | ||
1701 | |||
1702 | Again, now we want to append. | ||
1703 | |||
1704 | # echo sys_nanosleep > set_ftrace_filter | ||
1705 | # cat set_ftrace_filter | ||
1706 | sys_nanosleep | ||
1707 | # echo 'hrtimer_*' >> set_ftrace_filter | ||
1708 | # cat set_ftrace_filter | ||
1709 | hrtimer_run_queues | ||
1710 | hrtimer_run_pending | ||
1711 | hrtimer_init | ||
1712 | hrtimer_cancel | ||
1713 | hrtimer_try_to_cancel | ||
1714 | hrtimer_forward | ||
1715 | hrtimer_start | ||
1716 | hrtimer_reprogram | ||
1717 | hrtimer_force_reprogram | ||
1718 | hrtimer_get_next_event | ||
1719 | hrtimer_interrupt | ||
1720 | sys_nanosleep | ||
1721 | hrtimer_nanosleep | ||
1722 | hrtimer_wakeup | ||
1723 | hrtimer_get_remaining | ||
1724 | hrtimer_get_res | ||
1725 | hrtimer_init_sleeper | ||
1726 | |||
1727 | |||
1728 | The set_ftrace_notrace prevents those functions from being | ||
1729 | traced. | ||
1730 | |||
1731 | # echo '*preempt*' '*lock*' > set_ftrace_notrace | ||
1732 | |||
1733 | Produces: | ||
1734 | |||
1735 | # tracer: ftrace | ||
1736 | # | ||
1737 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1738 | # | | | | | | ||
1739 | bash-4043 [01] 115.281644: finish_task_switch <-schedule | ||
1740 | bash-4043 [01] 115.281645: hrtick_set <-schedule | ||
1741 | bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | ||
1742 | bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | ||
1743 | bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | ||
1744 | bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | ||
1745 | bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | ||
1746 | bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | ||
1747 | bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | ||
1748 | |||
1749 | We can see that there's no more lock or preempt tracing. | ||
1750 | |||
1751 | |||
1752 | Dynamic ftrace with the function graph tracer | ||
1753 | --------------------------------------------- | ||
1754 | |||
1755 | Although what has been explained above concerns both the | ||
1756 | function tracer and the function-graph-tracer, there are some | ||
1757 | special features only available in the function-graph tracer. | ||
1758 | |||
1759 | If you want to trace only one function and all of its children, | ||
1760 | you just have to echo its name into set_graph_function: | ||
1761 | |||
1762 | echo __do_fault > set_graph_function | ||
1763 | |||
1764 | will produce the following "expanded" trace of the __do_fault() | ||
1765 | function: | ||
1766 | |||
1767 | 0) | __do_fault() { | ||
1768 | 0) | filemap_fault() { | ||
1769 | 0) | find_lock_page() { | ||
1770 | 0) 0.804 us | find_get_page(); | ||
1771 | 0) | __might_sleep() { | ||
1772 | 0) 1.329 us | } | ||
1773 | 0) 3.904 us | } | ||
1774 | 0) 4.979 us | } | ||
1775 | 0) 0.653 us | _spin_lock(); | ||
1776 | 0) 0.578 us | page_add_file_rmap(); | ||
1777 | 0) 0.525 us | native_set_pte_at(); | ||
1778 | 0) 0.585 us | _spin_unlock(); | ||
1779 | 0) | unlock_page() { | ||
1780 | 0) 0.541 us | page_waitqueue(); | ||
1781 | 0) 0.639 us | __wake_up_bit(); | ||
1782 | 0) 2.786 us | } | ||
1783 | 0) + 14.237 us | } | ||
1784 | 0) | __do_fault() { | ||
1785 | 0) | filemap_fault() { | ||
1786 | 0) | find_lock_page() { | ||
1787 | 0) 0.698 us | find_get_page(); | ||
1788 | 0) | __might_sleep() { | ||
1789 | 0) 1.412 us | } | ||
1790 | 0) 3.950 us | } | ||
1791 | 0) 5.098 us | } | ||
1792 | 0) 0.631 us | _spin_lock(); | ||
1793 | 0) 0.571 us | page_add_file_rmap(); | ||
1794 | 0) 0.526 us | native_set_pte_at(); | ||
1795 | 0) 0.586 us | _spin_unlock(); | ||
1796 | 0) | unlock_page() { | ||
1797 | 0) 0.533 us | page_waitqueue(); | ||
1798 | 0) 0.638 us | __wake_up_bit(); | ||
1799 | 0) 2.793 us | } | ||
1800 | 0) + 14.012 us | } | ||
1801 | |||
1802 | You can also expand several functions at once: | ||
1803 | |||
1804 | echo sys_open > set_graph_function | ||
1805 | echo sys_close >> set_graph_function | ||
1806 | |||
1807 | Now if you want to go back to trace all functions you can clear | ||
1808 | this special filter via: | ||
1809 | |||
1810 | echo > set_graph_function | ||
1811 | |||
1812 | |||
1813 | trace_pipe | ||
1814 | ---------- | ||
1815 | |||
1816 | The trace_pipe outputs the same content as the trace file, but | ||
1817 | the effect on the tracing is different. Every read from | ||
1818 | trace_pipe is consumed. This means that subsequent reads will be | ||
1819 | different. The trace is live. | ||
1820 | |||
1821 | # echo function > current_tracer | ||
1822 | # cat trace_pipe > /tmp/trace.out & | ||
1823 | [1] 4153 | ||
1824 | # echo 1 > tracing_enabled | ||
1825 | # usleep 1 | ||
1826 | # echo 0 > tracing_enabled | ||
1827 | # cat trace | ||
1828 | # tracer: function | ||
1829 | # | ||
1830 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1831 | # | | | | | | ||
1832 | |||
1833 | # | ||
1834 | # cat /tmp/trace.out | ||
1835 | bash-4043 [00] 41.267106: finish_task_switch <-schedule | ||
1836 | bash-4043 [00] 41.267106: hrtick_set <-schedule | ||
1837 | bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | ||
1838 | bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | ||
1839 | bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | ||
1840 | bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | ||
1841 | bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | ||
1842 | bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | ||
1843 | bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | ||
1844 | bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1845 | |||
1846 | |||
1847 | Note, reading the trace_pipe file will block until more input is | ||
1848 | added. By changing the tracer, trace_pipe will issue an EOF. We | ||
1849 | needed to set the function tracer _before_ we "cat" the | ||
1850 | trace_pipe file. | ||
1851 | |||
1852 | |||
1853 | trace entries | ||
1854 | ------------- | ||
1855 | |||
1856 | Having too much or not enough data can be troublesome in | ||
1857 | diagnosing an issue in the kernel. The file buffer_size_kb is | ||
1858 | used to modify the size of the internal trace buffers. The | ||
1859 | number listed is the number of entries that can be recorded per | ||
1860 | CPU. To know the full size, multiply the number of possible CPUS | ||
1861 | with the number of entries. | ||
1862 | |||
1863 | # cat buffer_size_kb | ||
1864 | 1408 (units kilobytes) | ||
1865 | |||
1866 | Note, to modify this, you must have tracing completely disabled. | ||
1867 | To do that, echo "nop" into the current_tracer. If the | ||
1868 | current_tracer is not set to "nop", an EINVAL error will be | ||
1869 | returned. | ||
1870 | |||
1871 | # echo nop > current_tracer | ||
1872 | # echo 10000 > buffer_size_kb | ||
1873 | # cat buffer_size_kb | ||
1874 | 10000 (units kilobytes) | ||
1875 | |||
1876 | The number of pages which will be allocated is limited to a | ||
1877 | percentage of available memory. Allocating too much will produce | ||
1878 | an error. | ||
1879 | |||
1880 | # echo 1000000000000 > buffer_size_kb | ||
1881 | -bash: echo: write error: Cannot allocate memory | ||
1882 | # cat buffer_size_kb | ||
1883 | 85 | ||
1884 | |||
1885 | ----------- | ||
1886 | |||
1887 | More details can be found in the source code, in the | ||
1888 | kernel/trace/*.c files. | ||
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt new file mode 100644 index 000000000000..6308735e58ca --- /dev/null +++ b/Documentation/trace/kmemtrace.txt | |||
@@ -0,0 +1,126 @@ | |||
1 | kmemtrace - Kernel Memory Tracer | ||
2 | |||
3 | by Eduard - Gabriel Munteanu | ||
4 | <eduard.munteanu@linux360.ro> | ||
5 | |||
6 | I. Introduction | ||
7 | =============== | ||
8 | |||
9 | kmemtrace helps kernel developers figure out two things: | ||
10 | 1) how different allocators (SLAB, SLUB etc.) perform | ||
11 | 2) how kernel code allocates memory and how much | ||
12 | |||
13 | To do this, we trace every allocation and export information to the userspace | ||
14 | through the relay interface. We export things such as the number of requested | ||
15 | bytes, the number of bytes actually allocated (i.e. including internal | ||
16 | fragmentation), whether this is a slab allocation or a plain kmalloc() and so | ||
17 | on. | ||
18 | |||
19 | The actual analysis is performed by a userspace tool (see section III for | ||
20 | details on where to get it from). It logs the data exported by the kernel, | ||
21 | processes it and (as of writing this) can provide the following information: | ||
22 | - the total amount of memory allocated and fragmentation per call-site | ||
23 | - the amount of memory allocated and fragmentation per allocation | ||
24 | - total memory allocated and fragmentation in the collected dataset | ||
25 | - number of cross-CPU allocation and frees (makes sense in NUMA environments) | ||
26 | |||
27 | Moreover, it can potentially find inconsistent and erroneous behavior in | ||
28 | kernel code, such as using slab free functions on kmalloc'ed memory or | ||
29 | allocating less memory than requested (but not truly failed allocations). | ||
30 | |||
31 | kmemtrace also makes provisions for tracing on some arch and analysing the | ||
32 | data on another. | ||
33 | |||
34 | II. Design and goals | ||
35 | ==================== | ||
36 | |||
37 | kmemtrace was designed to handle rather large amounts of data. Thus, it uses | ||
38 | the relay interface to export whatever is logged to userspace, which then | ||
39 | stores it. Analysis and reporting is done asynchronously, that is, after the | ||
40 | data is collected and stored. By design, it allows one to log and analyse | ||
41 | on different machines and different arches. | ||
42 | |||
43 | As of writing this, the ABI is not considered stable, though it might not | ||
44 | change much. However, no guarantees are made about compatibility yet. When | ||
45 | deemed stable, the ABI should still allow easy extension while maintaining | ||
46 | backward compatibility. This is described further in Documentation/ABI. | ||
47 | |||
48 | Summary of design goals: | ||
49 | - allow logging and analysis to be done across different machines | ||
50 | - be fast and anticipate usage in high-load environments (*) | ||
51 | - be reasonably extensible | ||
52 | - make it possible for GNU/Linux distributions to have kmemtrace | ||
53 | included in their repositories | ||
54 | |||
55 | (*) - one of the reasons Pekka Enberg's original userspace data analysis | ||
56 | tool's code was rewritten from Perl to C (although this is more than a | ||
57 | simple conversion) | ||
58 | |||
59 | |||
60 | III. Quick usage guide | ||
61 | ====================== | ||
62 | |||
63 | 1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable | ||
64 | CONFIG_KMEMTRACE). | ||
65 | |||
66 | 2) Get the userspace tool and build it: | ||
67 | $ git clone git://repo.or.cz/kmemtrace-user.git # current repository | ||
68 | $ cd kmemtrace-user/ | ||
69 | $ ./autogen.sh | ||
70 | $ ./configure | ||
71 | $ make | ||
72 | |||
73 | 3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the | ||
74 | 'single' runlevel (so that relay buffers don't fill up easily), and run | ||
75 | kmemtrace: | ||
76 | # '$' does not mean user, but root here. | ||
77 | $ mount -t debugfs none /sys/kernel/debug | ||
78 | $ mount -t proc none /proc | ||
79 | $ cd path/to/kmemtrace-user/ | ||
80 | $ ./kmemtraced | ||
81 | Wait a bit, then stop it with CTRL+C. | ||
82 | $ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't | ||
83 | # overrun, should | ||
84 | # be zero. | ||
85 | $ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to | ||
86 | check its correctness] | ||
87 | $ ./kmemtrace-report | ||
88 | |||
89 | Now you should have a nice and short summary of how the allocator performs. | ||
90 | |||
91 | IV. FAQ and known issues | ||
92 | ======================== | ||
93 | |||
94 | Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix | ||
95 | this? Should I worry? | ||
96 | A: If it's non-zero, this affects kmemtrace's accuracy, depending on how | ||
97 | large the number is. You can fix it by supplying a higher | ||
98 | 'kmemtrace.subbufs=N' kernel parameter. | ||
99 | --- | ||
100 | |||
101 | Q: kmemtrace_check reports errors, how do I fix this? Should I worry? | ||
102 | A: This is a bug and should be reported. It can occur for a variety of | ||
103 | reasons: | ||
104 | - possible bugs in relay code | ||
105 | - possible misuse of relay by kmemtrace | ||
106 | - timestamps being collected unorderly | ||
107 | Or you may fix it yourself and send us a patch. | ||
108 | --- | ||
109 | |||
110 | Q: kmemtrace_report shows many errors, how do I fix this? Should I worry? | ||
111 | A: This is a known issue and I'm working on it. These might be true errors | ||
112 | in kernel code, which may have inconsistent behavior (e.g. allocating memory | ||
113 | with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed | ||
114 | out this behavior may work with SLAB, but may fail with other allocators. | ||
115 | |||
116 | It may also be due to lack of tracing in some unusual allocator functions. | ||
117 | |||
118 | We don't want bug reports regarding this issue yet. | ||
119 | --- | ||
120 | |||
121 | V. See also | ||
122 | =========== | ||
123 | |||
124 | Documentation/kernel-parameters.txt | ||
125 | Documentation/ABI/testing/debugfs-kmemtrace | ||
126 | |||
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/trace/mmiotrace.txt index 5731c67abc55..162effbfbdec 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.txt | |||
@@ -32,41 +32,41 @@ is no way to automatically detect if you are losing events due to CPUs racing. | |||
32 | Usage Quick Reference | 32 | Usage Quick Reference |
33 | --------------------- | 33 | --------------------- |
34 | 34 | ||
35 | $ mount -t debugfs debugfs /debug | 35 | $ mount -t debugfs debugfs /sys/kernel/debug |
36 | $ echo mmiotrace > /debug/tracing/current_tracer | 36 | $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer |
37 | $ cat /debug/tracing/trace_pipe > mydump.txt & | 37 | $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & |
38 | Start X or whatever. | 38 | Start X or whatever. |
39 | $ echo "X is up" > /debug/tracing/trace_marker | 39 | $ echo "X is up" > /sys/kernel/debug/tracing/trace_marker |
40 | $ echo nop > /debug/tracing/current_tracer | 40 | $ echo nop > /sys/kernel/debug/tracing/current_tracer |
41 | Check for lost events. | 41 | Check for lost events. |
42 | 42 | ||
43 | 43 | ||
44 | Usage | 44 | Usage |
45 | ----- | 45 | ----- |
46 | 46 | ||
47 | Make sure debugfs is mounted to /debug. If not, (requires root privileges) | 47 | Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges) |
48 | $ mount -t debugfs debugfs /debug | 48 | $ mount -t debugfs debugfs /sys/kernel/debug |
49 | 49 | ||
50 | Check that the driver you are about to trace is not loaded. | 50 | Check that the driver you are about to trace is not loaded. |
51 | 51 | ||
52 | Activate mmiotrace (requires root privileges): | 52 | Activate mmiotrace (requires root privileges): |
53 | $ echo mmiotrace > /debug/tracing/current_tracer | 53 | $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer |
54 | 54 | ||
55 | Start storing the trace: | 55 | Start storing the trace: |
56 | $ cat /debug/tracing/trace_pipe > mydump.txt & | 56 | $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & |
57 | The 'cat' process should stay running (sleeping) in the background. | 57 | The 'cat' process should stay running (sleeping) in the background. |
58 | 58 | ||
59 | Load the driver you want to trace and use it. Mmiotrace will only catch MMIO | 59 | Load the driver you want to trace and use it. Mmiotrace will only catch MMIO |
60 | accesses to areas that are ioremapped while mmiotrace is active. | 60 | accesses to areas that are ioremapped while mmiotrace is active. |
61 | 61 | ||
62 | During tracing you can place comments (markers) into the trace by | 62 | During tracing you can place comments (markers) into the trace by |
63 | $ echo "X is up" > /debug/tracing/trace_marker | 63 | $ echo "X is up" > /sys/kernel/debug/tracing/trace_marker |
64 | This makes it easier to see which part of the (huge) trace corresponds to | 64 | This makes it easier to see which part of the (huge) trace corresponds to |
65 | which action. It is recommended to place descriptive markers about what you | 65 | which action. It is recommended to place descriptive markers about what you |
66 | do. | 66 | do. |
67 | 67 | ||
68 | Shut down mmiotrace (requires root privileges): | 68 | Shut down mmiotrace (requires root privileges): |
69 | $ echo nop > /debug/tracing/current_tracer | 69 | $ echo nop > /sys/kernel/debug/tracing/current_tracer |
70 | The 'cat' process exits. If it does not, kill it by issuing 'fg' command and | 70 | The 'cat' process exits. If it does not, kill it by issuing 'fg' command and |
71 | pressing ctrl+c. | 71 | pressing ctrl+c. |
72 | 72 | ||
@@ -78,10 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If | |||
78 | events were lost, the trace is incomplete. You should enlarge the buffers and | 78 | events were lost, the trace is incomplete. You should enlarge the buffers and |
79 | try again. Buffers are enlarged by first seeing how large the current buffers | 79 | try again. Buffers are enlarged by first seeing how large the current buffers |
80 | are: | 80 | are: |
81 | $ cat /debug/tracing/buffer_size_kb | 81 | $ cat /sys/kernel/debug/tracing/buffer_size_kb |
82 | gives you a number. Approximately double this number and write it back, for | 82 | gives you a number. Approximately double this number and write it back, for |
83 | instance: | 83 | instance: |
84 | $ echo 128000 > /debug/tracing/buffer_size_kb | 84 | $ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb |
85 | Then start again from the top. | 85 | Then start again from the top. |
86 | 86 | ||
87 | If you are doing a trace for a driver project, e.g. Nouveau, you should also | 87 | If you are doing a trace for a driver project, e.g. Nouveau, you should also |
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt new file mode 100644 index 000000000000..cd805e16dc27 --- /dev/null +++ b/Documentation/trace/power.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | The power tracer collects detailed information about C-state and P-state | ||
2 | transitions, instead of just looking at the high-level "average" | ||
3 | information. | ||
4 | |||
5 | There is a helper script found in scrips/tracing/power.pl in the kernel | ||
6 | sources which can be used to parse this information and create a | ||
7 | Scalable Vector Graphics (SVG) picture from the trace data. | ||
8 | |||
9 | To use this tracer: | ||
10 | |||
11 | echo 0 > /sys/kernel/debug/tracing/tracing_enabled | ||
12 | echo power > /sys/kernel/debug/tracing/current_tracer | ||
13 | echo 1 > /sys/kernel/debug/tracing/tracing_enabled | ||
14 | sleep 1 | ||
15 | echo 0 > /sys/kernel/debug/tracing/tracing_enabled | ||
16 | cat /sys/kernel/debug/tracing/trace | \ | ||
17 | perl scripts/tracing/power.pl > out.sv | ||
diff --git a/Documentation/tracepoints.txt b/Documentation/trace/tracepoints.txt index 6f0a044f5b5e..c0e1ceed75a4 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/trace/tracepoints.txt | |||
@@ -45,8 +45,8 @@ In include/trace/subsys.h : | |||
45 | #include <linux/tracepoint.h> | 45 | #include <linux/tracepoint.h> |
46 | 46 | ||
47 | DECLARE_TRACE(subsys_eventname, | 47 | DECLARE_TRACE(subsys_eventname, |
48 | TPPROTO(int firstarg, struct task_struct *p), | 48 | TP_PROTO(int firstarg, struct task_struct *p), |
49 | TPARGS(firstarg, p)); | 49 | TP_ARGS(firstarg, p)); |
50 | 50 | ||
51 | In subsys/file.c (where the tracing statement must be added) : | 51 | In subsys/file.c (where the tracing statement must be added) : |
52 | 52 | ||
@@ -66,10 +66,10 @@ Where : | |||
66 | - subsys is the name of your subsystem. | 66 | - subsys is the name of your subsystem. |
67 | - eventname is the name of the event to trace. | 67 | - eventname is the name of the event to trace. |
68 | 68 | ||
69 | - TPPROTO(int firstarg, struct task_struct *p) is the prototype of the | 69 | - TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the |
70 | function called by this tracepoint. | 70 | function called by this tracepoint. |
71 | 71 | ||
72 | - TPARGS(firstarg, p) are the parameters names, same as found in the | 72 | - TP_ARGS(firstarg, p) are the parameters names, same as found in the |
73 | prototype. | 73 | prototype. |
74 | 74 | ||
75 | Connecting a function (probe) to a tracepoint is done by providing a | 75 | Connecting a function (probe) to a tracepoint is done by providing a |
@@ -103,13 +103,14 @@ used to export the defined tracepoints. | |||
103 | 103 | ||
104 | * Probe / tracepoint example | 104 | * Probe / tracepoint example |
105 | 105 | ||
106 | See the example provided in samples/tracepoints/src | 106 | See the example provided in samples/tracepoints |
107 | 107 | ||
108 | Compile them with your kernel. | 108 | Compile them with your kernel. They are built during 'make' (not |
109 | 'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m. | ||
109 | 110 | ||
110 | Run, as root : | 111 | Run, as root : |
111 | modprobe tracepoint-example (insmod order is not important) | 112 | modprobe tracepoint-sample (insmod order is not important) |
112 | modprobe tracepoint-probe-example | 113 | modprobe tracepoint-probe-sample |
113 | cat /proc/tracepoint-example (returns an expected error) | 114 | cat /proc/tracepoint-sample (returns an expected error) |
114 | rmmod tracepoint-example tracepoint-probe-example | 115 | rmmod tracepoint-sample tracepoint-probe-sample |
115 | dmesg | 116 | dmesg |
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt index 4c3d62c7843a..c480e9c32dbd 100644 --- a/Documentation/usb/WUSB-Design-overview.txt +++ b/Documentation/usb/WUSB-Design-overview.txt | |||
@@ -84,7 +84,7 @@ The different logical parts of this driver are: | |||
84 | 84 | ||
85 | *UWB*: the Ultra-Wide-Band stack -- manages the radio and | 85 | *UWB*: the Ultra-Wide-Band stack -- manages the radio and |
86 | associated spectrum to allow for devices sharing it. Allows to | 86 | associated spectrum to allow for devices sharing it. Allows to |
87 | control bandwidth assingment, beaconing, scanning, etc | 87 | control bandwidth assignment, beaconing, scanning, etc |
88 | 88 | ||
89 | * | 89 | * |
90 | 90 | ||
@@ -184,7 +184,7 @@ and sends the replies and notifications back to the API | |||
184 | [/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that | 184 | [/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that |
185 | is chartered, among other things, to keep the tab of how the UWB radio | 185 | is chartered, among other things, to keep the tab of how the UWB radio |
186 | neighborhood looks, creating and destroying devices as they show up or | 186 | neighborhood looks, creating and destroying devices as they show up or |
187 | dissapear. | 187 | disappear. |
188 | 188 | ||
189 | Command execution is very simple: a command block is sent and a event | 189 | Command execution is very simple: a command block is sent and a event |
190 | block or reply is expected back. For sending/receiving command/events, a | 190 | block or reply is expected back. For sending/receiving command/events, a |
@@ -333,7 +333,7 @@ read descriptors and move our data. | |||
333 | 333 | ||
334 | *Device life cycle and keep alives* | 334 | *Device life cycle and keep alives* |
335 | 335 | ||
336 | Everytime there is a succesful transfer to/from a device, we update a | 336 | Every time there is a successful transfer to/from a device, we update a |
337 | per-device activity timestamp. If not, every now and then we check and | 337 | per-device activity timestamp. If not, every now and then we check and |
338 | if the activity timestamp gets old, we ping the device by sending it a | 338 | if the activity timestamp gets old, we ping the device by sending it a |
339 | Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this | 339 | Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this |
@@ -411,7 +411,7 @@ context (wa_xfer) and submit it. When the xfer is done, our callback is | |||
411 | called and we assign the status bits and release the xfer resources. | 411 | called and we assign the status bits and release the xfer resources. |
412 | 412 | ||
413 | In dequeue() we are basically cancelling/aborting the transfer. We issue | 413 | In dequeue() we are basically cancelling/aborting the transfer. We issue |
414 | a xfer abort request to the HC, cancell all the URBs we had submitted | 414 | a xfer abort request to the HC, cancel all the URBs we had submitted |
415 | and not yet done and when all that is done, the xfer callback will be | 415 | and not yet done and when all that is done, the xfer callback will be |
416 | called--this will call the URB callback. | 416 | called--this will call the URB callback. |
417 | 417 | ||
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt index 6f24f566955a..fe6a99a32bbd 100644 --- a/Documentation/usb/anchors.txt +++ b/Documentation/usb/anchors.txt | |||
@@ -27,7 +27,7 @@ Association and disassociation of URBs with anchors | |||
27 | 27 | ||
28 | An association of URBs to an anchor is made by an explicit | 28 | An association of URBs to an anchor is made by an explicit |
29 | call to usb_anchor_urb(). The association is maintained until | 29 | call to usb_anchor_urb(). The association is maintained until |
30 | an URB is finished by (successfull) completion. Thus disassociation | 30 | an URB is finished by (successful) completion. Thus disassociation |
31 | is automatic. A function is provided to forcibly finish (kill) | 31 | is automatic. A function is provided to forcibly finish (kill) |
32 | all URBs associated with an anchor. | 32 | all URBs associated with an anchor. |
33 | Furthermore, disassociation can be made with usb_unanchor_urb() | 33 | Furthermore, disassociation can be made with usb_unanchor_urb() |
@@ -76,4 +76,4 @@ usb_get_from_anchor() | |||
76 | Returns the oldest anchored URB of an anchor. The URB is unanchored | 76 | Returns the oldest anchored URB of an anchor. The URB is unanchored |
77 | and returned with a reference. As you may mix URBs to several | 77 | and returned with a reference. As you may mix URBs to several |
78 | destinations in one anchor you have no guarantee the chronologically | 78 | destinations in one anchor you have no guarantee the chronologically |
79 | first submitted URB is returned. \ No newline at end of file | 79 | first submitted URB is returned. |
diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt index 7c812411945b..bfb36b34b79e 100644 --- a/Documentation/usb/callbacks.txt +++ b/Documentation/usb/callbacks.txt | |||
@@ -65,7 +65,7 @@ Accept or decline an interface. If you accept the device return 0, | |||
65 | otherwise -ENODEV or -ENXIO. Other error codes should be used only if a | 65 | otherwise -ENODEV or -ENXIO. Other error codes should be used only if a |
66 | genuine error occurred during initialisation which prevented a driver | 66 | genuine error occurred during initialisation which prevented a driver |
67 | from accepting a device that would else have been accepted. | 67 | from accepting a device that would else have been accepted. |
68 | You are strongly encouraged to use usbcore'sfacility, | 68 | You are strongly encouraged to use usbcore's facility, |
69 | usb_set_intfdata(), to associate a data structure with an interface, so | 69 | usb_set_intfdata(), to associate a data structure with an interface, so |
70 | that you know which internal state and identity you associate with a | 70 | that you know which internal state and identity you associate with a |
71 | particular interface. The device will not be suspended and you may do IO | 71 | particular interface. The device will not be suspended and you may do IO |
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt index 270481906dc8..6c3c625b7f30 100644 --- a/Documentation/usb/usbmon.txt +++ b/Documentation/usb/usbmon.txt | |||
@@ -229,16 +229,26 @@ struct usbmon_packet { | |||
229 | int status; /* 28: */ | 229 | int status; /* 28: */ |
230 | unsigned int length; /* 32: Length of data (submitted or actual) */ | 230 | unsigned int length; /* 32: Length of data (submitted or actual) */ |
231 | unsigned int len_cap; /* 36: Delivered length */ | 231 | unsigned int len_cap; /* 36: Delivered length */ |
232 | unsigned char setup[8]; /* 40: Only for Control 'S' */ | 232 | union { /* 40: */ |
233 | }; /* 48 bytes total */ | 233 | unsigned char setup[SETUP_LEN]; /* Only for Control S-type */ |
234 | struct iso_rec { /* Only for ISO */ | ||
235 | int error_count; | ||
236 | int numdesc; | ||
237 | } iso; | ||
238 | } s; | ||
239 | int interval; /* 48: Only for Interrupt and ISO */ | ||
240 | int start_frame; /* 52: For ISO */ | ||
241 | unsigned int xfer_flags; /* 56: copy of URB's transfer_flags */ | ||
242 | unsigned int ndesc; /* 60: Actual number of ISO descriptors */ | ||
243 | }; /* 64 total length */ | ||
234 | 244 | ||
235 | These events can be received from a character device by reading with read(2), | 245 | These events can be received from a character device by reading with read(2), |
236 | with an ioctl(2), or by accessing the buffer with mmap. | 246 | with an ioctl(2), or by accessing the buffer with mmap. However, read(2) |
247 | only returns first 48 bytes for compatibility reasons. | ||
237 | 248 | ||
238 | The character device is usually called /dev/usbmonN, where N is the USB bus | 249 | The character device is usually called /dev/usbmonN, where N is the USB bus |
239 | number. Number zero (/dev/usbmon0) is special and means "all buses". | 250 | number. Number zero (/dev/usbmon0) is special and means "all buses". |
240 | However, this feature is not implemented yet. Note that specific naming | 251 | Note that specific naming policy is set by your Linux distribution. |
241 | policy is set by your Linux distribution. | ||
242 | 252 | ||
243 | If you create /dev/usbmon0 by hand, make sure that it is owned by root | 253 | If you create /dev/usbmon0 by hand, make sure that it is owned by root |
244 | and has mode 0600. Otherwise, unpriviledged users will be able to snoop | 254 | and has mode 0600. Otherwise, unpriviledged users will be able to snoop |
@@ -279,9 +289,10 @@ size is out of [unspecified] bounds for this kernel, the call fails with | |||
279 | This call returns the current size of the buffer in bytes. | 289 | This call returns the current size of the buffer in bytes. |
280 | 290 | ||
281 | MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg) | 291 | MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg) |
292 | MON_IOCX_GETX, defined as _IOW(MON_IOC_MAGIC, 10, struct mon_get_arg) | ||
282 | 293 | ||
283 | This call waits for events to arrive if none were in the kernel buffer, | 294 | These calls wait for events to arrive if none were in the kernel buffer, |
284 | then returns the first event. Its argument is a pointer to the following | 295 | then return the first event. The argument is a pointer to the following |
285 | structure: | 296 | structure: |
286 | 297 | ||
287 | struct mon_get_arg { | 298 | struct mon_get_arg { |
@@ -294,6 +305,8 @@ Before the call, hdr, data, and alloc should be filled. Upon return, the area | |||
294 | pointed by hdr contains the next event structure, and the data buffer contains | 305 | pointed by hdr contains the next event structure, and the data buffer contains |
295 | the data, if any. The event is removed from the kernel buffer. | 306 | the data, if any. The event is removed from the kernel buffer. |
296 | 307 | ||
308 | The MON_IOCX_GET copies 48 bytes, MON_IOCX_GETX copies 64 bytes. | ||
309 | |||
297 | MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg) | 310 | MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg) |
298 | 311 | ||
299 | This ioctl is primarily used when the application accesses the buffer | 312 | This ioctl is primarily used when the application accesses the buffer |
diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv index 0d93fa1ac25e..f11c583295e9 100644 --- a/Documentation/video4linux/CARDLIST.bttv +++ b/Documentation/video4linux/CARDLIST.bttv | |||
@@ -135,7 +135,7 @@ | |||
135 | 134 -> Adlink RTV24 | 135 | 134 -> Adlink RTV24 |
136 | 135 -> DViCO FusionHDTV 5 Lite [18ac:d500] | 136 | 135 -> DViCO FusionHDTV 5 Lite [18ac:d500] |
137 | 136 -> Acorp Y878F [9511:1540] | 137 | 136 -> Acorp Y878F [9511:1540] |
138 | 137 -> Conceptronic CTVFMi v2 | 138 | 137 -> Conceptronic CTVFMi v2 [036e:109e] |
139 | 138 -> Prolink Pixelview PV-BT878P+ (Rev.2E) | 139 | 138 -> Prolink Pixelview PV-BT878P+ (Rev.2E) |
140 | 139 -> Prolink PixelView PlayTV MPEG2 PV-M4900 | 140 | 139 -> Prolink PixelView PlayTV MPEG2 PV-M4900 |
141 | 140 -> Osprey 440 [0070:ff07] | 141 | 140 -> Osprey 440 [0070:ff07] |
@@ -154,3 +154,7 @@ | |||
154 | 153 -> PHYTEC VD-012 (bt878) | 154 | 153 -> PHYTEC VD-012 (bt878) |
155 | 154 -> PHYTEC VD-012-X1 (bt878) | 155 | 154 -> PHYTEC VD-012-X1 (bt878) |
156 | 155 -> PHYTEC VD-012-X2 (bt878) | 156 | 155 -> PHYTEC VD-012-X2 (bt878) |
157 | 156 -> IVCE-8784 [0000:f050,0001:f050,0002:f050,0003:f050] | ||
158 | 157 -> Geovision GV-800(S) (master) [800a:763d] | ||
159 | 158 -> Geovision GV-800(S) (slave) [800b:763d,800c:763d,800d:763d] | ||
160 | 159 -> ProVideo PV183 [1830:1540,1831:1540,1832:1540,1833:1540,1834:1540,1835:1540,1836:1540,1837:1540] | ||
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885 index 35ea130e9898..450b8f8c389b 100644 --- a/Documentation/video4linux/CARDLIST.cx23885 +++ b/Documentation/video4linux/CARDLIST.cx23885 | |||
@@ -12,3 +12,12 @@ | |||
12 | 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78] | 12 | 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78] |
13 | 12 -> Leadtek Winfast PxDVR3200 H [107d:6681] | 13 | 12 -> Leadtek Winfast PxDVR3200 H [107d:6681] |
14 | 13 -> Compro VideoMate E650F [185b:e800] | 14 | 13 -> Compro VideoMate E650F [185b:e800] |
15 | 14 -> TurboSight TBS 6920 [6920:8888] | ||
16 | 15 -> TeVii S470 [d470:9022] | ||
17 | 16 -> DVBWorld DVB-S2 2005 [0001:2005] | ||
18 | 17 -> NetUP Dual DVB-S2 CI [1b55:2a2c] | ||
19 | 18 -> Hauppauge WinTV-HVR1270 [0070:2211] | ||
20 | 19 -> Hauppauge WinTV-HVR1275 [0070:2215] | ||
21 | 20 -> Hauppauge WinTV-HVR1255 [0070:2251] | ||
22 | 21 -> Hauppauge WinTV-HVR1210 [0070:2291,0070:2295] | ||
23 | 22 -> Mygica X8506 DMB-TH [14f1:8651] | ||
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88 index 0d08f1edcf6d..89093f531727 100644 --- a/Documentation/video4linux/CARDLIST.cx88 +++ b/Documentation/video4linux/CARDLIST.cx88 | |||
@@ -77,3 +77,6 @@ | |||
77 | 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200] | 77 | 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200] |
78 | 77 -> TBS 8910 DVB-S [8910:8888] | 78 | 77 -> TBS 8910 DVB-S [8910:8888] |
79 | 78 -> Prof 6200 DVB-S [b022:3022] | 79 | 78 -> Prof 6200 DVB-S [b022:3022] |
80 | 79 -> Terratec Cinergy HT PCI MKII [153b:1177] | ||
81 | 80 -> Hauppauge WinTV-IR Only [0070:9290] | ||
82 | 81 -> Leadtek WinFast DTV1800 Hybrid [107d:6654] | ||
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 75bded8a4aa2..a98a688c11b8 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx | |||
@@ -7,17 +7,17 @@ | |||
7 | 6 -> Terratec Cinergy 200 USB (em2800) | 7 | 6 -> Terratec Cinergy 200 USB (em2800) |
8 | 7 -> Leadtek Winfast USB II (em2800) [0413:6023] | 8 | 7 -> Leadtek Winfast USB II (em2800) [0413:6023] |
9 | 8 -> Kworld USB2800 (em2800) | 9 | 8 -> Kworld USB2800 (em2800) |
10 | 9 -> Pinnacle Dazzle DVC 90/DVC 100 (em2820/em2840) [2304:0207,2304:021a] | 10 | 9 -> Pinnacle Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker (em2820/em2840) [1b80:e302,2304:0207,2304:021a] |
11 | 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500] | 11 | 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500] |
12 | 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] | 12 | 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] |
13 | 12 -> Kworld PVR TV 2800 RF (em2820/em2840) | 13 | 12 -> Kworld PVR TV 2800 RF (em2820/em2840) |
14 | 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] | 14 | 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] |
15 | 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) | 15 | 14 -> SIIG AVTuner-PVR / Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) |
16 | 15 -> V-Gear PocketTV (em2800) | 16 | 15 -> V-Gear PocketTV (em2800) |
17 | 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b] | 17 | 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b] |
18 | 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227] | 18 | 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227] |
19 | 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502] | 19 | 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502] |
20 | 19 -> PointNix Intra-Oral Camera (em2860) | 20 | 19 -> EM2860/SAA711X Reference Design (em2860) |
21 | 20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002] | 21 | 20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002] |
22 | 21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801] | 22 | 21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801] |
23 | 22 -> Unknown EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751] | 23 | 22 -> Unknown EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751] |
@@ -30,7 +30,6 @@ | |||
30 | 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840) | 30 | 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840) |
31 | 31 -> Usbgear VD204v9 (em2821) | 31 | 31 -> Usbgear VD204v9 (em2821) |
32 | 32 -> Supercomp USB 2.0 TV (em2821) | 32 | 32 -> Supercomp USB 2.0 TV (em2821) |
33 | 33 -> SIIG AVTuner-PVR/Prolink PlayTV USB 2.0 (em2821) | ||
34 | 34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f] | 33 | 34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f] |
35 | 35 -> Typhoon DVD Maker (em2860) | 34 | 35 -> Typhoon DVD Maker (em2860) |
36 | 36 -> NetGMBH Cam (em2860) | 35 | 36 -> NetGMBH Cam (em2860) |
@@ -58,3 +57,11 @@ | |||
58 | 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] | 57 | 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] |
59 | 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] | 58 | 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] |
60 | 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840) | 59 | 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840) |
60 | 62 -> Gadmei TVR200 (em2820/em2840) | ||
61 | 63 -> Kaiomy TVnPC U2 (em2860) [eb1a:e303] | ||
62 | 64 -> Easy Cap Capture DC-60 (em2860) | ||
63 | 65 -> IO-DATA GV-MVP/SZ (em2820/em2840) [04bb:0515] | ||
64 | 66 -> Empire dual TV (em2880) | ||
65 | 67 -> Terratec Grabby (em2860) [0ccd:0096] | ||
66 | 68 -> Terratec AV350 (em2860) [0ccd:0084] | ||
67 | 69 -> KWorld ATSC 315U HDTV TV Box (em2882) [eb1a:a313] | ||
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index b8d470596b0c..15562427e8a9 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 | |||
@@ -124,10 +124,10 @@ | |||
124 | 123 -> Beholder BeholdTV 407 [0000:4070] | 124 | 123 -> Beholder BeholdTV 407 [0000:4070] |
125 | 124 -> Beholder BeholdTV 407 FM [0000:4071] | 125 | 124 -> Beholder BeholdTV 407 FM [0000:4071] |
126 | 125 -> Beholder BeholdTV 409 [0000:4090] | 126 | 125 -> Beholder BeholdTV 409 [0000:4090] |
127 | 126 -> Beholder BeholdTV 505 FM/RDS [0000:5051,0000:505B,5ace:5050] | 127 | 126 -> Beholder BeholdTV 505 FM [5ace:5050] |
128 | 127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] | 128 | 127 -> Beholder BeholdTV 507 FM / BeholdTV 509 FM [5ace:5070,5ace:5090] |
129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] | 129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] |
130 | 129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] | 130 | 129 -> Beholder BeholdTV 607 FM [5ace:6070] |
131 | 130 -> Beholder BeholdTV M6 [5ace:6190] | 131 | 130 -> Beholder BeholdTV M6 [5ace:6190] |
132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] | 132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] |
133 | 132 -> Genius TVGO AM11MCE | 133 | 132 -> Genius TVGO AM11MCE |
@@ -143,7 +143,7 @@ | |||
143 | 142 -> Beholder BeholdTV H6 [5ace:6290] | 143 | 142 -> Beholder BeholdTV H6 [5ace:6290] |
144 | 143 -> Beholder BeholdTV M63 [5ace:6191] | 144 | 143 -> Beholder BeholdTV M63 [5ace:6191] |
145 | 144 -> Beholder BeholdTV M6 Extra [5ace:6193] | 145 | 144 -> Beholder BeholdTV M6 Extra [5ace:6193] |
146 | 145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636] | 146 | 145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636,1461:f736] |
147 | 146 -> ASUSTeK P7131 Analog | 147 | 146 -> ASUSTeK P7131 Analog |
148 | 147 -> Asus Tiger 3in1 [1043:4878] | 148 | 147 -> Asus Tiger 3in1 [1043:4878] |
149 | 148 -> Encore ENLTV-FM v5.3 [1a7f:2008] | 149 | 148 -> Encore ENLTV-FM v5.3 [1a7f:2008] |
@@ -153,3 +153,17 @@ | |||
153 | 152 -> Asus Tiger Rev:1.00 [1043:4857] | 153 | 152 -> Asus Tiger Rev:1.00 [1043:4857] |
154 | 153 -> Kworld Plus TV Analog Lite PCI [17de:7128] | 154 | 153 -> Kworld Plus TV Analog Lite PCI [17de:7128] |
155 | 154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d] | 155 | 154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d] |
156 | 155 -> Hauppauge WinTV-HVR1120 ATSC/QAM-Hybrid [0070:6706,0070:6708] | ||
157 | 156 -> Hauppauge WinTV-HVR1110r3 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a] | ||
158 | 157 -> Avermedia AVerTV Studio 507UA [1461:a11b] | ||
159 | 158 -> AVerMedia Cardbus TV/Radio (E501R) [1461:b7e9] | ||
160 | 159 -> Beholder BeholdTV 505 RDS [0000:505B] | ||
161 | 160 -> Beholder BeholdTV 507 RDS [0000:5071] | ||
162 | 161 -> Beholder BeholdTV 507 RDS [0000:507B] | ||
163 | 162 -> Beholder BeholdTV 607 FM [5ace:6071] | ||
164 | 163 -> Beholder BeholdTV 609 FM [5ace:6090] | ||
165 | 164 -> Beholder BeholdTV 609 FM [5ace:6091] | ||
166 | 165 -> Beholder BeholdTV 607 RDS [5ace:6072] | ||
167 | 166 -> Beholder BeholdTV 607 RDS [5ace:6073] | ||
168 | 167 -> Beholder BeholdTV 609 RDS [5ace:6092] | ||
169 | 168 -> Beholder BeholdTV 609 RDS [5ace:6093] | ||
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner index 691d2f37dc57..be67844074dd 100644 --- a/Documentation/video4linux/CARDLIST.tuner +++ b/Documentation/video4linux/CARDLIST.tuner | |||
@@ -76,3 +76,5 @@ tuner=75 - Philips TEA5761 FM Radio | |||
76 | tuner=76 - Xceive 5000 tuner | 76 | tuner=76 - Xceive 5000 tuner |
77 | tuner=77 - TCL tuner MF02GIP-5N-E | 77 | tuner=77 - TCL tuner MF02GIP-5N-E |
78 | tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner | 78 | tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner |
79 | tuner=79 - Philips PAL/SECAM multi (FM1216 MK5) | ||
80 | tuner=80 - Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough | ||
diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran index 295462b2317a..0e89e7676298 100644 --- a/Documentation/video4linux/Zoran +++ b/Documentation/video4linux/Zoran | |||
@@ -401,8 +401,7 @@ Additional notes for software developers: | |||
401 | first set the correct norm. Well, it seems logically correct: TV | 401 | first set the correct norm. Well, it seems logically correct: TV |
402 | standard is "more constant" for current country than geometry | 402 | standard is "more constant" for current country than geometry |
403 | settings of a variety of TV capture cards which may work in ITU or | 403 | settings of a variety of TV capture cards which may work in ITU or |
404 | square pixel format. Remember that users now can lock the norm to | 404 | square pixel format. |
405 | avoid any ambiguity. | ||
406 | -- | 405 | -- |
407 | Please note that lavplay/lavrec are also included in the MJPEG-tools | 406 | Please note that lavplay/lavrec are also included in the MJPEG-tools |
408 | (http://mjpeg.sf.net/). | 407 | (http://mjpeg.sf.net/). |
diff --git a/Documentation/video4linux/bttv/Insmod-options b/Documentation/video4linux/bttv/Insmod-options index 5ef75787f83a..bbe3ed667d91 100644 --- a/Documentation/video4linux/bttv/Insmod-options +++ b/Documentation/video4linux/bttv/Insmod-options | |||
@@ -81,16 +81,6 @@ tuner.o | |||
81 | pal=[bdgil] select PAL variant (used for some tuners | 81 | pal=[bdgil] select PAL variant (used for some tuners |
82 | only, important for the audio carrier). | 82 | only, important for the audio carrier). |
83 | 83 | ||
84 | tvmixer.o | ||
85 | registers a mixer device for the TV card's volume/bass/treble | ||
86 | controls (requires a i2c audio control chip like the msp3400). | ||
87 | |||
88 | insmod args: | ||
89 | debug=1 print some debug info to the syslog. | ||
90 | devnr=n allocate device #n (0 == /dev/mixer, | ||
91 | 1 = /dev/mixer1, ...), default is to | ||
92 | use the first free one. | ||
93 | |||
94 | tvaudio.o | 84 | tvaudio.o |
95 | new, experimental module which is supported to provide a single | 85 | new, experimental module which is supported to provide a single |
96 | driver for all simple i2c audio control chips (tda/tea*). | 86 | driver for all simple i2c audio control chips (tda/tea*). |
diff --git a/Documentation/video4linux/bttv/README b/Documentation/video4linux/bttv/README index 7ca2154c2bf5..3a367cdb664e 100644 --- a/Documentation/video4linux/bttv/README +++ b/Documentation/video4linux/bttv/README | |||
@@ -63,8 +63,8 @@ If you have some knowledge and spare time, please try to fix this | |||
63 | yourself (patches very welcome of course...) You know: The linux | 63 | yourself (patches very welcome of course...) You know: The linux |
64 | slogan is "Do it yourself". | 64 | slogan is "Do it yourself". |
65 | 65 | ||
66 | There is a mailing list: video4linux-list@redhat.com. | 66 | There is a mailing list: linux-media@vger.kernel.org |
67 | https://listman.redhat.com/mailman/listinfo/video4linux-list | 67 | http://vger.kernel.org/vger-lists.html#linux-media |
68 | 68 | ||
69 | If you have trouble with some specific TV card, try to ask there | 69 | If you have trouble with some specific TV card, try to ask there |
70 | instead of mailing me directly. The chance that someone with the | 70 | instead of mailing me directly. The chance that someone with the |
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt index 914cb7e734a2..4652c0f5da32 100644 --- a/Documentation/video4linux/cx18.txt +++ b/Documentation/video4linux/cx18.txt | |||
@@ -11,7 +11,7 @@ encoder chip: | |||
11 | 2) Some people have problems getting the i2c bus to work. | 11 | 2) Some people have problems getting the i2c bus to work. |
12 | The symptom is that the eeprom cannot be read and the card is | 12 | The symptom is that the eeprom cannot be read and the card is |
13 | unusable. This is probably fixed, but if you have problems | 13 | unusable. This is probably fixed, but if you have problems |
14 | then post to the video4linux or ivtv-users mailinglist. | 14 | then post to the video4linux or ivtv-users mailing list. |
15 | 15 | ||
16 | 3) VBI (raw or sliced) has not yet been implemented. | 16 | 3) VBI (raw or sliced) has not yet been implemented. |
17 | 17 | ||
diff --git a/Documentation/video4linux/cx2341x/README.hm12 b/Documentation/video4linux/cx2341x/README.hm12 index 0e213ed095e6..b36148ea0750 100644 --- a/Documentation/video4linux/cx2341x/README.hm12 +++ b/Documentation/video4linux/cx2341x/README.hm12 | |||
@@ -32,6 +32,10 @@ Y, U and V planes. This code assumes frames of 720x576 (PAL) pixels. | |||
32 | The width of a frame is always 720 pixels, regardless of the actual specified | 32 | The width of a frame is always 720 pixels, regardless of the actual specified |
33 | width. | 33 | width. |
34 | 34 | ||
35 | If the height is not a multiple of 32 lines, then the captured video is | ||
36 | missing macroblocks at the end and is unusable. So the height must be a | ||
37 | multiple of 32. | ||
38 | |||
35 | -------------------------------------------------------------------------- | 39 | -------------------------------------------------------------------------- |
36 | 40 | ||
37 | #include <stdio.h> | 41 | #include <stdio.h> |
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 1c58a7630146..2bcf78896e22 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt | |||
@@ -32,6 +32,7 @@ spca561 041e:403b Creative Webcam Vista (VF0010) | |||
32 | zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250) | 32 | zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250) |
33 | ov519 041e:4052 Creative Live! VISTA IM | 33 | ov519 041e:4052 Creative Live! VISTA IM |
34 | zc3xx 041e:4053 Creative Live!Cam Video IM | 34 | zc3xx 041e:4053 Creative Live!Cam Video IM |
35 | vc032x 041e:405b Creative Live! Cam Notebook Ultra (VC0130) | ||
35 | ov519 041e:405f Creative Live! VISTA VF0330 | 36 | ov519 041e:405f Creative Live! VISTA VF0330 |
36 | ov519 041e:4060 Creative Live! VISTA VF0350 | 37 | ov519 041e:4060 Creative Live! VISTA VF0350 |
37 | ov519 041e:4061 Creative Live! VISTA VF0400 | 38 | ov519 041e:4061 Creative Live! VISTA VF0400 |
@@ -162,10 +163,11 @@ sunplus 055f:c650 Mustek MDC5500Z | |||
162 | zc3xx 055f:d003 Mustek WCam300A | 163 | zc3xx 055f:d003 Mustek WCam300A |
163 | zc3xx 055f:d004 Mustek WCam300 AN | 164 | zc3xx 055f:d004 Mustek WCam300 AN |
164 | conex 0572:0041 Creative Notebook cx11646 | 165 | conex 0572:0041 Creative Notebook cx11646 |
165 | ov519 05a9:0519 OmniVision | 166 | ov519 05a9:0519 OV519 Microphone |
166 | ov519 05a9:0530 OmniVision | 167 | ov519 05a9:0530 OmniVision |
167 | ov519 05a9:4519 OmniVision | 168 | ov519 05a9:4519 Webcam Classic |
168 | ov519 05a9:8519 OmniVision | 169 | ov519 05a9:8519 OmniVision |
170 | ov519 05a9:a518 D-Link DSB-C310 Webcam | ||
169 | sunplus 05da:1018 Digital Dream Enigma 1.3 | 171 | sunplus 05da:1018 Digital Dream Enigma 1.3 |
170 | stk014 05e1:0893 Syntek DV4000 | 172 | stk014 05e1:0893 Syntek DV4000 |
171 | spca561 060b:a001 Maxell Compact Pc PM3 | 173 | spca561 060b:a001 Maxell Compact Pc PM3 |
@@ -177,6 +179,7 @@ spca506 06e1:a190 ADS Instant VCD | |||
177 | ov534 06f8:3002 Hercules Blog Webcam | 179 | ov534 06f8:3002 Hercules Blog Webcam |
178 | ov534 06f8:3003 Hercules Dualpix HD Weblog | 180 | ov534 06f8:3003 Hercules Dualpix HD Weblog |
179 | sonixj 06f8:3004 Hercules Classic Silver | 181 | sonixj 06f8:3004 Hercules Classic Silver |
182 | sonixj 06f8:3008 Hercules Deluxe Optical Glass | ||
180 | spca508 0733:0110 ViewQuest VQ110 | 183 | spca508 0733:0110 ViewQuest VQ110 |
181 | spca508 0130:0130 Clone Digital Webcam 11043 | 184 | spca508 0130:0130 Clone Digital Webcam 11043 |
182 | spca501 0733:0401 Intel Create and Share | 185 | spca501 0733:0401 Intel Create and Share |
@@ -193,6 +196,7 @@ spca500 084d:0003 D-Link DSC-350 | |||
193 | spca500 08ca:0103 Aiptek PocketDV | 196 | spca500 08ca:0103 Aiptek PocketDV |
194 | sunplus 08ca:0104 Aiptek PocketDVII 1.3 | 197 | sunplus 08ca:0104 Aiptek PocketDVII 1.3 |
195 | sunplus 08ca:0106 Aiptek Pocket DV3100+ | 198 | sunplus 08ca:0106 Aiptek Pocket DV3100+ |
199 | mr97310a 08ca:0111 Aiptek PenCam VGA+ | ||
196 | sunplus 08ca:2008 Aiptek Mini PenCam 2 M | 200 | sunplus 08ca:2008 Aiptek Mini PenCam 2 M |
197 | sunplus 08ca:2010 Aiptek PocketCam 3M | 201 | sunplus 08ca:2010 Aiptek PocketCam 3M |
198 | sunplus 08ca:2016 Aiptek PocketCam 2 Mega | 202 | sunplus 08ca:2016 Aiptek PocketCam 2 Mega |
@@ -207,6 +211,7 @@ sunplus 08ca:2050 Medion MD 41437 | |||
207 | sunplus 08ca:2060 Aiptek PocketDV5300 | 211 | sunplus 08ca:2060 Aiptek PocketDV5300 |
208 | tv8532 0923:010f ICM532 cams | 212 | tv8532 0923:010f ICM532 cams |
209 | mars 093a:050f Mars-Semi Pc-Camera | 213 | mars 093a:050f Mars-Semi Pc-Camera |
214 | mr97310a 093a:010f Sakar Digital no. 77379 | ||
210 | pac207 093a:2460 Qtec Webcam 100 | 215 | pac207 093a:2460 Qtec Webcam 100 |
211 | pac207 093a:2461 HP Webcam | 216 | pac207 093a:2461 HP Webcam |
212 | pac207 093a:2463 Philips SPC 220 NC | 217 | pac207 093a:2463 Philips SPC 220 NC |
@@ -215,6 +220,7 @@ pac207 093a:2468 PAC207 | |||
215 | pac207 093a:2470 Genius GF112 | 220 | pac207 093a:2470 Genius GF112 |
216 | pac207 093a:2471 Genius VideoCam ge111 | 221 | pac207 093a:2471 Genius VideoCam ge111 |
217 | pac207 093a:2472 Genius VideoCam ge110 | 222 | pac207 093a:2472 Genius VideoCam ge110 |
223 | pac207 093a:2474 Genius iLook 111 | ||
218 | pac207 093a:2476 Genius e-Messenger 112 | 224 | pac207 093a:2476 Genius e-Messenger 112 |
219 | pac7311 093a:2600 PAC7311 Typhoon | 225 | pac7311 093a:2600 PAC7311 Typhoon |
220 | pac7311 093a:2601 Philips SPC 610 NC | 226 | pac7311 093a:2601 Philips SPC 610 NC |
@@ -262,6 +268,11 @@ sonixj 0c45:60ec SN9C105+MO4000 | |||
262 | sonixj 0c45:60fb Surfer NoName | 268 | sonixj 0c45:60fb Surfer NoName |
263 | sonixj 0c45:60fc LG-LIC300 | 269 | sonixj 0c45:60fc LG-LIC300 |
264 | sonixj 0c45:60fe Microdia Audio | 270 | sonixj 0c45:60fe Microdia Audio |
271 | sonixj 0c45:6100 PC Camera (SN9C128) | ||
272 | sonixj 0c45:610a PC Camera (SN9C128) | ||
273 | sonixj 0c45:610b PC Camera (SN9C128) | ||
274 | sonixj 0c45:610c PC Camera (SN9C128) | ||
275 | sonixj 0c45:610e PC Camera (SN9C128) | ||
265 | sonixj 0c45:6128 Microdia/Sonix SNP325 | 276 | sonixj 0c45:6128 Microdia/Sonix SNP325 |
266 | sonixj 0c45:612a Avant Camera | 277 | sonixj 0c45:612a Avant Camera |
267 | sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix | 278 | sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix |
@@ -279,6 +290,7 @@ spca561 10fd:7e50 FlyCam Usb 100 | |||
279 | zc3xx 10fd:8050 Typhoon Webshot II USB 300k | 290 | zc3xx 10fd:8050 Typhoon Webshot II USB 300k |
280 | ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) | 291 | ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) |
281 | pac207 145f:013a Trust WB-1300N | 292 | pac207 145f:013a Trust WB-1300N |
293 | vc032x 15b8:6001 HP 2.0 Megapixel | ||
282 | vc032x 15b8:6002 HP 2.0 Megapixel rz406aa | 294 | vc032x 15b8:6002 HP 2.0 Megapixel rz406aa |
283 | spca501 1776:501c Arowana 300K CMOS Camera | 295 | spca501 1776:501c Arowana 300K CMOS Camera |
284 | t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops | 296 | t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops |
diff --git a/Documentation/video4linux/pxa_camera.txt b/Documentation/video4linux/pxa_camera.txt new file mode 100644 index 000000000000..4f6d0ca01956 --- /dev/null +++ b/Documentation/video4linux/pxa_camera.txt | |||
@@ -0,0 +1,174 @@ | |||
1 | PXA-Camera Host Driver | ||
2 | ====================== | ||
3 | |||
4 | Constraints | ||
5 | ----------- | ||
6 | a) Image size for YUV422P format | ||
7 | All YUV422P images are enforced to have width x height % 16 = 0. | ||
8 | This is due to DMA constraints, which transfers only planes of 8 byte | ||
9 | multiples. | ||
10 | |||
11 | |||
12 | Global video workflow | ||
13 | --------------------- | ||
14 | a) QCI stopped | ||
15 | Initialy, the QCI interface is stopped. | ||
16 | When a buffer is queued (pxa_videobuf_ops->buf_queue), the QCI starts. | ||
17 | |||
18 | b) QCI started | ||
19 | More buffers can be queued while the QCI is started without halting the | ||
20 | capture. The new buffers are "appended" at the tail of the DMA chain, and | ||
21 | smoothly captured one frame after the other. | ||
22 | |||
23 | Once a buffer is filled in the QCI interface, it is marked as "DONE" and | ||
24 | removed from the active buffers list. It can be then requeud or dequeued by | ||
25 | userland application. | ||
26 | |||
27 | Once the last buffer is filled in, the QCI interface stops. | ||
28 | |||
29 | c) Capture global finite state machine schema | ||
30 | |||
31 | +----+ +---+ +----+ | ||
32 | | DQ | | Q | | DQ | | ||
33 | | v | v | v | ||
34 | +-----------+ +------------------------+ | ||
35 | | STOP | | Wait for capture start | | ||
36 | +-----------+ Q +------------------------+ | ||
37 | +-> | QCI: stop | ------------------> | QCI: run | <------------+ | ||
38 | | | DMA: stop | | DMA: stop | | | ||
39 | | +-----------+ +-----> +------------------------+ | | ||
40 | | / | | | ||
41 | | / +---+ +----+ | | | ||
42 | |capture list empty / | Q | | DQ | | QCI Irq EOF | | ||
43 | | / | v | v v | | ||
44 | | +--------------------+ +----------------------+ | | ||
45 | | | DMA hotlink missed | | Capture running | | | ||
46 | | +--------------------+ +----------------------+ | | ||
47 | | | QCI: run | +-----> | QCI: run | <-+ | | ||
48 | | | DMA: stop | / | DMA: run | | | | ||
49 | | +--------------------+ / +----------------------+ | Other | | ||
50 | | ^ /DMA still | | channels | | ||
51 | | | capture list / running | DMA Irq End | not | | ||
52 | | | not empty / | | finished | | ||
53 | | | / v | yet | | ||
54 | | +----------------------+ +----------------------+ | | | ||
55 | | | Videobuf released | | Channel completed | | | | ||
56 | | +----------------------+ +----------------------+ | | | ||
57 | +-- | QCI: run | | QCI: run | --+ | | ||
58 | | DMA: run | | DMA: run | | | ||
59 | +----------------------+ +----------------------+ | | ||
60 | ^ / | | | ||
61 | | no overrun / | overrun | | ||
62 | | / v | | ||
63 | +--------------------+ / +----------------------+ | | ||
64 | | Frame completed | / | Frame overran | | | ||
65 | +--------------------+ <-----+ +----------------------+ restart frame | | ||
66 | | QCI: run | | QCI: stop | --------------+ | ||
67 | | DMA: run | | DMA: stop | | ||
68 | +--------------------+ +----------------------+ | ||
69 | |||
70 | Legend: - each box is a FSM state | ||
71 | - each arrow is the condition to transition to another state | ||
72 | - an arrow with a comment is a mandatory transition (no condition) | ||
73 | - arrow "Q" means : a buffer was enqueued | ||
74 | - arrow "DQ" means : a buffer was dequeued | ||
75 | - "QCI: stop" means the QCI interface is not enabled | ||
76 | - "DMA: stop" means all 3 DMA channels are stopped | ||
77 | - "DMA: run" means at least 1 DMA channel is still running | ||
78 | |||
79 | DMA usage | ||
80 | --------- | ||
81 | a) DMA flow | ||
82 | - first buffer queued for capture | ||
83 | Once a first buffer is queued for capture, the QCI is started, but data | ||
84 | transfer is not started. On "End Of Frame" interrupt, the irq handler | ||
85 | starts the DMA chain. | ||
86 | - capture of one videobuffer | ||
87 | The DMA chain starts transfering data into videobuffer RAM pages. | ||
88 | When all pages are transfered, the DMA irq is raised on "ENDINTR" status | ||
89 | - finishing one videobuffer | ||
90 | The DMA irq handler marks the videobuffer as "done", and removes it from | ||
91 | the active running queue | ||
92 | Meanwhile, the next videobuffer (if there is one), is transfered by DMA | ||
93 | - finishing the last videobuffer | ||
94 | On the DMA irq of the last videobuffer, the QCI is stopped. | ||
95 | |||
96 | b) DMA prepared buffer will have this structure | ||
97 | |||
98 | +------------+-----+---------------+-----------------+ | ||
99 | | desc-sg[0] | ... | desc-sg[last] | finisher/linker | | ||
100 | +------------+-----+---------------+-----------------+ | ||
101 | |||
102 | This structure is pointed by dma->sg_cpu. | ||
103 | The descriptors are used as follows : | ||
104 | - desc-sg[i]: i-th descriptor, transfering the i-th sg | ||
105 | element to the video buffer scatter gather | ||
106 | - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN | ||
107 | - linker: has ddadr= desc-sg[0] of next video buffer, dcmd=0 | ||
108 | |||
109 | For the next schema, let's assume d0=desc-sg[0] .. dN=desc-sg[N], | ||
110 | "f" stands for finisher and "l" for linker. | ||
111 | A typical running chain is : | ||
112 | |||
113 | Videobuffer 1 Videobuffer 2 | ||
114 | +---------+----+---+ +----+----+----+---+ | ||
115 | | d0 | .. | dN | l | | d0 | .. | dN | f | | ||
116 | +---------+----+-|-+ ^----+----+----+---+ | ||
117 | | | | ||
118 | +----+ | ||
119 | |||
120 | After the chaining is finished, the chain looks like : | ||
121 | |||
122 | Videobuffer 1 Videobuffer 2 Videobuffer 3 | ||
123 | +---------+----+---+ +----+----+----+---+ +----+----+----+---+ | ||
124 | | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | | ||
125 | +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ | ||
126 | | | | | | ||
127 | +----+ +----+ | ||
128 | new_link | ||
129 | |||
130 | c) DMA hot chaining timeslice issue | ||
131 | |||
132 | As DMA chaining is done while DMA _is_ running, the linking may be done | ||
133 | while the DMA jumps from one Videobuffer to another. On the schema, that | ||
134 | would be a problem if the following sequence is encountered : | ||
135 | |||
136 | - DMA chain is Videobuffer1 + Videobuffer2 | ||
137 | - pxa_videobuf_queue() is called to queue Videobuffer3 | ||
138 | - DMA controller finishes Videobuffer2, and DMA stops | ||
139 | => | ||
140 | Videobuffer 1 Videobuffer 2 | ||
141 | +---------+----+---+ +----+----+----+---+ | ||
142 | | d0 | .. | dN | l | | d0 | .. | dN | f | | ||
143 | +---------+----+-|-+ ^----+----+----+-^-+ | ||
144 | | | | | ||
145 | +----+ +-- DMA DDADR loads DDADR_STOP | ||
146 | |||
147 | - pxa_dma_add_tail_buf() is called, the Videobuffer2 "finisher" is | ||
148 | replaced by a "linker" to Videobuffer3 (creation of new_link) | ||
149 | - pxa_videobuf_queue() finishes | ||
150 | - the DMA irq handler is called, which terminates Videobuffer2 | ||
151 | - Videobuffer3 capture is not scheduled on DMA chain (as it stopped !!!) | ||
152 | |||
153 | Videobuffer 1 Videobuffer 2 Videobuffer 3 | ||
154 | +---------+----+---+ +----+----+----+---+ +----+----+----+---+ | ||
155 | | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | | ||
156 | +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ | ||
157 | | | | | | ||
158 | +----+ +----+ | ||
159 | new_link | ||
160 | DMA DDADR still is DDADR_STOP | ||
161 | |||
162 | - pxa_camera_check_link_miss() is called | ||
163 | This checks if the DMA is finished and a buffer is still on the | ||
164 | pcdev->capture list. If that's the case, the capture will be restarted, | ||
165 | and Videobuffer3 is scheduled on DMA chain. | ||
166 | - the DMA irq handler finishes | ||
167 | |||
168 | Note: if DMA stops just after pxa_camera_check_link_miss() reads DDADR() | ||
169 | value, we have the guarantee that the DMA irq handler will be called back | ||
170 | when the DMA will finish the buffer, and pxa_camera_check_link_miss() will | ||
171 | be called again, to reschedule Videobuffer3. | ||
172 | |||
173 | -- | ||
174 | Author: Robert Jarzmik <robert.jarzmik@free.fr> | ||
diff --git a/Documentation/video4linux/si470x.txt b/Documentation/video4linux/si470x.txt index 49679e6aaa76..3a7823e01b4d 100644 --- a/Documentation/video4linux/si470x.txt +++ b/Documentation/video4linux/si470x.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | Driver for USB radios for the Silicon Labs Si470x FM Radio Receivers | 1 | Driver for USB radios for the Silicon Labs Si470x FM Radio Receivers |
2 | 2 | ||
3 | Copyright (c) 2008 Tobias Lorenz <tobias.lorenz@gmx.net> | 3 | Copyright (c) 2009 Tobias Lorenz <tobias.lorenz@gmx.net> |
4 | 4 | ||
5 | 5 | ||
6 | Information from Silicon Labs | 6 | Information from Silicon Labs |
@@ -41,7 +41,7 @@ chips are known to work: | |||
41 | - 10c4:818a: Silicon Labs USB FM Radio Reference Design | 41 | - 10c4:818a: Silicon Labs USB FM Radio Reference Design |
42 | - 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF) | 42 | - 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF) |
43 | - 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700) | 43 | - 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700) |
44 | - 10c5:819a: DealExtreme USB Radio | 44 | - 10c5:819a: Sanei Electric, Inc. FM USB Radio (sold as DealExtreme.com PCear) |
45 | 45 | ||
46 | 46 | ||
47 | Software | 47 | Software |
@@ -52,6 +52,7 @@ Testing is usually done with most application under Debian/testing: | |||
52 | - gradio - GTK FM radio tuner | 52 | - gradio - GTK FM radio tuner |
53 | - kradio - Comfortable Radio Application for KDE | 53 | - kradio - Comfortable Radio Application for KDE |
54 | - radio - ncurses-based radio application | 54 | - radio - ncurses-based radio application |
55 | - mplayer - The Ultimate Movie Player For Linux | ||
55 | 56 | ||
56 | There is also a library libv4l, which can be used. It's going to have a function | 57 | There is also a library libv4l, which can be used. It's going to have a function |
57 | for frequency seeking, either by using hardware functionality as in radio-si470x | 58 | for frequency seeking, either by using hardware functionality as in radio-si470x |
@@ -69,7 +70,7 @@ Audio Listing | |||
69 | USB Audio is provided by the ALSA snd_usb_audio module. It is recommended to | 70 | USB Audio is provided by the ALSA snd_usb_audio module. It is recommended to |
70 | also select SND_USB_AUDIO, as this is required to get sound from the radio. For | 71 | also select SND_USB_AUDIO, as this is required to get sound from the radio. For |
71 | listing you have to redirect the sound, for example using one of the following | 72 | listing you have to redirect the sound, for example using one of the following |
72 | commands. | 73 | commands. Please adjust the audio devices to your needs (/dev/dsp* and hw:x,x). |
73 | 74 | ||
74 | If you just want to test audio (very poor quality): | 75 | If you just want to test audio (very poor quality): |
75 | cat /dev/dsp1 > /dev/dsp | 76 | cat /dev/dsp1 > /dev/dsp |
@@ -80,6 +81,10 @@ sox -2 --endian little -r 96000 -t oss /dev/dsp1 -t oss /dev/dsp | |||
80 | If you use arts try: | 81 | If you use arts try: |
81 | arecord -D hw:1,0 -r96000 -c2 -f S16_LE | artsdsp aplay -B - | 82 | arecord -D hw:1,0 -r96000 -c2 -f S16_LE | artsdsp aplay -B - |
82 | 83 | ||
84 | If you use mplayer try: | ||
85 | mplayer -radio adevice=hw=1.0:arate=96000 \ | ||
86 | -rawaudio rate=96000 \ | ||
87 | radio://<frequency>/capture | ||
83 | 88 | ||
84 | Module Parameters | 89 | Module Parameters |
85 | ================= | 90 | ================= |
diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index ff124374e9ba..d54c1e4c6a9c 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt | |||
@@ -47,7 +47,9 @@ All drivers have the following structure: | |||
47 | 3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and | 47 | 3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and |
48 | /dev/vtxX) and keeping track of device-node specific data. | 48 | /dev/vtxX) and keeping track of device-node specific data. |
49 | 49 | ||
50 | 4) Filehandle-specific structs containing per-filehandle data. | 50 | 4) Filehandle-specific structs containing per-filehandle data; |
51 | |||
52 | 5) video buffer handling. | ||
51 | 53 | ||
52 | This is a rough schematic of how it all relates: | 54 | This is a rough schematic of how it all relates: |
53 | 55 | ||
@@ -82,12 +84,25 @@ You must register the device instance: | |||
82 | v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); | 84 | v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); |
83 | 85 | ||
84 | Registration will initialize the v4l2_device struct and link dev->driver_data | 86 | Registration will initialize the v4l2_device struct and link dev->driver_data |
85 | to v4l2_dev. Registration will also set v4l2_dev->name to a value derived from | 87 | to v4l2_dev. If v4l2_dev->name is empty then it will be set to a value derived |
86 | dev (driver name followed by the bus_id, to be precise). You may change the | 88 | from dev (driver name followed by the bus_id, to be precise). If you set it |
87 | name after registration if you want. | 89 | up before calling v4l2_device_register then it will be untouched. If dev is |
90 | NULL, then you *must* setup v4l2_dev->name before calling v4l2_device_register. | ||
91 | |||
92 | You can use v4l2_device_set_name() to set the name based on a driver name and | ||
93 | a driver-global atomic_t instance. This will generate names like ivtv0, ivtv1, | ||
94 | etc. If the name ends with a digit, then it will insert a dash: cx18-0, | ||
95 | cx18-1, etc. This function returns the instance number. | ||
88 | 96 | ||
89 | The first 'dev' argument is normally the struct device pointer of a pci_dev, | 97 | The first 'dev' argument is normally the struct device pointer of a pci_dev, |
90 | usb_device or platform_device. | 98 | usb_interface or platform_device. It is rare for dev to be NULL, but it happens |
99 | with ISA devices or when one device creates multiple PCI devices, thus making | ||
100 | it impossible to associate v4l2_dev with a particular parent. | ||
101 | |||
102 | You can also supply a notify() callback that can be called by sub-devices to | ||
103 | notify you of events. Whether you need to set this depends on the sub-device. | ||
104 | Any notifications a sub-device supports must be defined in a header in | ||
105 | include/media/<subdevice>.h. | ||
91 | 106 | ||
92 | You unregister with: | 107 | You unregister with: |
93 | 108 | ||
@@ -95,6 +110,17 @@ You unregister with: | |||
95 | 110 | ||
96 | Unregistering will also automatically unregister all subdevs from the device. | 111 | Unregistering will also automatically unregister all subdevs from the device. |
97 | 112 | ||
113 | If you have a hotpluggable device (e.g. a USB device), then when a disconnect | ||
114 | happens the parent device becomes invalid. Since v4l2_device has a pointer to | ||
115 | that parent device it has to be cleared as well to mark that the parent is | ||
116 | gone. To do this call: | ||
117 | |||
118 | v4l2_device_disconnect(struct v4l2_device *v4l2_dev); | ||
119 | |||
120 | This does *not* unregister the subdevs, so you still need to call the | ||
121 | v4l2_device_unregister() function for that. If your driver is not hotpluggable, | ||
122 | then there is no need to call v4l2_device_disconnect(). | ||
123 | |||
98 | Sometimes you need to iterate over all devices registered by a specific | 124 | Sometimes you need to iterate over all devices registered by a specific |
99 | driver. This is usually the case if multiple device drivers use the same | 125 | driver. This is usually the case if multiple device drivers use the same |
100 | hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv | 126 | hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv |
@@ -134,7 +160,7 @@ The recommended approach is as follows: | |||
134 | 160 | ||
135 | static atomic_t drv_instance = ATOMIC_INIT(0); | 161 | static atomic_t drv_instance = ATOMIC_INIT(0); |
136 | 162 | ||
137 | static int __devinit drv_probe(struct pci_dev *dev, | 163 | static int __devinit drv_probe(struct pci_dev *pdev, |
138 | const struct pci_device_id *pci_id) | 164 | const struct pci_device_id *pci_id) |
139 | { | 165 | { |
140 | ... | 166 | ... |
@@ -218,7 +244,7 @@ to add new ops and categories. | |||
218 | 244 | ||
219 | A sub-device driver initializes the v4l2_subdev struct using: | 245 | A sub-device driver initializes the v4l2_subdev struct using: |
220 | 246 | ||
221 | v4l2_subdev_init(subdev, &ops); | 247 | v4l2_subdev_init(sd, &ops); |
222 | 248 | ||
223 | Afterwards you need to initialize subdev->name with a unique name and set the | 249 | Afterwards you need to initialize subdev->name with a unique name and set the |
224 | module owner. This is done for you if you use the i2c helper functions. | 250 | module owner. This is done for you if you use the i2c helper functions. |
@@ -226,7 +252,7 @@ module owner. This is done for you if you use the i2c helper functions. | |||
226 | A device (bridge) driver needs to register the v4l2_subdev with the | 252 | A device (bridge) driver needs to register the v4l2_subdev with the |
227 | v4l2_device: | 253 | v4l2_device: |
228 | 254 | ||
229 | int err = v4l2_device_register_subdev(device, subdev); | 255 | int err = v4l2_device_register_subdev(v4l2_dev, sd); |
230 | 256 | ||
231 | This can fail if the subdev module disappeared before it could be registered. | 257 | This can fail if the subdev module disappeared before it could be registered. |
232 | After this function was called successfully the subdev->dev field points to | 258 | After this function was called successfully the subdev->dev field points to |
@@ -234,17 +260,17 @@ the v4l2_device. | |||
234 | 260 | ||
235 | You can unregister a sub-device using: | 261 | You can unregister a sub-device using: |
236 | 262 | ||
237 | v4l2_device_unregister_subdev(subdev); | 263 | v4l2_device_unregister_subdev(sd); |
238 | 264 | ||
239 | Afterwards the subdev module can be unloaded and subdev->dev == NULL. | 265 | Afterwards the subdev module can be unloaded and sd->dev == NULL. |
240 | 266 | ||
241 | You can call an ops function either directly: | 267 | You can call an ops function either directly: |
242 | 268 | ||
243 | err = subdev->ops->core->g_chip_ident(subdev, &chip); | 269 | err = sd->ops->core->g_chip_ident(sd, &chip); |
244 | 270 | ||
245 | but it is better and easier to use this macro: | 271 | but it is better and easier to use this macro: |
246 | 272 | ||
247 | err = v4l2_subdev_call(subdev, core, g_chip_ident, &chip); | 273 | err = v4l2_subdev_call(sd, core, g_chip_ident, &chip); |
248 | 274 | ||
249 | The macro will to the right NULL pointer checks and returns -ENODEV if subdev | 275 | The macro will to the right NULL pointer checks and returns -ENODEV if subdev |
250 | is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_chip_ident is | 276 | is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_chip_ident is |
@@ -252,19 +278,19 @@ NULL, or the actual result of the subdev->ops->core->g_chip_ident ops. | |||
252 | 278 | ||
253 | It is also possible to call all or a subset of the sub-devices: | 279 | It is also possible to call all or a subset of the sub-devices: |
254 | 280 | ||
255 | v4l2_device_call_all(dev, 0, core, g_chip_ident, &chip); | 281 | v4l2_device_call_all(v4l2_dev, 0, core, g_chip_ident, &chip); |
256 | 282 | ||
257 | Any subdev that does not support this ops is skipped and error results are | 283 | Any subdev that does not support this ops is skipped and error results are |
258 | ignored. If you want to check for errors use this: | 284 | ignored. If you want to check for errors use this: |
259 | 285 | ||
260 | err = v4l2_device_call_until_err(dev, 0, core, g_chip_ident, &chip); | 286 | err = v4l2_device_call_until_err(v4l2_dev, 0, core, g_chip_ident, &chip); |
261 | 287 | ||
262 | Any error except -ENOIOCTLCMD will exit the loop with that error. If no | 288 | Any error except -ENOIOCTLCMD will exit the loop with that error. If no |
263 | errors (except -ENOIOCTLCMD) occured, then 0 is returned. | 289 | errors (except -ENOIOCTLCMD) occured, then 0 is returned. |
264 | 290 | ||
265 | The second argument to both calls is a group ID. If 0, then all subdevs are | 291 | The second argument to both calls is a group ID. If 0, then all subdevs are |
266 | called. If non-zero, then only those whose group ID match that value will | 292 | called. If non-zero, then only those whose group ID match that value will |
267 | be called. Before a bridge driver registers a subdev it can set subdev->grp_id | 293 | be called. Before a bridge driver registers a subdev it can set sd->grp_id |
268 | to whatever value it wants (it's 0 by default). This value is owned by the | 294 | to whatever value it wants (it's 0 by default). This value is owned by the |
269 | bridge driver and the sub-device driver will never modify or use it. | 295 | bridge driver and the sub-device driver will never modify or use it. |
270 | 296 | ||
@@ -276,6 +302,11 @@ e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling | |||
276 | v4l2_device_call_all(). That ensures that it will only go to the subdev | 302 | v4l2_device_call_all(). That ensures that it will only go to the subdev |
277 | that needs it. | 303 | that needs it. |
278 | 304 | ||
305 | If the sub-device needs to notify its v4l2_device parent of an event, then | ||
306 | it can call v4l2_subdev_notify(sd, notification, arg). This macro checks | ||
307 | whether there is a notify() callback defined and returns -ENODEV if not. | ||
308 | Otherwise the result of the notify() call is returned. | ||
309 | |||
279 | The advantage of using v4l2_subdev is that it is a generic struct and does | 310 | The advantage of using v4l2_subdev is that it is a generic struct and does |
280 | not contain any knowledge about the underlying hardware. So a driver might | 311 | not contain any knowledge about the underlying hardware. So a driver might |
281 | contain several subdevs that use an I2C bus, but also a subdev that is | 312 | contain several subdevs that use an I2C bus, but also a subdev that is |
@@ -325,32 +356,25 @@ And this to go from an i2c_client to a v4l2_subdev struct: | |||
325 | 356 | ||
326 | struct v4l2_subdev *sd = i2c_get_clientdata(client); | 357 | struct v4l2_subdev *sd = i2c_get_clientdata(client); |
327 | 358 | ||
328 | Finally you need to make a command function to make driver->command() | ||
329 | call the right subdev_ops functions: | ||
330 | |||
331 | static int subdev_command(struct i2c_client *client, unsigned cmd, void *arg) | ||
332 | { | ||
333 | return v4l2_subdev_command(i2c_get_clientdata(client), cmd, arg); | ||
334 | } | ||
335 | |||
336 | If driver->command is never used then you can leave this out. Eventually the | ||
337 | driver->command usage should be removed from v4l. | ||
338 | |||
339 | Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback | 359 | Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback |
340 | is called. This will unregister the sub-device from the bridge driver. It is | 360 | is called. This will unregister the sub-device from the bridge driver. It is |
341 | safe to call this even if the sub-device was never registered. | 361 | safe to call this even if the sub-device was never registered. |
342 | 362 | ||
363 | You need to do this because when the bridge driver destroys the i2c adapter | ||
364 | the remove() callbacks are called of the i2c devices on that adapter. | ||
365 | After that the corresponding v4l2_subdev structures are invalid, so they | ||
366 | have to be unregistered first. Calling v4l2_device_unregister_subdev(sd) | ||
367 | from the remove() callback ensures that this is always done correctly. | ||
368 | |||
343 | 369 | ||
344 | The bridge driver also has some helper functions it can use: | 370 | The bridge driver also has some helper functions it can use: |
345 | 371 | ||
346 | struct v4l2_subdev *sd = v4l2_i2c_new_subdev(adapter, "module_foo", "chipid", 0x36); | 372 | struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter, |
373 | "module_foo", "chipid", 0x36); | ||
347 | 374 | ||
348 | This loads the given module (can be NULL if no module needs to be loaded) and | 375 | This loads the given module (can be NULL if no module needs to be loaded) and |
349 | calls i2c_new_device() with the given i2c_adapter and chip/address arguments. | 376 | calls i2c_new_device() with the given i2c_adapter and chip/address arguments. |
350 | If all goes well, then it registers the subdev with the v4l2_device. It gets | 377 | If all goes well, then it registers the subdev with the v4l2_device. |
351 | the v4l2_device by calling i2c_get_adapdata(adapter), so you should make sure | ||
352 | that adapdata is set to v4l2_device when you setup the i2c_adapter in your | ||
353 | driver. | ||
354 | 378 | ||
355 | You can also use v4l2_i2c_new_probed_subdev() which is very similar to | 379 | You can also use v4l2_i2c_new_probed_subdev() which is very similar to |
356 | v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses | 380 | v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses |
@@ -358,6 +382,14 @@ that it should probe. Internally it calls i2c_new_probed_device(). | |||
358 | 382 | ||
359 | Both functions return NULL if something went wrong. | 383 | Both functions return NULL if something went wrong. |
360 | 384 | ||
385 | Note that the chipid you pass to v4l2_i2c_new_(probed_)subdev() is usually | ||
386 | the same as the module name. It allows you to specify a chip variant, e.g. | ||
387 | "saa7114" or "saa7115". In general though the i2c driver autodetects this. | ||
388 | The use of chipid is something that needs to be looked at more closely at a | ||
389 | later date. It differs between i2c drivers and as such can be confusing. | ||
390 | To see which chip variants are supported you can look in the i2c driver code | ||
391 | for the i2c_device_id table. This lists all the possibilities. | ||
392 | |||
361 | 393 | ||
362 | struct video_device | 394 | struct video_device |
363 | ------------------- | 395 | ------------------- |
@@ -396,6 +428,15 @@ You should also set these fields: | |||
396 | - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance | 428 | - ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance |
397 | (highly recommended to use this and it might become compulsory in the | 429 | (highly recommended to use this and it might become compulsory in the |
398 | future!), then set this to your v4l2_ioctl_ops struct. | 430 | future!), then set this to your v4l2_ioctl_ops struct. |
431 | - parent: you only set this if v4l2_device was registered with NULL as | ||
432 | the parent device struct. This only happens in cases where one hardware | ||
433 | device has multiple PCI devices that all share the same v4l2_device core. | ||
434 | |||
435 | The cx88 driver is an example of this: one core v4l2_device struct, but | ||
436 | it is used by both an raw video PCI device (cx8800) and a MPEG PCI device | ||
437 | (cx8802). Since the v4l2_device cannot be associated with a particular | ||
438 | PCI device it is setup without a parent device. But when the struct | ||
439 | video_device is setup you do know which parent PCI device to use. | ||
399 | 440 | ||
400 | If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or | 441 | If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or |
401 | .ioctl to video_ioctl2 in your v4l2_file_operations struct. | 442 | .ioctl to video_ioctl2 in your v4l2_file_operations struct. |
@@ -499,8 +540,8 @@ There are a few useful helper functions: | |||
499 | 540 | ||
500 | You can set/get driver private data in the video_device struct using: | 541 | You can set/get driver private data in the video_device struct using: |
501 | 542 | ||
502 | void *video_get_drvdata(struct video_device *dev); | 543 | void *video_get_drvdata(struct video_device *vdev); |
503 | void video_set_drvdata(struct video_device *dev, void *data); | 544 | void video_set_drvdata(struct video_device *vdev, void *data); |
504 | 545 | ||
505 | Note that you can safely call video_set_drvdata() before calling | 546 | Note that you can safely call video_set_drvdata() before calling |
506 | video_register_device(). | 547 | video_register_device(). |
@@ -519,3 +560,103 @@ void *video_drvdata(struct file *file); | |||
519 | You can go from a video_device struct to the v4l2_device struct using: | 560 | You can go from a video_device struct to the v4l2_device struct using: |
520 | 561 | ||
521 | struct v4l2_device *v4l2_dev = vdev->v4l2_dev; | 562 | struct v4l2_device *v4l2_dev = vdev->v4l2_dev; |
563 | |||
564 | video buffer helper functions | ||
565 | ----------------------------- | ||
566 | |||
567 | The v4l2 core API provides a standard method for dealing with video | ||
568 | buffers. Those methods allow a driver to implement read(), mmap() and | ||
569 | overlay() on a consistent way. | ||
570 | |||
571 | There are currently methods for using video buffers on devices that | ||
572 | supports DMA with scatter/gather method (videobuf-dma-sg), DMA with | ||
573 | linear access (videobuf-dma-contig), and vmalloced buffers, mostly | ||
574 | used on USB drivers (videobuf-vmalloc). | ||
575 | |||
576 | Any driver using videobuf should provide operations (callbacks) for | ||
577 | four handlers: | ||
578 | |||
579 | ops->buf_setup - calculates the size of the video buffers and avoid they | ||
580 | to waste more than some maximum limit of RAM; | ||
581 | ops->buf_prepare - fills the video buffer structs and calls | ||
582 | videobuf_iolock() to alloc and prepare mmaped memory; | ||
583 | ops->buf_queue - advices the driver that another buffer were | ||
584 | requested (by read() or by QBUF); | ||
585 | ops->buf_release - frees any buffer that were allocated. | ||
586 | |||
587 | In order to use it, the driver need to have a code (generally called at | ||
588 | interrupt context) that will properly handle the buffer request lists, | ||
589 | announcing that a new buffer were filled. | ||
590 | |||
591 | The irq handling code should handle the videobuf task lists, in order | ||
592 | to advice videobuf that a new frame were filled, in order to honor to a | ||
593 | request. The code is generally like this one: | ||
594 | if (list_empty(&dma_q->active)) | ||
595 | return; | ||
596 | |||
597 | buf = list_entry(dma_q->active.next, struct vbuffer, vb.queue); | ||
598 | |||
599 | if (!waitqueue_active(&buf->vb.done)) | ||
600 | return; | ||
601 | |||
602 | /* Some logic to handle the buf may be needed here */ | ||
603 | |||
604 | list_del(&buf->vb.queue); | ||
605 | do_gettimeofday(&buf->vb.ts); | ||
606 | wake_up(&buf->vb.done); | ||
607 | |||
608 | Those are the videobuffer functions used on drivers, implemented on | ||
609 | videobuf-core: | ||
610 | |||
611 | - Videobuf init functions | ||
612 | videobuf_queue_sg_init() | ||
613 | Initializes the videobuf infrastructure. This function should be | ||
614 | called before any other videobuf function on drivers that uses DMA | ||
615 | Scatter/Gather buffers. | ||
616 | |||
617 | videobuf_queue_dma_contig_init | ||
618 | Initializes the videobuf infrastructure. This function should be | ||
619 | called before any other videobuf function on drivers that need DMA | ||
620 | contiguous buffers. | ||
621 | |||
622 | videobuf_queue_vmalloc_init() | ||
623 | Initializes the videobuf infrastructure. This function should be | ||
624 | called before any other videobuf function on USB (and other drivers) | ||
625 | that need a vmalloced type of videobuf. | ||
626 | |||
627 | - videobuf_iolock() | ||
628 | Prepares the videobuf memory for the proper method (read, mmap, overlay). | ||
629 | |||
630 | - videobuf_queue_is_busy() | ||
631 | Checks if a videobuf is streaming. | ||
632 | |||
633 | - videobuf_queue_cancel() | ||
634 | Stops video handling. | ||
635 | |||
636 | - videobuf_mmap_free() | ||
637 | frees mmap buffers. | ||
638 | |||
639 | - videobuf_stop() | ||
640 | Stops video handling, ends mmap and frees mmap and other buffers. | ||
641 | |||
642 | - V4L2 api functions. Those functions correspond to VIDIOC_foo ioctls: | ||
643 | videobuf_reqbufs(), videobuf_querybuf(), videobuf_qbuf(), | ||
644 | videobuf_dqbuf(), videobuf_streamon(), videobuf_streamoff(). | ||
645 | |||
646 | - V4L1 api function (corresponds to VIDIOCMBUF ioctl): | ||
647 | videobuf_cgmbuf() | ||
648 | This function is used to provide backward compatibility with V4L1 | ||
649 | API. | ||
650 | |||
651 | - Some help functions for read()/poll() operations: | ||
652 | videobuf_read_stream() | ||
653 | For continuous stream read() | ||
654 | videobuf_read_one() | ||
655 | For snapshot read() | ||
656 | videobuf_poll_stream() | ||
657 | polling help function | ||
658 | |||
659 | The better way to understand it is to take a look at vivi driver. One | ||
660 | of the main reasons for vivi is to be a videobuf usage example. the | ||
661 | vivi_thread_tick() does the task that the IRQ callback would do on PCI | ||
662 | drivers (or the irq callback on USB). | ||
diff --git a/Documentation/video4linux/v4lgrab.c b/Documentation/video4linux/v4lgrab.c index d6e70bef8ad0..05769cff1009 100644 --- a/Documentation/video4linux/v4lgrab.c +++ b/Documentation/video4linux/v4lgrab.c | |||
@@ -105,8 +105,8 @@ int main(int argc, char ** argv) | |||
105 | struct video_picture vpic; | 105 | struct video_picture vpic; |
106 | 106 | ||
107 | unsigned char *buffer, *src; | 107 | unsigned char *buffer, *src; |
108 | int bpp = 24, r, g, b; | 108 | int bpp = 24, r = 0, g = 0, b = 0; |
109 | unsigned int i, src_depth; | 109 | unsigned int i, src_depth = 16; |
110 | 110 | ||
111 | if (fd < 0) { | 111 | if (fd < 0) { |
112 | perror(VIDEO_DEV); | 112 | perror(VIDEO_DEV); |
diff --git a/Documentation/video4linux/zr364xx.txt b/Documentation/video4linux/zr364xx.txt index 5c81e3ae6458..7f3d1955d214 100644 --- a/Documentation/video4linux/zr364xx.txt +++ b/Documentation/video4linux/zr364xx.txt | |||
@@ -65,3 +65,4 @@ Vendor Product Distributor Model | |||
65 | 0x06d6 0x003b Trust Powerc@m 970Z | 65 | 0x06d6 0x003b Trust Powerc@m 970Z |
66 | 0x0a17 0x004e Pentax Optio 50 | 66 | 0x0a17 0x004e Pentax Optio 50 |
67 | 0x041e 0x405d Creative DiVi CAM 516 | 67 | 0x041e 0x405d Creative DiVi CAM 516 |
68 | 0x08ca 0x2102 Aiptek DV T300 | ||
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX index 2131b00b63f6..2f77ced35df7 100644 --- a/Documentation/vm/00-INDEX +++ b/Documentation/vm/00-INDEX | |||
@@ -1,5 +1,7 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file. | 2 | - this file. |
3 | active_mm.txt | ||
4 | - An explanation from Linus about tsk->active_mm vs tsk->mm. | ||
3 | balance | 5 | balance |
4 | - various information on memory balancing. | 6 | - various information on memory balancing. |
5 | hugetlbpage.txt | 7 | hugetlbpage.txt |
diff --git a/Documentation/vm/Makefile b/Documentation/vm/Makefile index 6f562f778b28..5bd269b3731a 100644 --- a/Documentation/vm/Makefile +++ b/Documentation/vm/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | obj- := dummy.o | 2 | obj- := dummy.o |
3 | 3 | ||
4 | # List of programs to build | 4 | # List of programs to build |
5 | hostprogs-y := slabinfo | 5 | hostprogs-y := slabinfo page-types |
6 | 6 | ||
7 | # Tell kbuild to always build the programs | 7 | # Tell kbuild to always build the programs |
8 | always := $(hostprogs-y) | 8 | always := $(hostprogs-y) |
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt new file mode 100644 index 000000000000..4ee1f643d897 --- /dev/null +++ b/Documentation/vm/active_mm.txt | |||
@@ -0,0 +1,83 @@ | |||
1 | List: linux-kernel | ||
2 | Subject: Re: active_mm | ||
3 | From: Linus Torvalds <torvalds () transmeta ! com> | ||
4 | Date: 1999-07-30 21:36:24 | ||
5 | |||
6 | Cc'd to linux-kernel, because I don't write explanations all that often, | ||
7 | and when I do I feel better about more people reading them. | ||
8 | |||
9 | On Fri, 30 Jul 1999, David Mosberger wrote: | ||
10 | > | ||
11 | > Is there a brief description someplace on how "mm" vs. "active_mm" in | ||
12 | > the task_struct are supposed to be used? (My apologies if this was | ||
13 | > discussed on the mailing lists---I just returned from vacation and | ||
14 | > wasn't able to follow linux-kernel for a while). | ||
15 | |||
16 | Basically, the new setup is: | ||
17 | |||
18 | - we have "real address spaces" and "anonymous address spaces". The | ||
19 | difference is that an anonymous address space doesn't care about the | ||
20 | user-level page tables at all, so when we do a context switch into an | ||
21 | anonymous address space we just leave the previous address space | ||
22 | active. | ||
23 | |||
24 | The obvious use for a "anonymous address space" is any thread that | ||
25 | doesn't need any user mappings - all kernel threads basically fall into | ||
26 | this category, but even "real" threads can temporarily say that for | ||
27 | some amount of time they are not going to be interested in user space, | ||
28 | and that the scheduler might as well try to avoid wasting time on | ||
29 | switching the VM state around. Currently only the old-style bdflush | ||
30 | sync does that. | ||
31 | |||
32 | - "tsk->mm" points to the "real address space". For an anonymous process, | ||
33 | tsk->mm will be NULL, for the logical reason that an anonymous process | ||
34 | really doesn't _have_ a real address space at all. | ||
35 | |||
36 | - however, we obviously need to keep track of which address space we | ||
37 | "stole" for such an anonymous user. For that, we have "tsk->active_mm", | ||
38 | which shows what the currently active address space is. | ||
39 | |||
40 | The rule is that for a process with a real address space (ie tsk->mm is | ||
41 | non-NULL) the active_mm obviously always has to be the same as the real | ||
42 | one. | ||
43 | |||
44 | For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the | ||
45 | "borrowed" mm while the anonymous process is running. When the | ||
46 | anonymous process gets scheduled away, the borrowed address space is | ||
47 | returned and cleared. | ||
48 | |||
49 | To support all that, the "struct mm_struct" now has two counters: a | ||
50 | "mm_users" counter that is how many "real address space users" there are, | ||
51 | and a "mm_count" counter that is the number of "lazy" users (ie anonymous | ||
52 | users) plus one if there are any real users. | ||
53 | |||
54 | Usually there is at least one real user, but it could be that the real | ||
55 | user exited on another CPU while a lazy user was still active, so you do | ||
56 | actually get cases where you have a address space that is _only_ used by | ||
57 | lazy users. That is often a short-lived state, because once that thread | ||
58 | gets scheduled away in favour of a real thread, the "zombie" mm gets | ||
59 | released because "mm_users" becomes zero. | ||
60 | |||
61 | Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any | ||
62 | more. "init_mm" should be considered just a "lazy context when no other | ||
63 | context is available", and in fact it is mainly used just at bootup when | ||
64 | no real VM has yet been created. So code that used to check | ||
65 | |||
66 | if (current->mm == &init_mm) | ||
67 | |||
68 | should generally just do | ||
69 | |||
70 | if (!current->mm) | ||
71 | |||
72 | instead (which makes more sense anyway - the test is basically one of "do | ||
73 | we have a user context", and is generally done by the page fault handler | ||
74 | and things like that). | ||
75 | |||
76 | Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago, | ||
77 | because it slightly changes the interfaces to accomodate the alpha (who | ||
78 | would have thought it, but the alpha actually ends up having one of the | ||
79 | ugliest context switch codes - unlike the other architectures where the MM | ||
80 | and register state is separate, the alpha PALcode joins the two, and you | ||
81 | need to switch both together). | ||
82 | |||
83 | (From http://marc.info/?l=linux-kernel&m=93337278602211&w=2) | ||
diff --git a/Documentation/vm/balance b/Documentation/vm/balance index bd3d31bc4915..c46e68cf9344 100644 --- a/Documentation/vm/balance +++ b/Documentation/vm/balance | |||
@@ -75,15 +75,15 @@ Page stealing from process memory and shm is done if stealing the page would | |||
75 | alleviate memory pressure on any zone in the page's node that has fallen below | 75 | alleviate memory pressure on any zone in the page's node that has fallen below |
76 | its watermark. | 76 | its watermark. |
77 | 77 | ||
78 | pages_min/pages_low/pages_high/low_on_memory/zone_wake_kswapd: These are | 78 | watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These |
79 | per-zone fields, used to determine when a zone needs to be balanced. When | 79 | are per-zone fields, used to determine when a zone needs to be balanced. When |
80 | the number of pages falls below pages_min, the hysteric field low_on_memory | 80 | the number of pages falls below watermark[WMARK_MIN], the hysteric field |
81 | gets set. This stays set till the number of free pages becomes pages_high. | 81 | low_on_memory gets set. This stays set till the number of free pages becomes |
82 | When low_on_memory is set, page allocation requests will try to free some | 82 | watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will |
83 | pages in the zone (providing GFP_WAIT is set in the request). Orthogonal | 83 | try to free some pages in the zone (providing GFP_WAIT is set in the request). |
84 | to this, is the decision to poke kswapd to free some zone pages. That | 84 | Orthogonal to this, is the decision to poke kswapd to free some zone pages. |
85 | decision is not hysteresis based, and is done when the number of free | 85 | That decision is not hysteresis based, and is done when the number of free |
86 | pages is below pages_low; in which case zone_wake_kswapd is also set. | 86 | pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set. |
87 | 87 | ||
88 | 88 | ||
89 | (Good) Ideas that I have heard: | 89 | (Good) Ideas that I have heard: |
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt index 6aaaeb38730c..be45dbb9d7f2 100644 --- a/Documentation/vm/numa_memory_policy.txt +++ b/Documentation/vm/numa_memory_policy.txt | |||
@@ -8,7 +8,8 @@ The current memory policy support was added to Linux 2.6 around May 2004. This | |||
8 | document attempts to describe the concepts and APIs of the 2.6 memory policy | 8 | document attempts to describe the concepts and APIs of the 2.6 memory policy |
9 | support. | 9 | support. |
10 | 10 | ||
11 | Memory policies should not be confused with cpusets (Documentation/cpusets.txt) | 11 | Memory policies should not be confused with cpusets |
12 | (Documentation/cgroups/cpusets.txt) | ||
12 | which is an administrative mechanism for restricting the nodes from which | 13 | which is an administrative mechanism for restricting the nodes from which |
13 | memory may be allocated by a set of processes. Memory policies are a | 14 | memory may be allocated by a set of processes. Memory policies are a |
14 | programming interface that a NUMA-aware application can take advantage of. When | 15 | programming interface that a NUMA-aware application can take advantage of. When |
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c new file mode 100644 index 000000000000..0833f44ba16b --- /dev/null +++ b/Documentation/vm/page-types.c | |||
@@ -0,0 +1,698 @@ | |||
1 | /* | ||
2 | * page-types: Tool for querying page flags | ||
3 | * | ||
4 | * Copyright (C) 2009 Intel corporation | ||
5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> | ||
6 | */ | ||
7 | |||
8 | #include <stdio.h> | ||
9 | #include <stdlib.h> | ||
10 | #include <unistd.h> | ||
11 | #include <stdint.h> | ||
12 | #include <stdarg.h> | ||
13 | #include <string.h> | ||
14 | #include <getopt.h> | ||
15 | #include <limits.h> | ||
16 | #include <sys/types.h> | ||
17 | #include <sys/errno.h> | ||
18 | #include <sys/fcntl.h> | ||
19 | |||
20 | |||
21 | /* | ||
22 | * kernel page flags | ||
23 | */ | ||
24 | |||
25 | #define KPF_BYTES 8 | ||
26 | #define PROC_KPAGEFLAGS "/proc/kpageflags" | ||
27 | |||
28 | /* copied from kpageflags_read() */ | ||
29 | #define KPF_LOCKED 0 | ||
30 | #define KPF_ERROR 1 | ||
31 | #define KPF_REFERENCED 2 | ||
32 | #define KPF_UPTODATE 3 | ||
33 | #define KPF_DIRTY 4 | ||
34 | #define KPF_LRU 5 | ||
35 | #define KPF_ACTIVE 6 | ||
36 | #define KPF_SLAB 7 | ||
37 | #define KPF_WRITEBACK 8 | ||
38 | #define KPF_RECLAIM 9 | ||
39 | #define KPF_BUDDY 10 | ||
40 | |||
41 | /* [11-20] new additions in 2.6.31 */ | ||
42 | #define KPF_MMAP 11 | ||
43 | #define KPF_ANON 12 | ||
44 | #define KPF_SWAPCACHE 13 | ||
45 | #define KPF_SWAPBACKED 14 | ||
46 | #define KPF_COMPOUND_HEAD 15 | ||
47 | #define KPF_COMPOUND_TAIL 16 | ||
48 | #define KPF_HUGE 17 | ||
49 | #define KPF_UNEVICTABLE 18 | ||
50 | #define KPF_NOPAGE 20 | ||
51 | |||
52 | /* [32-] kernel hacking assistances */ | ||
53 | #define KPF_RESERVED 32 | ||
54 | #define KPF_MLOCKED 33 | ||
55 | #define KPF_MAPPEDTODISK 34 | ||
56 | #define KPF_PRIVATE 35 | ||
57 | #define KPF_PRIVATE_2 36 | ||
58 | #define KPF_OWNER_PRIVATE 37 | ||
59 | #define KPF_ARCH 38 | ||
60 | #define KPF_UNCACHED 39 | ||
61 | |||
62 | /* [48-] take some arbitrary free slots for expanding overloaded flags | ||
63 | * not part of kernel API | ||
64 | */ | ||
65 | #define KPF_READAHEAD 48 | ||
66 | #define KPF_SLOB_FREE 49 | ||
67 | #define KPF_SLUB_FROZEN 50 | ||
68 | #define KPF_SLUB_DEBUG 51 | ||
69 | |||
70 | #define KPF_ALL_BITS ((uint64_t)~0ULL) | ||
71 | #define KPF_HACKERS_BITS (0xffffULL << 32) | ||
72 | #define KPF_OVERLOADED_BITS (0xffffULL << 48) | ||
73 | #define BIT(name) (1ULL << KPF_##name) | ||
74 | #define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) | ||
75 | |||
76 | static char *page_flag_names[] = { | ||
77 | [KPF_LOCKED] = "L:locked", | ||
78 | [KPF_ERROR] = "E:error", | ||
79 | [KPF_REFERENCED] = "R:referenced", | ||
80 | [KPF_UPTODATE] = "U:uptodate", | ||
81 | [KPF_DIRTY] = "D:dirty", | ||
82 | [KPF_LRU] = "l:lru", | ||
83 | [KPF_ACTIVE] = "A:active", | ||
84 | [KPF_SLAB] = "S:slab", | ||
85 | [KPF_WRITEBACK] = "W:writeback", | ||
86 | [KPF_RECLAIM] = "I:reclaim", | ||
87 | [KPF_BUDDY] = "B:buddy", | ||
88 | |||
89 | [KPF_MMAP] = "M:mmap", | ||
90 | [KPF_ANON] = "a:anonymous", | ||
91 | [KPF_SWAPCACHE] = "s:swapcache", | ||
92 | [KPF_SWAPBACKED] = "b:swapbacked", | ||
93 | [KPF_COMPOUND_HEAD] = "H:compound_head", | ||
94 | [KPF_COMPOUND_TAIL] = "T:compound_tail", | ||
95 | [KPF_HUGE] = "G:huge", | ||
96 | [KPF_UNEVICTABLE] = "u:unevictable", | ||
97 | [KPF_NOPAGE] = "n:nopage", | ||
98 | |||
99 | [KPF_RESERVED] = "r:reserved", | ||
100 | [KPF_MLOCKED] = "m:mlocked", | ||
101 | [KPF_MAPPEDTODISK] = "d:mappedtodisk", | ||
102 | [KPF_PRIVATE] = "P:private", | ||
103 | [KPF_PRIVATE_2] = "p:private_2", | ||
104 | [KPF_OWNER_PRIVATE] = "O:owner_private", | ||
105 | [KPF_ARCH] = "h:arch", | ||
106 | [KPF_UNCACHED] = "c:uncached", | ||
107 | |||
108 | [KPF_READAHEAD] = "I:readahead", | ||
109 | [KPF_SLOB_FREE] = "P:slob_free", | ||
110 | [KPF_SLUB_FROZEN] = "A:slub_frozen", | ||
111 | [KPF_SLUB_DEBUG] = "E:slub_debug", | ||
112 | }; | ||
113 | |||
114 | |||
115 | /* | ||
116 | * data structures | ||
117 | */ | ||
118 | |||
119 | static int opt_raw; /* for kernel developers */ | ||
120 | static int opt_list; /* list pages (in ranges) */ | ||
121 | static int opt_no_summary; /* don't show summary */ | ||
122 | static pid_t opt_pid; /* process to walk */ | ||
123 | |||
124 | #define MAX_ADDR_RANGES 1024 | ||
125 | static int nr_addr_ranges; | ||
126 | static unsigned long opt_offset[MAX_ADDR_RANGES]; | ||
127 | static unsigned long opt_size[MAX_ADDR_RANGES]; | ||
128 | |||
129 | #define MAX_BIT_FILTERS 64 | ||
130 | static int nr_bit_filters; | ||
131 | static uint64_t opt_mask[MAX_BIT_FILTERS]; | ||
132 | static uint64_t opt_bits[MAX_BIT_FILTERS]; | ||
133 | |||
134 | static int page_size; | ||
135 | |||
136 | #define PAGES_BATCH (64 << 10) /* 64k pages */ | ||
137 | static int kpageflags_fd; | ||
138 | static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | ||
139 | |||
140 | #define HASH_SHIFT 13 | ||
141 | #define HASH_SIZE (1 << HASH_SHIFT) | ||
142 | #define HASH_MASK (HASH_SIZE - 1) | ||
143 | #define HASH_KEY(flags) (flags & HASH_MASK) | ||
144 | |||
145 | static unsigned long total_pages; | ||
146 | static unsigned long nr_pages[HASH_SIZE]; | ||
147 | static uint64_t page_flags[HASH_SIZE]; | ||
148 | |||
149 | |||
150 | /* | ||
151 | * helper functions | ||
152 | */ | ||
153 | |||
154 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | ||
155 | |||
156 | #define min_t(type, x, y) ({ \ | ||
157 | type __min1 = (x); \ | ||
158 | type __min2 = (y); \ | ||
159 | __min1 < __min2 ? __min1 : __min2; }) | ||
160 | |||
161 | unsigned long pages2mb(unsigned long pages) | ||
162 | { | ||
163 | return (pages * page_size) >> 20; | ||
164 | } | ||
165 | |||
166 | void fatal(const char *x, ...) | ||
167 | { | ||
168 | va_list ap; | ||
169 | |||
170 | va_start(ap, x); | ||
171 | vfprintf(stderr, x, ap); | ||
172 | va_end(ap); | ||
173 | exit(EXIT_FAILURE); | ||
174 | } | ||
175 | |||
176 | |||
177 | /* | ||
178 | * page flag names | ||
179 | */ | ||
180 | |||
181 | char *page_flag_name(uint64_t flags) | ||
182 | { | ||
183 | static char buf[65]; | ||
184 | int present; | ||
185 | int i, j; | ||
186 | |||
187 | for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { | ||
188 | present = (flags >> i) & 1; | ||
189 | if (!page_flag_names[i]) { | ||
190 | if (present) | ||
191 | fatal("unkown flag bit %d\n", i); | ||
192 | continue; | ||
193 | } | ||
194 | buf[j++] = present ? page_flag_names[i][0] : '_'; | ||
195 | } | ||
196 | |||
197 | return buf; | ||
198 | } | ||
199 | |||
200 | char *page_flag_longname(uint64_t flags) | ||
201 | { | ||
202 | static char buf[1024]; | ||
203 | int i, n; | ||
204 | |||
205 | for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { | ||
206 | if (!page_flag_names[i]) | ||
207 | continue; | ||
208 | if ((flags >> i) & 1) | ||
209 | n += snprintf(buf + n, sizeof(buf) - n, "%s,", | ||
210 | page_flag_names[i] + 2); | ||
211 | } | ||
212 | if (n) | ||
213 | n--; | ||
214 | buf[n] = '\0'; | ||
215 | |||
216 | return buf; | ||
217 | } | ||
218 | |||
219 | |||
220 | /* | ||
221 | * page list and summary | ||
222 | */ | ||
223 | |||
224 | void show_page_range(unsigned long offset, uint64_t flags) | ||
225 | { | ||
226 | static uint64_t flags0; | ||
227 | static unsigned long index; | ||
228 | static unsigned long count; | ||
229 | |||
230 | if (flags == flags0 && offset == index + count) { | ||
231 | count++; | ||
232 | return; | ||
233 | } | ||
234 | |||
235 | if (count) | ||
236 | printf("%lu\t%lu\t%s\n", | ||
237 | index, count, page_flag_name(flags0)); | ||
238 | |||
239 | flags0 = flags; | ||
240 | index = offset; | ||
241 | count = 1; | ||
242 | } | ||
243 | |||
244 | void show_page(unsigned long offset, uint64_t flags) | ||
245 | { | ||
246 | printf("%lu\t%s\n", offset, page_flag_name(flags)); | ||
247 | } | ||
248 | |||
249 | void show_summary(void) | ||
250 | { | ||
251 | int i; | ||
252 | |||
253 | printf(" flags\tpage-count MB" | ||
254 | " symbolic-flags\t\t\tlong-symbolic-flags\n"); | ||
255 | |||
256 | for (i = 0; i < ARRAY_SIZE(nr_pages); i++) { | ||
257 | if (nr_pages[i]) | ||
258 | printf("0x%016llx\t%10lu %8lu %s\t%s\n", | ||
259 | (unsigned long long)page_flags[i], | ||
260 | nr_pages[i], | ||
261 | pages2mb(nr_pages[i]), | ||
262 | page_flag_name(page_flags[i]), | ||
263 | page_flag_longname(page_flags[i])); | ||
264 | } | ||
265 | |||
266 | printf(" total\t%10lu %8lu\n", | ||
267 | total_pages, pages2mb(total_pages)); | ||
268 | } | ||
269 | |||
270 | |||
271 | /* | ||
272 | * page flag filters | ||
273 | */ | ||
274 | |||
275 | int bit_mask_ok(uint64_t flags) | ||
276 | { | ||
277 | int i; | ||
278 | |||
279 | for (i = 0; i < nr_bit_filters; i++) { | ||
280 | if (opt_bits[i] == KPF_ALL_BITS) { | ||
281 | if ((flags & opt_mask[i]) == 0) | ||
282 | return 0; | ||
283 | } else { | ||
284 | if ((flags & opt_mask[i]) != opt_bits[i]) | ||
285 | return 0; | ||
286 | } | ||
287 | } | ||
288 | |||
289 | return 1; | ||
290 | } | ||
291 | |||
292 | uint64_t expand_overloaded_flags(uint64_t flags) | ||
293 | { | ||
294 | /* SLOB/SLUB overload several page flags */ | ||
295 | if (flags & BIT(SLAB)) { | ||
296 | if (flags & BIT(PRIVATE)) | ||
297 | flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); | ||
298 | if (flags & BIT(ACTIVE)) | ||
299 | flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); | ||
300 | if (flags & BIT(ERROR)) | ||
301 | flags ^= BIT(ERROR) | BIT(SLUB_DEBUG); | ||
302 | } | ||
303 | |||
304 | /* PG_reclaim is overloaded as PG_readahead in the read path */ | ||
305 | if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM)) | ||
306 | flags ^= BIT(RECLAIM) | BIT(READAHEAD); | ||
307 | |||
308 | return flags; | ||
309 | } | ||
310 | |||
311 | uint64_t well_known_flags(uint64_t flags) | ||
312 | { | ||
313 | /* hide flags intended only for kernel hacker */ | ||
314 | flags &= ~KPF_HACKERS_BITS; | ||
315 | |||
316 | /* hide non-hugeTLB compound pages */ | ||
317 | if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE))) | ||
318 | flags &= ~BITS_COMPOUND; | ||
319 | |||
320 | return flags; | ||
321 | } | ||
322 | |||
323 | |||
324 | /* | ||
325 | * page frame walker | ||
326 | */ | ||
327 | |||
328 | int hash_slot(uint64_t flags) | ||
329 | { | ||
330 | int k = HASH_KEY(flags); | ||
331 | int i; | ||
332 | |||
333 | /* Explicitly reserve slot 0 for flags 0: the following logic | ||
334 | * cannot distinguish an unoccupied slot from slot (flags==0). | ||
335 | */ | ||
336 | if (flags == 0) | ||
337 | return 0; | ||
338 | |||
339 | /* search through the remaining (HASH_SIZE-1) slots */ | ||
340 | for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) { | ||
341 | if (!k || k >= ARRAY_SIZE(page_flags)) | ||
342 | k = 1; | ||
343 | if (page_flags[k] == 0) { | ||
344 | page_flags[k] = flags; | ||
345 | return k; | ||
346 | } | ||
347 | if (page_flags[k] == flags) | ||
348 | return k; | ||
349 | } | ||
350 | |||
351 | fatal("hash table full: bump up HASH_SHIFT?\n"); | ||
352 | exit(EXIT_FAILURE); | ||
353 | } | ||
354 | |||
355 | void add_page(unsigned long offset, uint64_t flags) | ||
356 | { | ||
357 | flags = expand_overloaded_flags(flags); | ||
358 | |||
359 | if (!opt_raw) | ||
360 | flags = well_known_flags(flags); | ||
361 | |||
362 | if (!bit_mask_ok(flags)) | ||
363 | return; | ||
364 | |||
365 | if (opt_list == 1) | ||
366 | show_page_range(offset, flags); | ||
367 | else if (opt_list == 2) | ||
368 | show_page(offset, flags); | ||
369 | |||
370 | nr_pages[hash_slot(flags)]++; | ||
371 | total_pages++; | ||
372 | } | ||
373 | |||
374 | void walk_pfn(unsigned long index, unsigned long count) | ||
375 | { | ||
376 | unsigned long batch; | ||
377 | unsigned long n; | ||
378 | unsigned long i; | ||
379 | |||
380 | if (index > ULONG_MAX / KPF_BYTES) | ||
381 | fatal("index overflow: %lu\n", index); | ||
382 | |||
383 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); | ||
384 | |||
385 | while (count) { | ||
386 | batch = min_t(unsigned long, count, PAGES_BATCH); | ||
387 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); | ||
388 | if (n == 0) | ||
389 | break; | ||
390 | if (n < 0) { | ||
391 | perror(PROC_KPAGEFLAGS); | ||
392 | exit(EXIT_FAILURE); | ||
393 | } | ||
394 | |||
395 | if (n % KPF_BYTES != 0) | ||
396 | fatal("partial read: %lu bytes\n", n); | ||
397 | n = n / KPF_BYTES; | ||
398 | |||
399 | for (i = 0; i < n; i++) | ||
400 | add_page(index + i, kpageflags_buf[i]); | ||
401 | |||
402 | index += batch; | ||
403 | count -= batch; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | void walk_addr_ranges(void) | ||
408 | { | ||
409 | int i; | ||
410 | |||
411 | kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY); | ||
412 | if (kpageflags_fd < 0) { | ||
413 | perror(PROC_KPAGEFLAGS); | ||
414 | exit(EXIT_FAILURE); | ||
415 | } | ||
416 | |||
417 | if (!nr_addr_ranges) | ||
418 | walk_pfn(0, ULONG_MAX); | ||
419 | |||
420 | for (i = 0; i < nr_addr_ranges; i++) | ||
421 | walk_pfn(opt_offset[i], opt_size[i]); | ||
422 | |||
423 | close(kpageflags_fd); | ||
424 | } | ||
425 | |||
426 | |||
427 | /* | ||
428 | * user interface | ||
429 | */ | ||
430 | |||
431 | const char *page_flag_type(uint64_t flag) | ||
432 | { | ||
433 | if (flag & KPF_HACKERS_BITS) | ||
434 | return "(r)"; | ||
435 | if (flag & KPF_OVERLOADED_BITS) | ||
436 | return "(o)"; | ||
437 | return " "; | ||
438 | } | ||
439 | |||
440 | void usage(void) | ||
441 | { | ||
442 | int i, j; | ||
443 | |||
444 | printf( | ||
445 | "page-types [options]\n" | ||
446 | " -r|--raw Raw mode, for kernel developers\n" | ||
447 | " -a|--addr addr-spec Walk a range of pages\n" | ||
448 | " -b|--bits bits-spec Walk pages with specified bits\n" | ||
449 | #if 0 /* planned features */ | ||
450 | " -p|--pid pid Walk process address space\n" | ||
451 | " -f|--file filename Walk file address space\n" | ||
452 | #endif | ||
453 | " -l|--list Show page details in ranges\n" | ||
454 | " -L|--list-each Show page details one by one\n" | ||
455 | " -N|--no-summary Don't show summay info\n" | ||
456 | " -h|--help Show this usage message\n" | ||
457 | "addr-spec:\n" | ||
458 | " N one page at offset N (unit: pages)\n" | ||
459 | " N+M pages range from N to N+M-1\n" | ||
460 | " N,M pages range from N to M-1\n" | ||
461 | " N, pages range from N to end\n" | ||
462 | " ,M pages range from 0 to M\n" | ||
463 | "bits-spec:\n" | ||
464 | " bit1,bit2 (flags & (bit1|bit2)) != 0\n" | ||
465 | " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" | ||
466 | " bit1,~bit2 (flags & (bit1|bit2)) == bit1\n" | ||
467 | " =bit1,bit2 flags == (bit1|bit2)\n" | ||
468 | "bit-names:\n" | ||
469 | ); | ||
470 | |||
471 | for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { | ||
472 | if (!page_flag_names[i]) | ||
473 | continue; | ||
474 | printf("%16s%s", page_flag_names[i] + 2, | ||
475 | page_flag_type(1ULL << i)); | ||
476 | if (++j > 3) { | ||
477 | j = 0; | ||
478 | putchar('\n'); | ||
479 | } | ||
480 | } | ||
481 | printf("\n " | ||
482 | "(r) raw mode bits (o) overloaded bits\n"); | ||
483 | } | ||
484 | |||
485 | unsigned long long parse_number(const char *str) | ||
486 | { | ||
487 | unsigned long long n; | ||
488 | |||
489 | n = strtoll(str, NULL, 0); | ||
490 | |||
491 | if (n == 0 && str[0] != '0') | ||
492 | fatal("invalid name or number: %s\n", str); | ||
493 | |||
494 | return n; | ||
495 | } | ||
496 | |||
497 | void parse_pid(const char *str) | ||
498 | { | ||
499 | opt_pid = parse_number(str); | ||
500 | } | ||
501 | |||
502 | void parse_file(const char *name) | ||
503 | { | ||
504 | } | ||
505 | |||
506 | void add_addr_range(unsigned long offset, unsigned long size) | ||
507 | { | ||
508 | if (nr_addr_ranges >= MAX_ADDR_RANGES) | ||
509 | fatal("too much addr ranges\n"); | ||
510 | |||
511 | opt_offset[nr_addr_ranges] = offset; | ||
512 | opt_size[nr_addr_ranges] = size; | ||
513 | nr_addr_ranges++; | ||
514 | } | ||
515 | |||
516 | void parse_addr_range(const char *optarg) | ||
517 | { | ||
518 | unsigned long offset; | ||
519 | unsigned long size; | ||
520 | char *p; | ||
521 | |||
522 | p = strchr(optarg, ','); | ||
523 | if (!p) | ||
524 | p = strchr(optarg, '+'); | ||
525 | |||
526 | if (p == optarg) { | ||
527 | offset = 0; | ||
528 | size = parse_number(p + 1); | ||
529 | } else if (p) { | ||
530 | offset = parse_number(optarg); | ||
531 | if (p[1] == '\0') | ||
532 | size = ULONG_MAX; | ||
533 | else { | ||
534 | size = parse_number(p + 1); | ||
535 | if (*p == ',') { | ||
536 | if (size < offset) | ||
537 | fatal("invalid range: %lu,%lu\n", | ||
538 | offset, size); | ||
539 | size -= offset; | ||
540 | } | ||
541 | } | ||
542 | } else { | ||
543 | offset = parse_number(optarg); | ||
544 | size = 1; | ||
545 | } | ||
546 | |||
547 | add_addr_range(offset, size); | ||
548 | } | ||
549 | |||
550 | void add_bits_filter(uint64_t mask, uint64_t bits) | ||
551 | { | ||
552 | if (nr_bit_filters >= MAX_BIT_FILTERS) | ||
553 | fatal("too much bit filters\n"); | ||
554 | |||
555 | opt_mask[nr_bit_filters] = mask; | ||
556 | opt_bits[nr_bit_filters] = bits; | ||
557 | nr_bit_filters++; | ||
558 | } | ||
559 | |||
560 | uint64_t parse_flag_name(const char *str, int len) | ||
561 | { | ||
562 | int i; | ||
563 | |||
564 | if (!*str || !len) | ||
565 | return 0; | ||
566 | |||
567 | if (len <= 8 && !strncmp(str, "compound", len)) | ||
568 | return BITS_COMPOUND; | ||
569 | |||
570 | for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) { | ||
571 | if (!page_flag_names[i]) | ||
572 | continue; | ||
573 | if (!strncmp(str, page_flag_names[i] + 2, len)) | ||
574 | return 1ULL << i; | ||
575 | } | ||
576 | |||
577 | return parse_number(str); | ||
578 | } | ||
579 | |||
580 | uint64_t parse_flag_names(const char *str, int all) | ||
581 | { | ||
582 | const char *p = str; | ||
583 | uint64_t flags = 0; | ||
584 | |||
585 | while (1) { | ||
586 | if (*p == ',' || *p == '=' || *p == '\0') { | ||
587 | if ((*str != '~') || (*str == '~' && all && *++str)) | ||
588 | flags |= parse_flag_name(str, p - str); | ||
589 | if (*p != ',') | ||
590 | break; | ||
591 | str = p + 1; | ||
592 | } | ||
593 | p++; | ||
594 | } | ||
595 | |||
596 | return flags; | ||
597 | } | ||
598 | |||
599 | void parse_bits_mask(const char *optarg) | ||
600 | { | ||
601 | uint64_t mask; | ||
602 | uint64_t bits; | ||
603 | const char *p; | ||
604 | |||
605 | p = strchr(optarg, '='); | ||
606 | if (p == optarg) { | ||
607 | mask = KPF_ALL_BITS; | ||
608 | bits = parse_flag_names(p + 1, 0); | ||
609 | } else if (p) { | ||
610 | mask = parse_flag_names(optarg, 0); | ||
611 | bits = parse_flag_names(p + 1, 0); | ||
612 | } else if (strchr(optarg, '~')) { | ||
613 | mask = parse_flag_names(optarg, 1); | ||
614 | bits = parse_flag_names(optarg, 0); | ||
615 | } else { | ||
616 | mask = parse_flag_names(optarg, 0); | ||
617 | bits = KPF_ALL_BITS; | ||
618 | } | ||
619 | |||
620 | add_bits_filter(mask, bits); | ||
621 | } | ||
622 | |||
623 | |||
624 | struct option opts[] = { | ||
625 | { "raw" , 0, NULL, 'r' }, | ||
626 | { "pid" , 1, NULL, 'p' }, | ||
627 | { "file" , 1, NULL, 'f' }, | ||
628 | { "addr" , 1, NULL, 'a' }, | ||
629 | { "bits" , 1, NULL, 'b' }, | ||
630 | { "list" , 0, NULL, 'l' }, | ||
631 | { "list-each" , 0, NULL, 'L' }, | ||
632 | { "no-summary", 0, NULL, 'N' }, | ||
633 | { "help" , 0, NULL, 'h' }, | ||
634 | { NULL , 0, NULL, 0 } | ||
635 | }; | ||
636 | |||
637 | int main(int argc, char *argv[]) | ||
638 | { | ||
639 | int c; | ||
640 | |||
641 | page_size = getpagesize(); | ||
642 | |||
643 | while ((c = getopt_long(argc, argv, | ||
644 | "rp:f:a:b:lLNh", opts, NULL)) != -1) { | ||
645 | switch (c) { | ||
646 | case 'r': | ||
647 | opt_raw = 1; | ||
648 | break; | ||
649 | case 'p': | ||
650 | parse_pid(optarg); | ||
651 | break; | ||
652 | case 'f': | ||
653 | parse_file(optarg); | ||
654 | break; | ||
655 | case 'a': | ||
656 | parse_addr_range(optarg); | ||
657 | break; | ||
658 | case 'b': | ||
659 | parse_bits_mask(optarg); | ||
660 | break; | ||
661 | case 'l': | ||
662 | opt_list = 1; | ||
663 | break; | ||
664 | case 'L': | ||
665 | opt_list = 2; | ||
666 | break; | ||
667 | case 'N': | ||
668 | opt_no_summary = 1; | ||
669 | break; | ||
670 | case 'h': | ||
671 | usage(); | ||
672 | exit(0); | ||
673 | default: | ||
674 | usage(); | ||
675 | exit(1); | ||
676 | } | ||
677 | } | ||
678 | |||
679 | if (opt_list == 1) | ||
680 | printf("offset\tcount\tflags\n"); | ||
681 | if (opt_list == 2) | ||
682 | printf("offset\tflags\n"); | ||
683 | |||
684 | walk_addr_ranges(); | ||
685 | |||
686 | if (opt_list == 1) | ||
687 | show_page_range(0, 0); /* drain the buffer */ | ||
688 | |||
689 | if (opt_no_summary) | ||
690 | return 0; | ||
691 | |||
692 | if (opt_list) | ||
693 | printf("\n\n"); | ||
694 | |||
695 | show_summary(); | ||
696 | |||
697 | return 0; | ||
698 | } | ||
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration index d5fdfd34bbaf..6513fe2d90b8 100644 --- a/Documentation/vm/page_migration +++ b/Documentation/vm/page_migration | |||
@@ -37,7 +37,8 @@ locations. | |||
37 | 37 | ||
38 | Larger installations usually partition the system using cpusets into | 38 | Larger installations usually partition the system using cpusets into |
39 | sections of nodes. Paul Jackson has equipped cpusets with the ability to | 39 | sections of nodes. Paul Jackson has equipped cpusets with the ability to |
40 | move pages when a task is moved to another cpuset (See ../cpusets.txt). | 40 | move pages when a task is moved to another cpuset (See |
41 | Documentation/cgroups/cpusets.txt). | ||
41 | Cpusets allows the automation of process locality. If a task is moved to | 42 | Cpusets allows the automation of process locality. If a task is moved to |
42 | a new cpuset then also all its pages are moved with it so that the | 43 | a new cpuset then also all its pages are moved with it so that the |
43 | performance of the process does not sink dramatically. Also the pages | 44 | performance of the process does not sink dramatically. Also the pages |
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index ce72c0fe6177..600a304a828c 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt | |||
@@ -12,9 +12,9 @@ There are three components to pagemap: | |||
12 | value for each virtual page, containing the following data (from | 12 | value for each virtual page, containing the following data (from |
13 | fs/proc/task_mmu.c, above pagemap_read): | 13 | fs/proc/task_mmu.c, above pagemap_read): |
14 | 14 | ||
15 | * Bits 0-55 page frame number (PFN) if present | 15 | * Bits 0-54 page frame number (PFN) if present |
16 | * Bits 0-4 swap type if swapped | 16 | * Bits 0-4 swap type if swapped |
17 | * Bits 5-55 swap offset if swapped | 17 | * Bits 5-54 swap offset if swapped |
18 | * Bits 55-60 page shift (page size = 1<<page shift) | 18 | * Bits 55-60 page shift (page size = 1<<page shift) |
19 | * Bit 61 reserved for future use | 19 | * Bit 61 reserved for future use |
20 | * Bit 62 page swapped | 20 | * Bit 62 page swapped |
@@ -36,7 +36,7 @@ There are three components to pagemap: | |||
36 | * /proc/kpageflags. This file contains a 64-bit set of flags for each | 36 | * /proc/kpageflags. This file contains a 64-bit set of flags for each |
37 | page, indexed by PFN. | 37 | page, indexed by PFN. |
38 | 38 | ||
39 | The flags are (from fs/proc/proc_misc, above kpageflags_read): | 39 | The flags are (from fs/proc/page.c, above kpageflags_read): |
40 | 40 | ||
41 | 0. LOCKED | 41 | 0. LOCKED |
42 | 1. ERROR | 42 | 1. ERROR |
@@ -49,6 +49,68 @@ There are three components to pagemap: | |||
49 | 8. WRITEBACK | 49 | 8. WRITEBACK |
50 | 9. RECLAIM | 50 | 9. RECLAIM |
51 | 10. BUDDY | 51 | 10. BUDDY |
52 | 11. MMAP | ||
53 | 12. ANON | ||
54 | 13. SWAPCACHE | ||
55 | 14. SWAPBACKED | ||
56 | 15. COMPOUND_HEAD | ||
57 | 16. COMPOUND_TAIL | ||
58 | 16. HUGE | ||
59 | 18. UNEVICTABLE | ||
60 | 20. NOPAGE | ||
61 | |||
62 | Short descriptions to the page flags: | ||
63 | |||
64 | 0. LOCKED | ||
65 | page is being locked for exclusive access, eg. by undergoing read/write IO | ||
66 | |||
67 | 7. SLAB | ||
68 | page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator | ||
69 | When compound page is used, SLUB/SLQB will only set this flag on the head | ||
70 | page; SLOB will not flag it at all. | ||
71 | |||
72 | 10. BUDDY | ||
73 | a free memory block managed by the buddy system allocator | ||
74 | The buddy system organizes free memory in blocks of various orders. | ||
75 | An order N block has 2^N physically contiguous pages, with the BUDDY flag | ||
76 | set for and _only_ for the first page. | ||
77 | |||
78 | 15. COMPOUND_HEAD | ||
79 | 16. COMPOUND_TAIL | ||
80 | A compound page with order N consists of 2^N physically contiguous pages. | ||
81 | A compound page with order 2 takes the form of "HTTT", where H donates its | ||
82 | head page and T donates its tail page(s). The major consumers of compound | ||
83 | pages are hugeTLB pages (Documentation/vm/hugetlbpage.txt), the SLUB etc. | ||
84 | memory allocators and various device drivers. However in this interface, | ||
85 | only huge/giga pages are made visible to end users. | ||
86 | 17. HUGE | ||
87 | this is an integral part of a HugeTLB page | ||
88 | |||
89 | 20. NOPAGE | ||
90 | no page frame exists at the requested address | ||
91 | |||
92 | [IO related page flags] | ||
93 | 1. ERROR IO error occurred | ||
94 | 3. UPTODATE page has up-to-date data | ||
95 | ie. for file backed page: (in-memory data revision >= on-disk one) | ||
96 | 4. DIRTY page has been written to, hence contains new data | ||
97 | ie. for file backed page: (in-memory data revision > on-disk one) | ||
98 | 8. WRITEBACK page is being synced to disk | ||
99 | |||
100 | [LRU related page flags] | ||
101 | 5. LRU page is in one of the LRU lists | ||
102 | 6. ACTIVE page is in the active LRU list | ||
103 | 18. UNEVICTABLE page is in the unevictable (non-)LRU list | ||
104 | It is somehow pinned and not a candidate for LRU page reclaims, | ||
105 | eg. ramfs pages, shmctl(SHM_LOCK) and mlock() memory segments | ||
106 | 2. REFERENCED page has been referenced since last LRU list enqueue/requeue | ||
107 | 9. RECLAIM page will be reclaimed soon after its pageout IO completed | ||
108 | 11. MMAP a memory mapped page | ||
109 | 12. ANON a memory mapped page that is not part of a file | ||
110 | 13. SWAPCACHE page is mapped to swap space, ie. has an associated swap entry | ||
111 | 14. SWAPBACKED page is backed by swap/RAM | ||
112 | |||
113 | The page-types tool in this directory can be used to query the above flags. | ||
52 | 114 | ||
53 | Using pagemap to do something useful: | 115 | Using pagemap to do something useful: |
54 | 116 | ||
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index 0706a7282a8c..2d70d0d95108 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt | |||
@@ -1,588 +1,691 @@ | |||
1 | 1 | ============================== | |
2 | This document describes the Linux memory management "Unevictable LRU" | 2 | UNEVICTABLE LRU INFRASTRUCTURE |
3 | infrastructure and the use of this infrastructure to manage several types | 3 | ============================== |
4 | of "unevictable" pages. The document attempts to provide the overall | 4 | |
5 | rationale behind this mechanism and the rationale for some of the design | 5 | ======== |
6 | decisions that drove the implementation. The latter design rationale is | 6 | CONTENTS |
7 | discussed in the context of an implementation description. Admittedly, one | 7 | ======== |
8 | can obtain the implementation details--the "what does it do?"--by reading the | 8 | |
9 | code. One hopes that the descriptions below add value by provide the answer | 9 | (*) The Unevictable LRU |
10 | to "why does it do that?". | 10 | |
11 | 11 | - The unevictable page list. | |
12 | Unevictable LRU Infrastructure: | 12 | - Memory control group interaction. |
13 | 13 | - Marking address spaces unevictable. | |
14 | The Unevictable LRU adds an additional LRU list to track unevictable pages | 14 | - Detecting Unevictable Pages. |
15 | and to hide these pages from vmscan. This mechanism is based on a patch by | 15 | - vmscan's handling of unevictable pages. |
16 | Larry Woodman of Red Hat to address several scalability problems with page | 16 | |
17 | (*) mlock()'d pages. | ||
18 | |||
19 | - History. | ||
20 | - Basic management. | ||
21 | - mlock()/mlockall() system call handling. | ||
22 | - Filtering special vmas. | ||
23 | - munlock()/munlockall() system call handling. | ||
24 | - Migrating mlocked pages. | ||
25 | - mmap(MAP_LOCKED) system call handling. | ||
26 | - munmap()/exit()/exec() system call handling. | ||
27 | - try_to_unmap(). | ||
28 | - try_to_munlock() reverse map scan. | ||
29 | - Page reclaim in shrink_*_list(). | ||
30 | |||
31 | |||
32 | ============ | ||
33 | INTRODUCTION | ||
34 | ============ | ||
35 | |||
36 | This document describes the Linux memory manager's "Unevictable LRU" | ||
37 | infrastructure and the use of this to manage several types of "unevictable" | ||
38 | pages. | ||
39 | |||
40 | The document attempts to provide the overall rationale behind this mechanism | ||
41 | and the rationale for some of the design decisions that drove the | ||
42 | implementation. The latter design rationale is discussed in the context of an | ||
43 | implementation description. Admittedly, one can obtain the implementation | ||
44 | details - the "what does it do?" - by reading the code. One hopes that the | ||
45 | descriptions below add value by provide the answer to "why does it do that?". | ||
46 | |||
47 | |||
48 | =================== | ||
49 | THE UNEVICTABLE LRU | ||
50 | =================== | ||
51 | |||
52 | The Unevictable LRU facility adds an additional LRU list to track unevictable | ||
53 | pages and to hide these pages from vmscan. This mechanism is based on a patch | ||
54 | by Larry Woodman of Red Hat to address several scalability problems with page | ||
17 | reclaim in Linux. The problems have been observed at customer sites on large | 55 | reclaim in Linux. The problems have been observed at customer sites on large |
18 | memory x86_64 systems. For example, a non-numal x86_64 platform with 128GB | 56 | memory x86_64 systems. |
19 | of main memory will have over 32 million 4k pages in a single zone. When a | 57 | |
20 | large fraction of these pages are not evictable for any reason [see below], | 58 | To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of |
21 | vmscan will spend a lot of time scanning the LRU lists looking for the small | 59 | main memory will have over 32 million 4k pages in a single zone. When a large |
22 | fraction of pages that are evictable. This can result in a situation where | 60 | fraction of these pages are not evictable for any reason [see below], vmscan |
23 | all cpus are spending 100% of their time in vmscan for hours or days on end, | 61 | will spend a lot of time scanning the LRU lists looking for the small fraction |
24 | with the system completely unresponsive. | 62 | of pages that are evictable. This can result in a situation where all CPUs are |
25 | 63 | spending 100% of their time in vmscan for hours or days on end, with the system | |
26 | The Unevictable LRU infrastructure addresses the following classes of | 64 | completely unresponsive. |
27 | unevictable pages: | 65 | |
28 | 66 | The unevictable list addresses the following classes of unevictable pages: | |
29 | + page owned by ramfs | 67 | |
30 | + page mapped into SHM_LOCKed shared memory regions | 68 | (*) Those owned by ramfs. |
31 | + page mapped into VM_LOCKED [mlock()ed] vmas | 69 | |
32 | 70 | (*) Those mapped into SHM_LOCK'd shared memory regions. | |
33 | The infrastructure might be able to handle other conditions that make pages | 71 | |
72 | (*) Those mapped into VM_LOCKED [mlock()ed] VMAs. | ||
73 | |||
74 | The infrastructure may also be able to handle other conditions that make pages | ||
34 | unevictable, either by definition or by circumstance, in the future. | 75 | unevictable, either by definition or by circumstance, in the future. |
35 | 76 | ||
36 | 77 | ||
37 | The Unevictable LRU List | 78 | THE UNEVICTABLE PAGE LIST |
79 | ------------------------- | ||
38 | 80 | ||
39 | The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list | 81 | The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list |
40 | called the "unevictable" list and an associated page flag, PG_unevictable, to | 82 | called the "unevictable" list and an associated page flag, PG_unevictable, to |
41 | indicate that the page is being managed on the unevictable list. The | 83 | indicate that the page is being managed on the unevictable list. |
42 | PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active | 84 | |
43 | flag in that it indicates on which LRU list a page resides when PG_lru is set. | 85 | The PG_unevictable flag is analogous to, and mutually exclusive with, the |
44 | The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU | 86 | PG_active flag in that it indicates on which LRU list a page resides when |
45 | Kconfig option. | 87 | PG_lru is set. The unevictable list is compile-time configurable based on the |
88 | UNEVICTABLE_LRU Kconfig option. | ||
46 | 89 | ||
47 | The Unevictable LRU infrastructure maintains unevictable pages on an additional | 90 | The Unevictable LRU infrastructure maintains unevictable pages on an additional |
48 | LRU list for a few reasons: | 91 | LRU list for a few reasons: |
49 | 92 | ||
50 | 1) We get to "treat unevictable pages just like we treat other pages in the | 93 | (1) We get to "treat unevictable pages just like we treat other pages in the |
51 | system, which means we get to use the same code to manipulate them, the | 94 | system - which means we get to use the same code to manipulate them, the |
52 | same code to isolate them (for migrate, etc.), the same code to keep track | 95 | same code to isolate them (for migrate, etc.), the same code to keep track |
53 | of the statistics, etc..." [Rik van Riel] | 96 | of the statistics, etc..." [Rik van Riel] |
97 | |||
98 | (2) We want to be able to migrate unevictable pages between nodes for memory | ||
99 | defragmentation, workload management and memory hotplug. The linux kernel | ||
100 | can only migrate pages that it can successfully isolate from the LRU | ||
101 | lists. If we were to maintain pages elsewhere than on an LRU-like list, | ||
102 | where they can be found by isolate_lru_page(), we would prevent their | ||
103 | migration, unless we reworked migration code to find the unevictable pages | ||
104 | itself. | ||
54 | 105 | ||
55 | 2) We want to be able to migrate unevictable pages between nodes--for memory | ||
56 | defragmentation, workload management and memory hotplug. The linux kernel | ||
57 | can only migrate pages that it can successfully isolate from the lru lists. | ||
58 | If we were to maintain pages elsewise than on an lru-like list, where they | ||
59 | can be found by isolate_lru_page(), we would prevent their migration, unless | ||
60 | we reworked migration code to find the unevictable pages. | ||
61 | 106 | ||
107 | The unevictable list does not differentiate between file-backed and anonymous, | ||
108 | swap-backed pages. This differentiation is only important while the pages are, | ||
109 | in fact, evictable. | ||
62 | 110 | ||
63 | The unevictable LRU list does not differentiate between file backed and swap | 111 | The unevictable list benefits from the "arrayification" of the per-zone LRU |
64 | backed [anon] pages. This differentiation is only important while the pages | 112 | lists and statistics originally proposed and posted by Christoph Lameter. |
65 | are, in fact, evictable. | ||
66 | 113 | ||
67 | The unevictable LRU list benefits from the "arrayification" of the per-zone | 114 | The unevictable list does not use the LRU pagevec mechanism. Rather, |
68 | LRU lists and statistics originally proposed and posted by Christoph Lameter. | 115 | unevictable pages are placed directly on the page's zone's unevictable list |
116 | under the zone lru_lock. This allows us to prevent the stranding of pages on | ||
117 | the unevictable list when one task has the page isolated from the LRU and other | ||
118 | tasks are changing the "evictability" state of the page. | ||
69 | 119 | ||
70 | The unevictable list does not use the lru pagevec mechanism. Rather, | ||
71 | unevictable pages are placed directly on the page's zone's unevictable | ||
72 | list under the zone lru_lock. The reason for this is to prevent stranding | ||
73 | of pages on the unevictable list when one task has the page isolated from the | ||
74 | lru and other tasks are changing the "evictability" state of the page. | ||
75 | 120 | ||
121 | MEMORY CONTROL GROUP INTERACTION | ||
122 | -------------------------------- | ||
76 | 123 | ||
77 | Unevictable LRU and Memory Controller Interaction | 124 | The unevictable LRU facility interacts with the memory control group [aka |
125 | memory controller; see Documentation/cgroups/memory.txt] by extending the | ||
126 | lru_list enum. | ||
127 | |||
128 | The memory controller data structure automatically gets a per-zone unevictable | ||
129 | list as a result of the "arrayification" of the per-zone LRU lists (one per | ||
130 | lru_list enum element). The memory controller tracks the movement of pages to | ||
131 | and from the unevictable list. | ||
78 | 132 | ||
79 | The memory controller data structure automatically gets a per zone unevictable | ||
80 | lru list as a result of the "arrayification" of the per-zone LRU lists. The | ||
81 | memory controller tracks the movement of pages to and from the unevictable list. | ||
82 | When a memory control group comes under memory pressure, the controller will | 133 | When a memory control group comes under memory pressure, the controller will |
83 | not attempt to reclaim pages on the unevictable list. This has a couple of | 134 | not attempt to reclaim pages on the unevictable list. This has a couple of |
84 | effects. Because the pages are "hidden" from reclaim on the unevictable list, | 135 | effects: |
85 | the reclaim process can be more efficient, dealing only with pages that have | 136 | |
86 | a chance of being reclaimed. On the other hand, if too many of the pages | 137 | (1) Because the pages are "hidden" from reclaim on the unevictable list, the |
87 | charged to the control group are unevictable, the evictable portion of the | 138 | reclaim process can be more efficient, dealing only with pages that have a |
88 | working set of the tasks in the control group may not fit into the available | 139 | chance of being reclaimed. |
89 | memory. This can cause the control group to thrash or to oom-kill tasks. | 140 | |
90 | 141 | (2) On the other hand, if too many of the pages charged to the control group | |
91 | 142 | are unevictable, the evictable portion of the working set of the tasks in | |
92 | Unevictable LRU: Detecting Unevictable Pages | 143 | the control group may not fit into the available memory. This can cause |
93 | 144 | the control group to thrash or to OOM-kill tasks. | |
94 | The function page_evictable(page, vma) in vmscan.c determines whether a | 145 | |
95 | page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions, | 146 | |
96 | page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's | 147 | MARKING ADDRESS SPACES UNEVICTABLE |
97 | address space using a wrapper function. Wrapper functions are used to set, | 148 | ---------------------------------- |
98 | clear and test the flag to reduce the requirement for #ifdef's throughout the | 149 | |
99 | source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created. | 150 | For facilities such as ramfs none of the pages attached to the address space |
100 | This flag remains for the life of the inode. | 151 | may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE |
101 | 152 | address space flag is provided, and this can be manipulated by a filesystem | |
102 | For shared memory regions, AS_UNEVICTABLE is set when an application | 153 | using a number of wrapper functions: |
103 | successfully SHM_LOCKs the region and is removed when the region is | 154 | |
104 | SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page | 155 | (*) void mapping_set_unevictable(struct address_space *mapping); |
105 | tables for the region as does, for example, mlock(). So, we make no special | 156 | |
106 | effort to push any pages in the SHM_LOCKed region to the unevictable list. | 157 | Mark the address space as being completely unevictable. |
107 | Vmscan will do this when/if it encounters the pages during reclaim. On | 158 | |
108 | SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the | 159 | (*) void mapping_clear_unevictable(struct address_space *mapping); |
109 | unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed | 160 | |
110 | region is destroyed, the pages are also "rescued" from the unevictable list in | 161 | Mark the address space as being evictable. |
111 | the process of freeing them. | 162 | |
112 | 163 | (*) int mapping_unevictable(struct address_space *mapping); | |
113 | page_evictable() detects mlock()ed pages by testing an additional page flag, | 164 | |
114 | PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a | 165 | Query the address space, and return true if it is completely |
115 | non-NULL vma is supplied, page_evictable() will check whether the vma is | 166 | unevictable. |
167 | |||
168 | These are currently used in two places in the kernel: | ||
169 | |||
170 | (1) By ramfs to mark the address spaces of its inodes when they are created, | ||
171 | and this mark remains for the life of the inode. | ||
172 | |||
173 | (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called. | ||
174 | |||
175 | Note that SHM_LOCK is not required to page in the locked pages if they're | ||
176 | swapped out; the application must touch the pages manually if it wants to | ||
177 | ensure they're in memory. | ||
178 | |||
179 | |||
180 | DETECTING UNEVICTABLE PAGES | ||
181 | --------------------------- | ||
182 | |||
183 | The function page_evictable() in vmscan.c determines whether a page is | ||
184 | evictable or not using the query function outlined above [see section "Marking | ||
185 | address spaces unevictable"] to check the AS_UNEVICTABLE flag. | ||
186 | |||
187 | For address spaces that are so marked after being populated (as SHM regions | ||
188 | might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate | ||
189 | the page tables for the region as does, for example, mlock(), nor need it make | ||
190 | any special effort to push any pages in the SHM_LOCK'd area to the unevictable | ||
191 | list. Instead, vmscan will do this if and when it encounters the pages during | ||
192 | a reclamation scan. | ||
193 | |||
194 | On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan | ||
195 | the pages in the region and "rescue" them from the unevictable list if no other | ||
196 | condition is keeping them unevictable. If an unevictable region is destroyed, | ||
197 | the pages are also "rescued" from the unevictable list in the process of | ||
198 | freeing them. | ||
199 | |||
200 | page_evictable() also checks for mlocked pages by testing an additional page | ||
201 | flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked, | ||
202 | and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is | ||
116 | VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and | 203 | VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and |
117 | update the appropriate statistics if the vma is VM_LOCKED. This method allows | 204 | update the appropriate statistics if the vma is VM_LOCKED. This method allows |
118 | efficient "culling" of pages in the fault path that are being faulted in to | 205 | efficient "culling" of pages in the fault path that are being faulted in to |
119 | VM_LOCKED vmas. | 206 | VM_LOCKED VMAs. |
120 | 207 | ||
121 | 208 | ||
122 | Unevictable Pages and Vmscan [shrink_*_list()] | 209 | VMSCAN'S HANDLING OF UNEVICTABLE PAGES |
210 | -------------------------------------- | ||
123 | 211 | ||
124 | If unevictable pages are culled in the fault path, or moved to the unevictable | 212 | If unevictable pages are culled in the fault path, or moved to the unevictable |
125 | list at mlock() or mmap() time, vmscan will never encounter the pages until | 213 | list at mlock() or mmap() time, vmscan will not encounter the pages until they |
126 | they have become evictable again, for example, via munlock() and have been | 214 | have become evictable again (via munlock() for example) and have been "rescued" |
127 | "rescued" from the unevictable list. However, there may be situations where we | 215 | from the unevictable list. However, there may be situations where we decide, |
128 | decide, for the sake of expediency, to leave a unevictable page on one of the | 216 | for the sake of expediency, to leave a unevictable page on one of the regular |
129 | regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for | 217 | active/inactive LRU lists for vmscan to deal with. vmscan checks for such |
130 | such pages in all of the shrink_{active|inactive|page}_list() functions and | 218 | pages in all of the shrink_{active|inactive|page}_list() functions and will |
131 | will "cull" such pages that it encounters--that is, it diverts those pages to | 219 | "cull" such pages that it encounters: that is, it diverts those pages to the |
132 | the unevictable list for the zone being scanned. | 220 | unevictable list for the zone being scanned. |
133 | 221 | ||
134 | There may be situations where a page is mapped into a VM_LOCKED vma, but the | 222 | There may be situations where a page is mapped into a VM_LOCKED VMA, but the |
135 | page is not marked as PageMlocked. Such pages will make it all the way to | 223 | page is not marked as PG_mlocked. Such pages will make it all the way to |
136 | shrink_page_list() where they will be detected when vmscan walks the reverse | 224 | shrink_page_list() where they will be detected when vmscan walks the reverse |
137 | map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() | 225 | map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, |
138 | will cull the page at that point. | 226 | shrink_page_list() will cull the page at that point. |
139 | 227 | ||
140 | To "cull" an unevictable page, vmscan simply puts the page back on the lru | 228 | To "cull" an unevictable page, vmscan simply puts the page back on the LRU list |
141 | list using putback_lru_page()--the inverse operation to isolate_lru_page()-- | 229 | using putback_lru_page() - the inverse operation to isolate_lru_page() - after |
142 | after dropping the page lock. Because the condition which makes the page | 230 | dropping the page lock. Because the condition which makes the page unevictable |
143 | unevictable may change once the page is unlocked, putback_lru_page() will | 231 | may change once the page is unlocked, putback_lru_page() will recheck the |
144 | recheck the unevictable state of a page that it places on the unevictable lru | 232 | unevictable state of a page that it places on the unevictable list. If the |
145 | list. If the page has become unevictable, putback_lru_page() removes it from | 233 | page has become unevictable, putback_lru_page() removes it from the list and |
146 | the list and retries, including the page_unevictable() test. Because such a | 234 | retries, including the page_unevictable() test. Because such a race is a rare |
147 | race is a rare event and movement of pages onto the unevictable list should be | 235 | event and movement of pages onto the unevictable list should be rare, these |
148 | rare, these extra evictabilty checks should not occur in the majority of calls | 236 | extra evictabilty checks should not occur in the majority of calls to |
149 | to putback_lru_page(). | 237 | putback_lru_page(). |
150 | 238 | ||
151 | 239 | ||
152 | Mlocked Page: Prior Work | 240 | ============= |
241 | MLOCKED PAGES | ||
242 | ============= | ||
153 | 243 | ||
154 | The "Unevictable Mlocked Pages" infrastructure is based on work originally | 244 | The unevictable page list is also useful for mlock(), in addition to ramfs and |
245 | SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in | ||
246 | NOMMU situations, all mappings are effectively mlocked. | ||
247 | |||
248 | |||
249 | HISTORY | ||
250 | ------- | ||
251 | |||
252 | The "Unevictable mlocked Pages" infrastructure is based on work originally | ||
155 | posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU". | 253 | posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU". |
156 | Nick posted his patch as an alternative to a patch posted by Christoph | 254 | Nick posted his patch as an alternative to a patch posted by Christoph Lameter |
157 | Lameter to achieve the same objective--hiding mlocked pages from vmscan. | 255 | to achieve the same objective: hiding mlocked pages from vmscan. |
158 | In Nick's patch, he used one of the struct page lru list link fields as a count | 256 | |
159 | of VM_LOCKED vmas that map the page. This use of the link field for a count | 257 | In Nick's patch, he used one of the struct page LRU list link fields as a count |
160 | prevented the management of the pages on an LRU list. Thus, mlocked pages were | 258 | of VM_LOCKED VMAs that map the page. This use of the link field for a count |
161 | not migratable as isolate_lru_page() could not find them and the lru list link | 259 | prevented the management of the pages on an LRU list, and thus mlocked pages |
162 | field was not available to the migration subsystem. Nick resolved this by | 260 | were not migratable as isolate_lru_page() could not find them, and the LRU list |
163 | putting mlocked pages back on the lru list before attempting to isolate them, | 261 | link field was not available to the migration subsystem. |
164 | thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated | 262 | |
165 | with the Unevictable LRU work, the count was replaced by walking the reverse | 263 | Nick resolved this by putting mlocked pages back on the lru list before |
166 | map to determine whether any VM_LOCKED vmas mapped the page. More on this | 264 | attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When |
167 | below. | 265 | Nick's patch was integrated with the Unevictable LRU work, the count was |
168 | 266 | replaced by walking the reverse map to determine whether any VM_LOCKED VMAs | |
169 | 267 | mapped the page. More on this below. | |
170 | Mlocked Pages: Basic Management | 268 | |
171 | 269 | ||
172 | Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of | 270 | BASIC MANAGEMENT |
173 | unevictable pages. When such a page has been "noticed" by the memory | 271 | ---------------- |
174 | management subsystem, the page is marked with the PG_mlocked [PageMlocked()] | 272 | |
175 | flag. A PageMlocked() page will be placed on the unevictable LRU list when | 273 | mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable |
176 | it is added to the LRU. Pages can be "noticed" by memory management in | 274 | pages. When such a page has been "noticed" by the memory management subsystem, |
177 | several places: | 275 | the page is marked with the PG_mlocked flag. This can be manipulated using the |
178 | 276 | PageMlocked() functions. | |
179 | 1) in the mlock()/mlockall() system call handlers. | 277 | |
180 | 2) in the mmap() system call handler when mmap()ing a region with the | 278 | A PG_mlocked page will be placed on the unevictable list when it is added to |
181 | MAP_LOCKED flag, or mmap()ing a region in a task that has called | 279 | the LRU. Such pages can be "noticed" by memory management in several places: |
182 | mlockall() with the MCL_FUTURE flag. Both of these conditions result | 280 | |
183 | in the VM_LOCKED flag being set for the vma. | 281 | (1) in the mlock()/mlockall() system call handlers; |
184 | 3) in the fault path, if mlocked pages are "culled" in the fault path, | 282 | |
185 | and when a VM_LOCKED stack segment is expanded. | 283 | (2) in the mmap() system call handler when mmapping a region with the |
186 | 4) as mentioned above, in vmscan:shrink_page_list() when attempting to | 284 | MAP_LOCKED flag; |
187 | reclaim a page in a VM_LOCKED vma via try_to_unmap(). | 285 | |
188 | 286 | (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE | |
189 | Mlocked pages become unlocked and rescued from the unevictable list when: | 287 | flag |
190 | 288 | ||
191 | 1) mapped in a range unlocked via the munlock()/munlockall() system calls. | 289 | (4) in the fault path, if mlocked pages are "culled" in the fault path, |
192 | 2) munmapped() out of the last VM_LOCKED vma that maps the page, including | 290 | and when a VM_LOCKED stack segment is expanded; or |
193 | unmapping at task exit. | 291 | |
194 | 3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file. | 292 | (5) as mentioned above, in vmscan:shrink_page_list() when attempting to |
195 | 4) before a page is COWed in a VM_LOCKED vma. | 293 | reclaim a page in a VM_LOCKED VMA via try_to_unmap() |
196 | 294 | ||
197 | 295 | all of which result in the VM_LOCKED flag being set for the VMA if it doesn't | |
198 | Mlocked Pages: mlock()/mlockall() System Call Handling | 296 | already have it set. |
297 | |||
298 | mlocked pages become unlocked and rescued from the unevictable list when: | ||
299 | |||
300 | (1) mapped in a range unlocked via the munlock()/munlockall() system calls; | ||
301 | |||
302 | (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including | ||
303 | unmapping at task exit; | ||
304 | |||
305 | (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file; | ||
306 | or | ||
307 | |||
308 | (4) before a page is COW'd in a VM_LOCKED VMA. | ||
309 | |||
310 | |||
311 | mlock()/mlockall() SYSTEM CALL HANDLING | ||
312 | --------------------------------------- | ||
199 | 313 | ||
200 | Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup() | 314 | Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup() |
201 | for each vma in the range specified by the call. In the case of mlockall(), | 315 | for each VMA in the range specified by the call. In the case of mlockall(), |
202 | this is the entire active address space of the task. Note that mlock_fixup() | 316 | this is the entire active address space of the task. Note that mlock_fixup() |
203 | is used for both mlock()ing and munlock()ing a range of memory. A call to | 317 | is used for both mlocking and munlocking a range of memory. A call to mlock() |
204 | mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED | 318 | an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is |
205 | is treated as a no-op--mlock_fixup() simply returns. | 319 | treated as a no-op, and mlock_fixup() simply returns. |
206 | 320 | ||
207 | If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas" | 321 | If the VMA passes some filtering as described in "Filtering Special Vmas" |
208 | below, mlock_fixup() will attempt to merge the vma with its neighbors or split | 322 | below, mlock_fixup() will attempt to merge the VMA with its neighbors or split |
209 | off a subset of the vma if the range does not cover the entire vma. Once the | 323 | off a subset of the VMA if the range does not cover the entire VMA. Once the |
210 | vma has been merged or split or neither, mlock_fixup() will call | 324 | VMA has been merged or split or neither, mlock_fixup() will call |
211 | __mlock_vma_pages_range() to fault in the pages via get_user_pages() and | 325 | __mlock_vma_pages_range() to fault in the pages via get_user_pages() and to |
212 | to mark the pages as mlocked via mlock_vma_page(). | 326 | mark the pages as mlocked via mlock_vma_page(). |
213 | 327 | ||
214 | Note that the vma being mlocked might be mapped with PROT_NONE. In this case, | 328 | Note that the VMA being mlocked might be mapped with PROT_NONE. In this case, |
215 | get_user_pages() will be unable to fault in the pages. That's OK. If pages | 329 | get_user_pages() will be unable to fault in the pages. That's okay. If pages |
216 | do end up getting faulted into this VM_LOCKED vma, we'll handle them in the | 330 | do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the |
217 | fault path or in vmscan. | 331 | fault path or in vmscan. |
218 | 332 | ||
219 | Also note that a page returned by get_user_pages() could be truncated or | 333 | Also note that a page returned by get_user_pages() could be truncated or |
220 | migrated out from under us, while we're trying to mlock it. To detect | 334 | migrated out from under us, while we're trying to mlock it. To detect this, |
221 | this, __mlock_vma_pages_range() tests the page_mapping after acquiring | 335 | __mlock_vma_pages_range() checks page_mapping() after acquiring the page lock. |
222 | the page lock. If the page is still associated with its mapping, we'll | 336 | If the page is still associated with its mapping, we'll go ahead and call |
223 | go ahead and call mlock_vma_page(). If the mapping is gone, we just | 337 | mlock_vma_page(). If the mapping is gone, we just unlock the page and move on. |
224 | unlock the page and move on. Worse case, this results in page mapped | 338 | In the worst case, this will result in a page mapped in a VM_LOCKED VMA |
225 | in a VM_LOCKED vma remaining on a normal LRU list without being | 339 | remaining on a normal LRU list without being PageMlocked(). Again, vmscan will |
226 | PageMlocked(). Again, vmscan will detect and cull such pages. | 340 | detect and cull such pages. |
227 | 341 | ||
228 | mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will | 342 | mlock_vma_page() will call TestSetPageMlocked() for each page returned by |
229 | TestSetPageMlocked() for each page returned by get_user_pages(). We use | 343 | get_user_pages(). We use TestSetPageMlocked() because the page might already |
230 | TestSetPageMlocked() because the page might already be mlocked by another | 344 | be mlocked by another task/VMA and we don't want to do extra work. We |
231 | task/vma and we don't want to do extra work. We especially do not want to | 345 | especially do not want to count an mlocked page more than once in the |
232 | count an mlocked page more than once in the statistics. If the page was | 346 | statistics. If the page was already mlocked, mlock_vma_page() need do nothing |
233 | already mlocked, mlock_vma_page() is done. | 347 | more. |
234 | 348 | ||
235 | If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the | 349 | If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the |
236 | page from the LRU, as it is likely on the appropriate active or inactive list | 350 | page from the LRU, as it is likely on the appropriate active or inactive list |
237 | at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will | 351 | at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put |
238 | putback the page--putback_lru_page()--which will notice that the page is now | 352 | back the page - by calling putback_lru_page() - which will notice that the page |
239 | mlocked and divert the page to the zone's unevictable LRU list. If | 353 | is now mlocked and divert the page to the zone's unevictable list. If |
240 | mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle | 354 | mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle |
241 | it later if/when it attempts to reclaim the page. | 355 | it later if and when it attempts to reclaim the page. |
242 | 356 | ||
243 | 357 | ||
244 | Mlocked Pages: Filtering Special Vmas | 358 | FILTERING SPECIAL VMAS |
359 | ---------------------- | ||
245 | 360 | ||
246 | mlock_fixup() filters several classes of "special" vmas: | 361 | mlock_fixup() filters several classes of "special" VMAs: |
247 | 362 | ||
248 | 1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind | 363 | 1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind |
249 | these mappings are inherently pinned, so we don't need to mark them as | 364 | these mappings are inherently pinned, so we don't need to mark them as |
250 | mlocked. In any case, most of the pages have no struct page in which to | 365 | mlocked. In any case, most of the pages have no struct page in which to so |
251 | so mark the page. Because of this, get_user_pages() will fail for these | 366 | mark the page. Because of this, get_user_pages() will fail for these VMAs, |
252 | vmas, so there is no sense in attempting to visit them. | 367 | so there is no sense in attempting to visit them. |
253 | 368 | ||
254 | 2) vmas mapping hugetlbfs page are already effectively pinned into memory. | 369 | 2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We |
255 | We don't need nor want to mlock() these pages. However, to preserve the | 370 | neither need nor want to mlock() these pages. However, to preserve the |
256 | prior behavior of mlock()--before the unevictable/mlock changes-- | 371 | prior behavior of mlock() - before the unevictable/mlock changes - |
257 | mlock_fixup() will call make_pages_present() in the hugetlbfs vma range | 372 | mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to |
258 | to allocate the huge pages and populate the ptes. | 373 | allocate the huge pages and populate the ptes. |
259 | 374 | ||
260 | 3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of | 375 | 3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of |
261 | kernel pages, such as the vdso page, relay channel pages, etc. These pages | 376 | kernel pages, such as the VDSO page, relay channel pages, etc. These pages |
262 | are inherently unevictable and are not managed on the LRU lists. | 377 | are inherently unevictable and are not managed on the LRU lists. |
263 | mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls | 378 | mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls |
264 | make_pages_present() to populate the ptes. | 379 | make_pages_present() to populate the ptes. |
265 | 380 | ||
266 | Note that for all of these special vmas, mlock_fixup() does not set the | 381 | Note that for all of these special VMAs, mlock_fixup() does not set the |
267 | VM_LOCKED flag. Therefore, we won't have to deal with them later during | 382 | VM_LOCKED flag. Therefore, we won't have to deal with them later during |
268 | munlock() or munmap()--for example, at task exit. Neither does mlock_fixup() | 383 | munlock(), munmap() or task exit. Neither does mlock_fixup() account these |
269 | account these vmas against the task's "locked_vm". | 384 | VMAs against the task's "locked_vm". |
270 | 385 | ||
271 | Mlocked Pages: Downgrading the Mmap Semaphore. | 386 | |
272 | 387 | munlock()/munlockall() SYSTEM CALL HANDLING | |
273 | mlock_fixup() must be called with the mmap semaphore held for write, because | 388 | ------------------------------------------- |
274 | it may have to merge or split vmas. However, mlocking a large region of | 389 | |
275 | memory can take a long time--especially if vmscan must reclaim pages to | 390 | The munlock() and munlockall() system calls are handled by the same functions - |
276 | satisfy the regions requirements. Faulting in a large region with the mmap | 391 | do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs |
277 | semaphore held for write can hold off other faults on the address space, in | 392 | lock operation indicated by an argument. So, these system calls are also |
278 | the case of a multi-threaded task. It can also hold off scans of the task's | 393 | handled by mlock_fixup(). Again, if called for an already munlocked VMA, |
279 | address space via /proc. While testing under heavy load, it was observed that | 394 | mlock_fixup() simply returns. Because of the VMA filtering discussed above, |
280 | the ps(1) command could be held off for many minutes while a large segment was | 395 | VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be |
281 | mlock()ed down. | ||
282 | |||
283 | To address this issue, and to make the system more responsive during mlock()ing | ||
284 | of large segments, mlock_fixup() downgrades the mmap semaphore to read mode | ||
285 | during the call to __mlock_vma_pages_range(). This works fine. However, the | ||
286 | callers of mlock_fixup() expect the semaphore to be returned in write mode. | ||
287 | So, mlock_fixup() "upgrades" the semphore to write mode. Linux does not | ||
288 | support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore | ||
289 | and reacquire it in write mode. In a multi-threaded task, it is possible for | ||
290 | the task memory map to change while the semaphore is dropped. Therefore, | ||
291 | mlock_fixup() looks up the vma at the range start address after reacquiring | ||
292 | the semaphore in write mode and verifies that it still covers the original | ||
293 | range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of | ||
294 | mlock_fixup() have been changed to deal with this new error condition. | ||
295 | |||
296 | Note: when munlocking a region, all of the pages should already be resident-- | ||
297 | unless we have racing threads mlocking() and munlocking() regions. So, | ||
298 | unlocking should not have to wait for page allocations nor faults of any kind. | ||
299 | Therefore mlock_fixup() does not downgrade the semaphore for munlock(). | ||
300 | |||
301 | |||
302 | Mlocked Pages: munlock()/munlockall() System Call Handling | ||
303 | |||
304 | The munlock() and munlockall() system calls are handled by the same functions-- | ||
305 | do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock | ||
306 | vs lock operation indicated by an argument. So, these system calls are also | ||
307 | handled by mlock_fixup(). Again, if called for an already munlock()ed vma, | ||
308 | mlock_fixup() simply returns. Because of the vma filtering discussed above, | ||
309 | VM_LOCKED will not be set in any "special" vmas. So, these vmas will be | ||
310 | ignored for munlock. | 396 | ignored for munlock. |
311 | 397 | ||
312 | If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off | 398 | If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the |
313 | the specified range. The range is then munlocked via the function | 399 | specified range. The range is then munlocked via the function |
314 | __mlock_vma_pages_range()--the same function used to mlock a vma range-- | 400 | __mlock_vma_pages_range() - the same function used to mlock a VMA range - |
315 | passing a flag to indicate that munlock() is being performed. | 401 | passing a flag to indicate that munlock() is being performed. |
316 | 402 | ||
317 | Because the vma access protections could have been changed to PROT_NONE after | 403 | Because the VMA access protections could have been changed to PROT_NONE after |
318 | faulting in and mlocking pages, get_user_pages() was unreliable for visiting | 404 | faulting in and mlocking pages, get_user_pages() was unreliable for visiting |
319 | these pages for munlocking. Because we don't want to leave pages mlocked(), | 405 | these pages for munlocking. Because we don't want to leave pages mlocked, |
320 | get_user_pages() was enhanced to accept a flag to ignore the permissions when | 406 | get_user_pages() was enhanced to accept a flag to ignore the permissions when |
321 | fetching the pages--all of which should be resident as a result of previous | 407 | fetching the pages - all of which should be resident as a result of previous |
322 | mlock()ing. | 408 | mlocking. |
323 | 409 | ||
324 | For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling | 410 | For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling |
325 | munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked | 411 | munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked |
326 | flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page() | 412 | flag using TestClearPageMlocked(). As with mlock_vma_page(), |
327 | use the Test*PageMlocked() function to handle the case where the page might | 413 | munlock_vma_page() use the Test*PageMlocked() function to handle the case where |
328 | have already been unlocked by another task. If the page was mlocked, | 414 | the page might have already been unlocked by another task. If the page was |
329 | munlock_vma_page() updates that zone statistics for the number of mlocked | 415 | mlocked, munlock_vma_page() updates that zone statistics for the number of |
330 | pages. Note, however, that at this point we haven't checked whether the page | 416 | mlocked pages. Note, however, that at this point we haven't checked whether |
331 | is mapped by other VM_LOCKED vmas. | 417 | the page is mapped by other VM_LOCKED VMAs. |
332 | 418 | ||
333 | We can't call try_to_munlock(), the function that walks the reverse map to check | 419 | We can't call try_to_munlock(), the function that walks the reverse map to |
334 | for other VM_LOCKED vmas, without first isolating the page from the LRU. | 420 | check for other VM_LOCKED VMAs, without first isolating the page from the LRU. |
335 | try_to_munlock() is a variant of try_to_unmap() and thus requires that the page | 421 | try_to_munlock() is a variant of try_to_unmap() and thus requires that the page |
336 | not be on an lru list. [More on these below.] However, the call to | 422 | not be on an LRU list [more on these below]. However, the call to |
337 | isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). | 423 | isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So, |
338 | So, we go ahead and clear PG_mlocked up front, as this might be the only chance | 424 | we go ahead and clear PG_mlocked up front, as this might be the only chance we |
339 | we have. If we can successfully isolate the page, we go ahead and | 425 | have. If we can successfully isolate the page, we go ahead and |
340 | try_to_munlock(), which will restore the PG_mlocked flag and update the zone | 426 | try_to_munlock(), which will restore the PG_mlocked flag and update the zone |
341 | page statistics if it finds another vma holding the page mlocked. If we fail | 427 | page statistics if it finds another VMA holding the page mlocked. If we fail |
342 | to isolate the page, we'll have left a potentially mlocked page on the LRU. | 428 | to isolate the page, we'll have left a potentially mlocked page on the LRU. |
343 | This is fine, because we'll catch it later when/if vmscan tries to reclaim the | 429 | This is fine, because we'll catch it later if and if vmscan tries to reclaim |
344 | page. This should be relatively rare. | 430 | the page. This should be relatively rare. |
345 | 431 | ||
346 | Mlocked Pages: Migrating Them... | 432 | |
347 | 433 | MIGRATING MLOCKED PAGES | |
348 | A page that is being migrated has been isolated from the lru lists and is | 434 | ----------------------- |
349 | held locked across unmapping of the page, updating the page's mapping | 435 | |
350 | [address_space] entry and copying the contents and state, until the | 436 | A page that is being migrated has been isolated from the LRU lists and is held |
351 | page table entry has been replaced with an entry that refers to the new | 437 | locked across unmapping of the page, updating the page's address space entry |
352 | page. Linux supports migration of mlocked pages and other unevictable | 438 | and copying the contents and state, until the page table entry has been |
353 | pages. This involves simply moving the PageMlocked and PageUnevictable states | 439 | replaced with an entry that refers to the new page. Linux supports migration |
354 | from the old page to the new page. | 440 | of mlocked pages and other unevictable pages. This involves simply moving the |
355 | 441 | PG_mlocked and PG_unevictable states from the old page to the new page. | |
356 | Note that page migration can race with mlocking or munlocking of the same | 442 | |
357 | page. This has been discussed from the mlock/munlock perspective in the | 443 | Note that page migration can race with mlocking or munlocking of the same page. |
358 | respective sections above. Both processes [migration, m[un]locking], hold | 444 | This has been discussed from the mlock/munlock perspective in the respective |
359 | the page locked. This provides the first level of synchronization. Page | 445 | sections above. Both processes (migration and m[un]locking) hold the page |
360 | migration zeros out the page_mapping of the old page before unlocking it, | 446 | locked. This provides the first level of synchronization. Page migration |
361 | so m[un]lock can skip these pages by testing the page mapping under page | 447 | zeros out the page_mapping of the old page before unlocking it, so m[un]lock |
362 | lock. | 448 | can skip these pages by testing the page mapping under page lock. |
363 | 449 | ||
364 | When completing page migration, we place the new and old pages back onto the | 450 | To complete page migration, we place the new and old pages back onto the LRU |
365 | lru after dropping the page lock. The "unneeded" page--old page on success, | 451 | after dropping the page lock. The "unneeded" page - old page on success, new |
366 | new page on failure--will be freed when the reference count held by the | 452 | page on failure - will be freed when the reference count held by the migration |
367 | migration process is released. To ensure that we don't strand pages on the | 453 | process is released. To ensure that we don't strand pages on the unevictable |
368 | unevictable list because of a race between munlock and migration, page | 454 | list because of a race between munlock and migration, page migration uses the |
369 | migration uses the putback_lru_page() function to add migrated pages back to | 455 | putback_lru_page() function to add migrated pages back to the LRU. |
370 | the lru. | 456 | |
371 | 457 | ||
372 | 458 | mmap(MAP_LOCKED) SYSTEM CALL HANDLING | |
373 | Mlocked Pages: mmap(MAP_LOCKED) System Call Handling | 459 | ------------------------------------- |
374 | 460 | ||
375 | In addition the the mlock()/mlockall() system calls, an application can request | 461 | In addition the the mlock()/mlockall() system calls, an application can request |
376 | that a region of memory be mlocked using the MAP_LOCKED flag with the mmap() | 462 | that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap() |
377 | call. Furthermore, any mmap() call or brk() call that expands the heap by a | 463 | call. Furthermore, any mmap() call or brk() call that expands the heap by a |
378 | task that has previously called mlockall() with the MCL_FUTURE flag will result | 464 | task that has previously called mlockall() with the MCL_FUTURE flag will result |
379 | in the newly mapped memory being mlocked. Before the unevictable/mlock changes, | 465 | in the newly mapped memory being mlocked. Before the unevictable/mlock |
380 | the kernel simply called make_pages_present() to allocate pages and populate | 466 | changes, the kernel simply called make_pages_present() to allocate pages and |
381 | the page table. | 467 | populate the page table. |
382 | 468 | ||
383 | To mlock a range of memory under the unevictable/mlock infrastructure, the | 469 | To mlock a range of memory under the unevictable/mlock infrastructure, the |
384 | mmap() handler and task address space expansion functions call | 470 | mmap() handler and task address space expansion functions call |
385 | mlock_vma_pages_range() specifying the vma and the address range to mlock. | 471 | mlock_vma_pages_range() specifying the vma and the address range to mlock. |
386 | mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in | 472 | mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in |
387 | "Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will | 473 | "Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have |
388 | have already been set by the caller, in filtered vmas. Thus these vma's need | 474 | already been set by the caller, in filtered VMAs. Thus these VMA's need not be |
389 | not be visited for munlock when the region is unmapped. | 475 | visited for munlock when the region is unmapped. |
390 | 476 | ||
391 | For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to | 477 | For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to |
392 | fault/allocate the pages and mlock them. Again, like mlock_fixup(), | 478 | fault/allocate the pages and mlock them. Again, like mlock_fixup(), |
393 | mlock_vma_pages_range() downgrades the mmap semaphore to read mode before | 479 | mlock_vma_pages_range() downgrades the mmap semaphore to read mode before |
394 | attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore | 480 | attempting to fault/allocate and mlock the pages and "upgrades" the semaphore |
395 | back to write mode before returning. | 481 | back to write mode before returning. |
396 | 482 | ||
397 | The callers of mlock_vma_pages_range() will have already added the memory | 483 | The callers of mlock_vma_pages_range() will have already added the memory range |
398 | range to be mlocked to the task's "locked_vm". To account for filtered vmas, | 484 | to be mlocked to the task's "locked_vm". To account for filtered VMAs, |
399 | mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the | 485 | mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the |
400 | callers then subtract a non-negative return value from the task's locked_vm. | 486 | callers then subtract a non-negative return value from the task's locked_vm. A |
401 | A negative return value represent an error--for example, from get_user_pages() | 487 | negative return value represent an error - for example, from get_user_pages() |
402 | attempting to fault in a vma with PROT_NONE access. In this case, we leave | 488 | attempting to fault in a VMA with PROT_NONE access. In this case, we leave the |
403 | the memory range accounted as locked_vm, as the protections could be changed | 489 | memory range accounted as locked_vm, as the protections could be changed later |
404 | later and pages allocated into that region. | 490 | and pages allocated into that region. |
405 | 491 | ||
406 | 492 | ||
407 | Mlocked Pages: munmap()/exit()/exec() System Call Handling | 493 | munmap()/exit()/exec() SYSTEM CALL HANDLING |
494 | ------------------------------------------- | ||
408 | 495 | ||
409 | When unmapping an mlocked region of memory, whether by an explicit call to | 496 | When unmapping an mlocked region of memory, whether by an explicit call to |
410 | munmap() or via an internal unmap from exit() or exec() processing, we must | 497 | munmap() or via an internal unmap from exit() or exec() processing, we must |
411 | munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. | 498 | munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages. |
412 | Before the unevictable/mlock changes, mlocking did not mark the pages in any | 499 | Before the unevictable/mlock changes, mlocking did not mark the pages in any |
413 | way, so unmapping them required no processing. | 500 | way, so unmapping them required no processing. |
414 | 501 | ||
415 | To munlock a range of memory under the unevictable/mlock infrastructure, the | 502 | To munlock a range of memory under the unevictable/mlock infrastructure, the |
416 | munmap() hander and task address space tear down function call | 503 | munmap() handler and task address space call tear down function |
417 | munlock_vma_pages_all(). The name reflects the observation that one always | 504 | munlock_vma_pages_all(). The name reflects the observation that one always |
418 | specifies the entire vma range when munlock()ing during unmap of a region. | 505 | specifies the entire VMA range when munlock()ing during unmap of a region. |
419 | Because of the vma filtering when mlocking() regions, only "normal" vmas that | 506 | Because of the VMA filtering when mlocking() regions, only "normal" VMAs that |
420 | actually contain mlocked pages will be passed to munlock_vma_pages_all(). | 507 | actually contain mlocked pages will be passed to munlock_vma_pages_all(). |
421 | 508 | ||
422 | munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup() | 509 | munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup() |
423 | for the munlock case, calls __munlock_vma_pages_range() to walk the page table | 510 | for the munlock case, calls __munlock_vma_pages_range() to walk the page table |
424 | for the vma's memory range and munlock_vma_page() each resident page mapped by | 511 | for the VMA's memory range and munlock_vma_page() each resident page mapped by |
425 | the vma. This effectively munlocks the page, only if this is the last | 512 | the VMA. This effectively munlocks the page, only if this is the last |
426 | VM_LOCKED vma that maps the page. | 513 | VM_LOCKED VMA that maps the page. |
427 | |||
428 | 514 | ||
429 | Mlocked Page: try_to_unmap() | ||
430 | 515 | ||
431 | [Note: the code changes represented by this section are really quite small | 516 | try_to_unmap() |
432 | compared to the text to describe what happening and why, and to discuss the | 517 | -------------- |
433 | implications.] | ||
434 | 518 | ||
435 | Pages can, of course, be mapped into multiple vmas. Some of these vmas may | 519 | Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may |
436 | have VM_LOCKED flag set. It is possible for a page mapped into one or more | 520 | have VM_LOCKED flag set. It is possible for a page mapped into one or more |
437 | VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one | 521 | VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one |
438 | of the active or inactive LRU lists. This could happen if, for example, a | 522 | of the active or inactive LRU lists. This could happen if, for example, a task |
439 | task in the process of munlock()ing the page could not isolate the page from | 523 | in the process of munlocking the page could not isolate the page from the LRU. |
440 | the LRU. As a result, vmscan/shrink_page_list() might encounter such a page | 524 | As a result, vmscan/shrink_page_list() might encounter such a page as described |
441 | as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To | 525 | in section "vmscan's handling of unevictable pages". To handle this situation, |
442 | handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED | 526 | try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse |
443 | vmas while it is walking a page's reverse map. | 527 | map. |
444 | 528 | ||
445 | try_to_unmap() is always called, by either vmscan for reclaim or for page | 529 | try_to_unmap() is always called, by either vmscan for reclaim or for page |
446 | migration, with the argument page locked and isolated from the LRU. BUG_ON() | 530 | migration, with the argument page locked and isolated from the LRU. Separate |
447 | assertions enforce this requirement. Separate functions handle anonymous and | 531 | functions handle anonymous and mapped file pages, as these types of pages have |
448 | mapped file pages, as these types of pages have different reverse map | 532 | different reverse map mechanisms. |
449 | mechanisms. | 533 | |
450 | 534 | (*) try_to_unmap_anon() | |
451 | try_to_unmap_anon() | 535 | |
452 | 536 | To unmap anonymous pages, each VMA in the list anchored in the anon_vma | |
453 | To unmap anonymous pages, each vma in the list anchored in the anon_vma must be | 537 | must be visited - at least until a VM_LOCKED VMA is encountered. If the |
454 | visited--at least until a VM_LOCKED vma is encountered. If the page is being | 538 | page is being unmapped for migration, VM_LOCKED VMAs do not stop the |
455 | unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked | 539 | process because mlocked pages are migratable. However, for reclaim, if |
456 | pages are migratable. However, for reclaim, if the page is mapped into a | 540 | the page is mapped into a VM_LOCKED VMA, the scan stops. |
457 | VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap | 541 | |
458 | semphore of the mm_struct to which the vma belongs in read mode. If this is | 542 | try_to_unmap_anon() attempts to acquire in read mode the mmap semphore of |
459 | successful, try_to_unmap() will mlock the page via mlock_vma_page()--we | 543 | the mm_struct to which the VMA belongs. If this is successful, it will |
460 | wouldn't have gotten to try_to_unmap() if the page were already mlocked--and | 544 | mlock the page via mlock_vma_page() - we wouldn't have gotten to |
461 | will return SWAP_MLOCK, indicating that the page is unevictable. If the | 545 | try_to_unmap_anon() if the page were already mlocked - and will return |
462 | mmap semaphore cannot be acquired, we are not sure whether the page is really | 546 | SWAP_MLOCK, indicating that the page is unevictable. |
463 | unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN. | 547 | |
464 | 548 | If the mmap semaphore cannot be acquired, we are not sure whether the page | |
465 | try_to_unmap_file() -- linear mappings | 549 | is really unevictable or not. In this case, try_to_unmap_anon() will |
466 | 550 | return SWAP_AGAIN. | |
467 | Unmapping of a mapped file page works the same, except that the scan visits | 551 | |
468 | all vmas that maps the page's index/page offset in the page's mapping's | 552 | (*) try_to_unmap_file() - linear mappings |
469 | reverse map priority search tree. It must also visit each vma in the page's | 553 | |
470 | mapping's non-linear list, if the list is non-empty. As for anonymous pages, | 554 | Unmapping of a mapped file page works the same as for anonymous mappings, |
471 | on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will | 555 | except that the scan visits all VMAs that map the page's index/page offset |
472 | attempt to acquire the associated mm_struct's mmap semaphore to mlock the page, | 556 | in the page's mapping's reverse map priority search tree. It also visits |
473 | returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not. | 557 | each VMA in the page's mapping's non-linear list, if the list is |
474 | 558 | non-empty. | |
475 | try_to_unmap_file() -- non-linear mappings | 559 | |
476 | 560 | As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file | |
477 | If a page's mapping contains a non-empty non-linear mapping vma list, then | 561 | page, try_to_unmap_file() will attempt to acquire the associated |
478 | try_to_un{map|lock}() must also visit each vma in that list to determine | 562 | mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this |
479 | whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit | 563 | is successful, and SWAP_AGAIN, if not. |
480 | all vmas in the non-linear list to ensure that the pages is not/should not be | 564 | |
481 | mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate. | 565 | (*) try_to_unmap_file() - non-linear mappings |
482 | However, there is no easy way to determine whether the page is actually mapped | 566 | |
483 | in a given vma--either for unmapping or testing whether the VM_LOCKED vma | 567 | If a page's mapping contains a non-empty non-linear mapping VMA list, then |
484 | actually pins the page. | 568 | try_to_un{map|lock}() must also visit each VMA in that list to determine |
485 | 569 | whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit | |
486 | So, try_to_unmap_file() handles non-linear mappings by scanning a certain | 570 | all VMAs in the non-linear list to ensure that the pages is not/should not |
487 | number of pages--a "cluster"--in each non-linear vma associated with the page's | 571 | be mlocked. |
488 | mapping, for each file mapped page that vmscan tries to unmap. If this happens | 572 | |
489 | to unmap the page we're trying to unmap, try_to_unmap() will notice this on | 573 | If a VM_LOCKED VMA is found in the list, the scan could terminate. |
490 | return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it | 574 | However, there is no easy way to determine whether the page is actually |
491 | will return SWAP_AGAIN, causing vmscan to recirculate this page. We take | 575 | mapped in a given VMA - either for unmapping or testing whether the |
492 | advantage of the cluster scan in try_to_unmap_cluster() as follows: | 576 | VM_LOCKED VMA actually pins the page. |
493 | 577 | ||
494 | For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap | 578 | try_to_unmap_file() handles non-linear mappings by scanning a certain |
495 | semaphore of the associated mm_struct for read without blocking. If this | 579 | number of pages - a "cluster" - in each non-linear VMA associated with the |
496 | attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will | 580 | page's mapping, for each file mapped page that vmscan tries to unmap. If |
497 | retain the mmap semaphore for the scan; otherwise it drops it here. Then, | 581 | this happens to unmap the page we're trying to unmap, try_to_unmap() will |
498 | for each page in the cluster, if we're holding the mmap semaphore for a locked | 582 | notice this on return (page_mapcount(page) will be 0) and return |
499 | vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This | 583 | SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to |
500 | call is a no-op if the page is already locked, but will mlock any pages in | 584 | recirculate this page. We take advantage of the cluster scan in |
501 | the non-linear mapping that happen to be unlocked. If one of the pages so | 585 | try_to_unmap_cluster() as follows: |
502 | mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will | 586 | |
503 | return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan | 587 | For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the |
504 | to cull the page, rather than recirculating it on the inactive list. Again, | 588 | mmap semaphore of the associated mm_struct for read without blocking. |
505 | if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns | 589 | |
506 | SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but | 590 | If this attempt is successful and the VMA is VM_LOCKED, |
507 | couldn't be mlocked. | 591 | try_to_unmap_cluster() will retain the mmap semaphore for the scan; |
508 | 592 | otherwise it drops it here. | |
509 | 593 | ||
510 | Mlocked pages: try_to_munlock() Reverse Map Scan | 594 | Then, for each page in the cluster, if we're holding the mmap semaphore |
511 | 595 | for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to | |
512 | TODO/FIXME: a better name might be page_mlocked()--analogous to the | 596 | mlock the page. This call is a no-op if the page is already locked, |
513 | page_referenced() reverse map walker. | 597 | but will mlock any pages in the non-linear mapping that happen to be |
514 | 598 | unlocked. | |
515 | When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() | 599 | |
516 | System Call Handling" above--tries to munlock a page, it needs to | 600 | If one of the pages so mlocked is the page passed in to try_to_unmap(), |
517 | determine whether or not the page is mapped by any VM_LOCKED vma, without | 601 | try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default |
518 | actually attempting to unmap all ptes from the page. For this purpose, the | 602 | SWAP_AGAIN. This will allow vmscan to cull the page, rather than |
519 | unevictable/mlock infrastructure introduced a variant of try_to_unmap() called | 603 | recirculating it on the inactive list. |
520 | try_to_munlock(). | 604 | |
605 | Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it | ||
606 | returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED | ||
607 | VMA, but couldn't be mlocked. | ||
608 | |||
609 | |||
610 | try_to_munlock() REVERSE MAP SCAN | ||
611 | --------------------------------- | ||
612 | |||
613 | [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the | ||
614 | page_referenced() reverse map walker. | ||
615 | |||
616 | When munlock_vma_page() [see section "munlock()/munlockall() System Call | ||
617 | Handling" above] tries to munlock a page, it needs to determine whether or not | ||
618 | the page is mapped by any VM_LOCKED VMA without actually attempting to unmap | ||
619 | all PTEs from the page. For this purpose, the unevictable/mlock infrastructure | ||
620 | introduced a variant of try_to_unmap() called try_to_munlock(). | ||
521 | 621 | ||
522 | try_to_munlock() calls the same functions as try_to_unmap() for anonymous and | 622 | try_to_munlock() calls the same functions as try_to_unmap() for anonymous and |
523 | mapped file pages with an additional argument specifing unlock versus unmap | 623 | mapped file pages with an additional argument specifing unlock versus unmap |
524 | processing. Again, these functions walk the respective reverse maps looking | 624 | processing. Again, these functions walk the respective reverse maps looking |
525 | for VM_LOCKED vmas. When such a vma is found for anonymous pages and file | 625 | for VM_LOCKED VMAs. When such a VMA is found for anonymous pages and file |
526 | pages mapped in linear VMAs, as in the try_to_unmap() case, the functions | 626 | pages mapped in linear VMAs, as in the try_to_unmap() case, the functions |
527 | attempt to acquire the associated mmap semphore, mlock the page via | 627 | attempt to acquire the associated mmap semphore, mlock the page via |
528 | mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the | 628 | mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the |
529 | pre-clearing of the page's PG_mlocked done by munlock_vma_page. | 629 | pre-clearing of the page's PG_mlocked done by munlock_vma_page. |
530 | 630 | ||
531 | If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap | 631 | If try_to_unmap() is unable to acquire a VM_LOCKED VMA's associated mmap |
532 | semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() | 632 | semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() to |
533 | to recycle the page on the inactive list and hope that it has better luck | 633 | recycle the page on the inactive list and hope that it has better luck with the |
534 | with the page next time. | 634 | page next time. |
535 | 635 | ||
536 | For file pages mapped into non-linear vmas, the try_to_munlock() logic works | 636 | For file pages mapped into non-linear VMAs, the try_to_munlock() logic works |
537 | slightly differently. On encountering a VM_LOCKED non-linear vma that might | 637 | slightly differently. On encountering a VM_LOCKED non-linear VMA that might |
538 | map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking | 638 | map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking the |
539 | the page. munlock_vma_page() will just leave the page unlocked and let | 639 | page. munlock_vma_page() will just leave the page unlocked and let vmscan deal |
540 | vmscan deal with it--the usual fallback position. | 640 | with it - the usual fallback position. |
541 | 641 | ||
542 | Note that try_to_munlock()'s reverse map walk must visit every vma in a pages' | 642 | Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's |
543 | reverse map to determine that a page is NOT mapped into any VM_LOCKED vma. | 643 | reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA. |
544 | However, the scan can terminate when it encounters a VM_LOCKED vma and can | 644 | However, the scan can terminate when it encounters a VM_LOCKED VMA and can |
545 | successfully acquire the vma's mmap semphore for read and mlock the page. | 645 | successfully acquire the VMA's mmap semphore for read and mlock the page. |
546 | Although try_to_munlock() can be called many [very many!] times when | 646 | Although try_to_munlock() might be called a great many times when munlocking a |
547 | munlock()ing a large region or tearing down a large address space that has been | 647 | large region or tearing down a large address space that has been mlocked via |
548 | mlocked via mlockall(), overall this is a fairly rare event. | 648 | mlockall(), overall this is a fairly rare event. |
549 | 649 | ||
550 | Mlocked Page: Page Reclaim in shrink_*_list() | 650 | |
551 | 651 | PAGE RECLAIM IN shrink_*_list() | |
552 | shrink_active_list() culls any obviously unevictable pages--i.e., | 652 | ------------------------------- |
553 | !page_evictable(page, NULL)--diverting these to the unevictable lru | 653 | |
554 | list. However, shrink_active_list() only sees unevictable pages that | 654 | shrink_active_list() culls any obviously unevictable pages - i.e. |
555 | made it onto the active/inactive lru lists. Note that these pages do not | 655 | !page_evictable(page, NULL) - diverting these to the unevictable list. |
556 | have PageUnevictable set--otherwise, they would be on the unevictable list and | 656 | However, shrink_active_list() only sees unevictable pages that made it onto the |
557 | shrink_active_list would never see them. | 657 | active/inactive lru lists. Note that these pages do not have PageUnevictable |
658 | set - otherwise they would be on the unevictable list and shrink_active_list | ||
659 | would never see them. | ||
558 | 660 | ||
559 | Some examples of these unevictable pages on the LRU lists are: | 661 | Some examples of these unevictable pages on the LRU lists are: |
560 | 662 | ||
561 | 1) ramfs pages that have been placed on the lru lists when first allocated. | 663 | (1) ramfs pages that have been placed on the LRU lists when first allocated. |
664 | |||
665 | (2) SHM_LOCK'd shared memory pages. shmctl(SHM_LOCK) does not attempt to | ||
666 | allocate or fault in the pages in the shared memory region. This happens | ||
667 | when an application accesses the page the first time after SHM_LOCK'ing | ||
668 | the segment. | ||
562 | 669 | ||
563 | 2) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to | 670 | (3) mlocked pages that could not be isolated from the LRU and moved to the |
564 | allocate or fault in the pages in the shared memory region. This happens | 671 | unevictable list in mlock_vma_page(). |
565 | when an application accesses the page the first time after SHM_LOCKing | ||
566 | the segment. | ||
567 | 672 | ||
568 | 3) Mlocked pages that could not be isolated from the lru and moved to the | 673 | (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't |
569 | unevictable list in mlock_vma_page(). | 674 | acquire the VMA's mmap semaphore to test the flags and set PageMlocked. |
675 | munlock_vma_page() was forced to let the page back on to the normal LRU | ||
676 | list for vmscan to handle. | ||
570 | 677 | ||
571 | 3) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't | 678 | shrink_inactive_list() also diverts any unevictable pages that it finds on the |
572 | acquire the vma's mmap semaphore to test the flags and set PageMlocked. | 679 | inactive lists to the appropriate zone's unevictable list. |
573 | munlock_vma_page() was forced to let the page back on to the normal | ||
574 | LRU list for vmscan to handle. | ||
575 | 680 | ||
576 | shrink_inactive_list() also culls any unevictable pages that it finds on | 681 | shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd |
577 | the inactive lists, again diverting them to the appropriate zone's unevictable | 682 | after shrink_active_list() had moved them to the inactive list, or pages mapped |
578 | lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became | 683 | into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to |
579 | SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or | 684 | recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter, |
580 | pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from | 685 | but will pass on to shrink_page_list(). |
581 | the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice | ||
582 | the latter, but will pass on to shrink_page_list(). | ||
583 | 686 | ||
584 | shrink_page_list() again culls obviously unevictable pages that it could | 687 | shrink_page_list() again culls obviously unevictable pages that it could |
585 | encounter for similar reason to shrink_inactive_list(). Pages mapped into | 688 | encounter for similar reason to shrink_inactive_list(). Pages mapped into |
586 | VM_LOCKED vmas but without PG_mlocked set will make it all the way to | 689 | VM_LOCKED VMAs but without PG_mlocked set will make it all the way to |
587 | try_to_unmap(). shrink_page_list() will divert them to the unevictable list | 690 | try_to_unmap(). shrink_page_list() will divert them to the unevictable list |
588 | when try_to_unmap() returns SWAP_MLOCK, as discussed above. | 691 | when try_to_unmap() returns SWAP_MLOCK, as discussed above. |
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt new file mode 100644 index 000000000000..127839e53043 --- /dev/null +++ b/Documentation/watchdog/hpwdt.txt | |||
@@ -0,0 +1,84 @@ | |||
1 | Last reviewed: 06/02/2009 | ||
2 | |||
3 | HP iLO2 NMI Watchdog Driver | ||
4 | NMI sourcing for iLO2 based ProLiant Servers | ||
5 | Documentation and Driver by | ||
6 | Thomas Mingarelli <thomas.mingarelli@hp.com> | ||
7 | |||
8 | The HP iLO2 NMI Watchdog driver is a kernel module that provides basic | ||
9 | watchdog functionality and the added benefit of NMI sourcing. Both the | ||
10 | watchdog functionality and the NMI sourcing capability need to be enabled | ||
11 | by the user. Remember that the two modes are not dependant on one another. | ||
12 | A user can have the NMI sourcing without the watchdog timer and vice-versa. | ||
13 | |||
14 | Watchdog functionality is enabled like any other common watchdog driver. That | ||
15 | is, an application needs to be started that kicks off the watchdog timer. A | ||
16 | basic application exists in the Documentation/watchdog/src directory called | ||
17 | watchdog-test.c. Simply compile the C file and kick it off. If the system | ||
18 | gets into a bad state and hangs, the HP ProLiant iLO 2 timer register will | ||
19 | not be updated in a timely fashion and a hardware system reset (also known as | ||
20 | an Automatic Server Recovery (ASR)) event will occur. | ||
21 | |||
22 | The hpwdt driver also has three (3) module parameters. They are the following: | ||
23 | |||
24 | soft_margin - allows the user to set the watchdog timer value | ||
25 | allow_kdump - allows the user to save off a kernel dump image after an NMI | ||
26 | nowayout - basic watchdog parameter that does not allow the timer to | ||
27 | be restarted or an impending ASR to be escaped. | ||
28 | |||
29 | NOTE: More information about watchdog drivers in general, including the ioctl | ||
30 | interface to /dev/watchdog can be found in | ||
31 | Documentation/watchdog/watchdog-api.txt and Documentation/IPMI.txt. | ||
32 | |||
33 | The NMI sourcing capability is disabled when the driver discovers that the | ||
34 | nmi_watchdog is turned on (nmi_watchdog = 1). This is due to the inability to | ||
35 | distinguish between "NMI Watchdog Ticks" and "HW generated NMI events" in the | ||
36 | Linux kernel. What this means is that the hpwdt nmi handler code is called | ||
37 | each time the NMI signal fires off. This could amount to several thousands of | ||
38 | NMIs in a matter of seconds. If a user sees the Linux kernel's "dazed and | ||
39 | confused" message in the logs or if the system gets into a hung state, then | ||
40 | the user should reboot with nmi_watchdog=0. | ||
41 | |||
42 | 1. If the kernel has not been booted with nmi_watchdog turned off then | ||
43 | edit /boot/grub/menu.lst and place the nmi_watchdog=0 at the end of the | ||
44 | currently booting kernel line. | ||
45 | 2. reboot the sever | ||
46 | |||
47 | Now, the hpwdt can successfully receive and source the NMI and provide a log | ||
48 | message that details the reason for the NMI (as determined by the HP BIOS). | ||
49 | |||
50 | Below is a list of NMIs the HP BIOS understands along with the associated | ||
51 | code (reason): | ||
52 | |||
53 | No source found 00h | ||
54 | |||
55 | Uncorrectable Memory Error 01h | ||
56 | |||
57 | ASR NMI 1Bh | ||
58 | |||
59 | PCI Parity Error 20h | ||
60 | |||
61 | NMI Button Press 27h | ||
62 | |||
63 | SB_BUS_NMI 28h | ||
64 | |||
65 | ILO Doorbell NMI 29h | ||
66 | |||
67 | ILO IOP NMI 2Ah | ||
68 | |||
69 | ILO Watchdog NMI 2Bh | ||
70 | |||
71 | Proc Throt NMI 2Ch | ||
72 | |||
73 | Front Side Bus NMI 2Dh | ||
74 | |||
75 | PCI Express Error 2Fh | ||
76 | |||
77 | DMA controller NMI 30h | ||
78 | |||
79 | Hypertransport/CSI Error 31h | ||
80 | |||
81 | |||
82 | |||
83 | -- Tom Mingarelli | ||
84 | (thomas.mingarelli@hp.com) | ||
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 7b4596ac4120..8da3a795083f 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt | |||
@@ -50,6 +50,10 @@ Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format | |||
50 | Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical | 50 | Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical |
51 | pointer to single linked list of struct setup_data. | 51 | pointer to single linked list of struct setup_data. |
52 | 52 | ||
53 | Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment | ||
54 | beyond the kernel_alignment added, new init_size and | ||
55 | pref_address fields. Added extended boot loader IDs. | ||
56 | |||
53 | **** MEMORY LAYOUT | 57 | **** MEMORY LAYOUT |
54 | 58 | ||
55 | The traditional memory map for the kernel loader, used for Image or | 59 | The traditional memory map for the kernel loader, used for Image or |
@@ -158,7 +162,7 @@ Offset Proto Name Meaning | |||
158 | 0202/4 2.00+ header Magic signature "HdrS" | 162 | 0202/4 2.00+ header Magic signature "HdrS" |
159 | 0206/2 2.00+ version Boot protocol version supported | 163 | 0206/2 2.00+ version Boot protocol version supported |
160 | 0208/4 2.00+ realmode_swtch Boot loader hook (see below) | 164 | 0208/4 2.00+ realmode_swtch Boot loader hook (see below) |
161 | 020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) | 165 | 020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete) |
162 | 020E/2 2.00+ kernel_version Pointer to kernel version string | 166 | 020E/2 2.00+ kernel_version Pointer to kernel version string |
163 | 0210/1 2.00+ type_of_loader Boot loader identifier | 167 | 0210/1 2.00+ type_of_loader Boot loader identifier |
164 | 0211/1 2.00+ loadflags Boot protocol option flags | 168 | 0211/1 2.00+ loadflags Boot protocol option flags |
@@ -168,12 +172,14 @@ Offset Proto Name Meaning | |||
168 | 021C/4 2.00+ ramdisk_size initrd size (set by boot loader) | 172 | 021C/4 2.00+ ramdisk_size initrd size (set by boot loader) |
169 | 0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only | 173 | 0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only |
170 | 0224/2 2.01+ heap_end_ptr Free memory after setup end | 174 | 0224/2 2.01+ heap_end_ptr Free memory after setup end |
171 | 0226/2 N/A pad1 Unused | 175 | 0226/1 2.02+(3 ext_loader_ver Extended boot loader version |
176 | 0227/1 2.02+(3 ext_loader_type Extended boot loader ID | ||
172 | 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line | 177 | 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line |
173 | 022C/4 2.03+ initrd_addr_max Highest legal initrd address | 178 | 022C/4 2.03+ ramdisk_max Highest legal initrd address |
174 | 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel | 179 | 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel |
175 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not | 180 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not |
176 | 0235/3 N/A pad2 Unused | 181 | 0235/1 2.10+ min_alignment Minimum alignment, as a power of two |
182 | 0236/2 N/A pad3 Unused | ||
177 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line | 183 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line |
178 | 023C/4 2.07+ hardware_subarch Hardware subarchitecture | 184 | 023C/4 2.07+ hardware_subarch Hardware subarchitecture |
179 | 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data | 185 | 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data |
@@ -181,6 +187,8 @@ Offset Proto Name Meaning | |||
181 | 024C/4 2.08+ payload_length Length of kernel payload | 187 | 024C/4 2.08+ payload_length Length of kernel payload |
182 | 0250/8 2.09+ setup_data 64-bit physical pointer to linked list | 188 | 0250/8 2.09+ setup_data 64-bit physical pointer to linked list |
183 | of struct setup_data | 189 | of struct setup_data |
190 | 0258/8 2.10+ pref_address Preferred loading address | ||
191 | 0260/4 2.10+ init_size Linear memory required during initialization | ||
184 | 192 | ||
185 | (1) For backwards compatibility, if the setup_sects field contains 0, the | 193 | (1) For backwards compatibility, if the setup_sects field contains 0, the |
186 | real value is 4. | 194 | real value is 4. |
@@ -189,6 +197,8 @@ Offset Proto Name Meaning | |||
189 | field are unusable, which means the size of a bzImage kernel | 197 | field are unusable, which means the size of a bzImage kernel |
190 | cannot be determined. | 198 | cannot be determined. |
191 | 199 | ||
200 | (3) Ignored, but safe to set, for boot protocols 2.02-2.09. | ||
201 | |||
192 | If the "HdrS" (0x53726448) magic number is not found at offset 0x202, | 202 | If the "HdrS" (0x53726448) magic number is not found at offset 0x202, |
193 | the boot protocol version is "old". Loading an old kernel, the | 203 | the boot protocol version is "old". Loading an old kernel, the |
194 | following parameters should be assumed: | 204 | following parameters should be assumed: |
@@ -299,14 +309,14 @@ Protocol: 2.00+ | |||
299 | e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version | 309 | e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version |
300 | 10.17. | 310 | 10.17. |
301 | 311 | ||
302 | Field name: readmode_swtch | 312 | Field name: realmode_swtch |
303 | Type: modify (optional) | 313 | Type: modify (optional) |
304 | Offset/size: 0x208/4 | 314 | Offset/size: 0x208/4 |
305 | Protocol: 2.00+ | 315 | Protocol: 2.00+ |
306 | 316 | ||
307 | Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) | 317 | Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) |
308 | 318 | ||
309 | Field name: start_sys | 319 | Field name: start_sys_seg |
310 | Type: read | 320 | Type: read |
311 | Offset/size: 0x20c/2 | 321 | Offset/size: 0x20c/2 |
312 | Protocol: 2.00+ | 322 | Protocol: 2.00+ |
@@ -342,18 +352,32 @@ Protocol: 2.00+ | |||
342 | 0xTV here, where T is an identifier for the boot loader and V is | 352 | 0xTV here, where T is an identifier for the boot loader and V is |
343 | a version number. Otherwise, enter 0xFF here. | 353 | a version number. Otherwise, enter 0xFF here. |
344 | 354 | ||
355 | For boot loader IDs above T = 0xD, write T = 0xE to this field and | ||
356 | write the extended ID minus 0x10 to the ext_loader_type field. | ||
357 | Similarly, the ext_loader_ver field can be used to provide more than | ||
358 | four bits for the bootloader version. | ||
359 | |||
360 | For example, for T = 0x15, V = 0x234, write: | ||
361 | |||
362 | type_of_loader <- 0xE4 | ||
363 | ext_loader_type <- 0x05 | ||
364 | ext_loader_ver <- 0x23 | ||
365 | |||
345 | Assigned boot loader ids: | 366 | Assigned boot loader ids: |
346 | 0 LILO (0x00 reserved for pre-2.00 bootloader) | 367 | 0 LILO (0x00 reserved for pre-2.00 bootloader) |
347 | 1 Loadlin | 368 | 1 Loadlin |
348 | 2 bootsect-loader (0x20, all other values reserved) | 369 | 2 bootsect-loader (0x20, all other values reserved) |
349 | 3 SYSLINUX | 370 | 3 Syslinux |
350 | 4 EtherBoot | 371 | 4 Etherboot/gPXE |
351 | 5 ELILO | 372 | 5 ELILO |
352 | 7 GRUB | 373 | 7 GRUB |
353 | 8 U-BOOT | 374 | 8 U-Boot |
354 | 9 Xen | 375 | 9 Xen |
355 | A Gujin | 376 | A Gujin |
356 | B Qemu | 377 | B Qemu |
378 | C Arcturus Networks uCbootloader | ||
379 | E Extended (see ext_loader_type) | ||
380 | F Special (0xFF = undefined) | ||
357 | 381 | ||
358 | Please contact <hpa@zytor.com> if you need a bootloader ID | 382 | Please contact <hpa@zytor.com> if you need a bootloader ID |
359 | value assigned. | 383 | value assigned. |
@@ -452,6 +476,35 @@ Protocol: 2.01+ | |||
452 | Set this field to the offset (from the beginning of the real-mode | 476 | Set this field to the offset (from the beginning of the real-mode |
453 | code) of the end of the setup stack/heap, minus 0x0200. | 477 | code) of the end of the setup stack/heap, minus 0x0200. |
454 | 478 | ||
479 | Field name: ext_loader_ver | ||
480 | Type: write (optional) | ||
481 | Offset/size: 0x226/1 | ||
482 | Protocol: 2.02+ | ||
483 | |||
484 | This field is used as an extension of the version number in the | ||
485 | type_of_loader field. The total version number is considered to be | ||
486 | (type_of_loader & 0x0f) + (ext_loader_ver << 4). | ||
487 | |||
488 | The use of this field is boot loader specific. If not written, it | ||
489 | is zero. | ||
490 | |||
491 | Kernels prior to 2.6.31 did not recognize this field, but it is safe | ||
492 | to write for protocol version 2.02 or higher. | ||
493 | |||
494 | Field name: ext_loader_type | ||
495 | Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0) | ||
496 | Offset/size: 0x227/1 | ||
497 | Protocol: 2.02+ | ||
498 | |||
499 | This field is used as an extension of the type number in | ||
500 | type_of_loader field. If the type in type_of_loader is 0xE, then | ||
501 | the actual type is (ext_loader_type + 0x10). | ||
502 | |||
503 | This field is ignored if the type in type_of_loader is not 0xE. | ||
504 | |||
505 | Kernels prior to 2.6.31 did not recognize this field, but it is safe | ||
506 | to write for protocol version 2.02 or higher. | ||
507 | |||
455 | Field name: cmd_line_ptr | 508 | Field name: cmd_line_ptr |
456 | Type: write (obligatory) | 509 | Type: write (obligatory) |
457 | Offset/size: 0x228/4 | 510 | Offset/size: 0x228/4 |
@@ -468,7 +521,7 @@ Protocol: 2.02+ | |||
468 | zero, the kernel will assume that your boot loader does not support | 521 | zero, the kernel will assume that your boot loader does not support |
469 | the 2.02+ protocol. | 522 | the 2.02+ protocol. |
470 | 523 | ||
471 | Field name: initrd_addr_max | 524 | Field name: ramdisk_max |
472 | Type: read | 525 | Type: read |
473 | Offset/size: 0x22c/4 | 526 | Offset/size: 0x22c/4 |
474 | Protocol: 2.03+ | 527 | Protocol: 2.03+ |
@@ -481,11 +534,19 @@ Protocol: 2.03+ | |||
481 | 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) | 534 | 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) |
482 | 535 | ||
483 | Field name: kernel_alignment | 536 | Field name: kernel_alignment |
484 | Type: read (reloc) | 537 | Type: read/modify (reloc) |
485 | Offset/size: 0x230/4 | 538 | Offset/size: 0x230/4 |
486 | Protocol: 2.05+ | 539 | Protocol: 2.05+ (read), 2.10+ (modify) |
540 | |||
541 | Alignment unit required by the kernel (if relocatable_kernel is | ||
542 | true.) A relocatable kernel that is loaded at an alignment | ||
543 | incompatible with the value in this field will be realigned during | ||
544 | kernel initialization. | ||
487 | 545 | ||
488 | Alignment unit required by the kernel (if relocatable_kernel is true.) | 546 | Starting with protocol version 2.10, this reflects the kernel |
547 | alignment preferred for optimal performance; it is possible for the | ||
548 | loader to modify this field to permit a lesser alignment. See the | ||
549 | min_alignment and pref_address field below. | ||
489 | 550 | ||
490 | Field name: relocatable_kernel | 551 | Field name: relocatable_kernel |
491 | Type: read (reloc) | 552 | Type: read (reloc) |
@@ -497,6 +558,22 @@ Protocol: 2.05+ | |||
497 | After loading, the boot loader must set the code32_start field to | 558 | After loading, the boot loader must set the code32_start field to |
498 | point to the loaded code, or to a boot loader hook. | 559 | point to the loaded code, or to a boot loader hook. |
499 | 560 | ||
561 | Field name: min_alignment | ||
562 | Type: read (reloc) | ||
563 | Offset/size: 0x235/1 | ||
564 | Protocol: 2.10+ | ||
565 | |||
566 | This field, if nonzero, indicates as a power of two the minimum | ||
567 | alignment required, as opposed to preferred, by the kernel to boot. | ||
568 | If a boot loader makes use of this field, it should update the | ||
569 | kernel_alignment field with the alignment unit desired; typically: | ||
570 | |||
571 | kernel_alignment = 1 << min_alignment | ||
572 | |||
573 | There may be a considerable performance cost with an excessively | ||
574 | misaligned kernel. Therefore, a loader should typically try each | ||
575 | power-of-two alignment from kernel_alignment down to this alignment. | ||
576 | |||
500 | Field name: cmdline_size | 577 | Field name: cmdline_size |
501 | Type: read | 578 | Type: read |
502 | Offset/size: 0x238/4 | 579 | Offset/size: 0x238/4 |
@@ -542,7 +619,10 @@ Protocol: 2.08+ | |||
542 | 619 | ||
543 | The payload may be compressed. The format of both the compressed and | 620 | The payload may be compressed. The format of both the compressed and |
544 | uncompressed data should be determined using the standard magic | 621 | uncompressed data should be determined using the standard magic |
545 | numbers. Currently only gzip compressed ELF is used. | 622 | numbers. The currently supported compression formats are gzip |
623 | (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A) and LZMA | ||
624 | (magic number 5D 00). The uncompressed payload is currently always ELF | ||
625 | (magic number 7F 45 4C 46). | ||
546 | 626 | ||
547 | Field name: payload_length | 627 | Field name: payload_length |
548 | Type: read | 628 | Type: read |
@@ -578,6 +658,36 @@ Protocol: 2.09+ | |||
578 | sure to consider the case where the linked list already contains | 658 | sure to consider the case where the linked list already contains |
579 | entries. | 659 | entries. |
580 | 660 | ||
661 | Field name: pref_address | ||
662 | Type: read (reloc) | ||
663 | Offset/size: 0x258/8 | ||
664 | Protocol: 2.10+ | ||
665 | |||
666 | This field, if nonzero, represents a preferred load address for the | ||
667 | kernel. A relocating bootloader should attempt to load at this | ||
668 | address if possible. | ||
669 | |||
670 | A non-relocatable kernel will unconditionally move itself and to run | ||
671 | at this address. | ||
672 | |||
673 | Field name: init_size | ||
674 | Type: read | ||
675 | Offset/size: 0x25c/4 | ||
676 | |||
677 | This field indicates the amount of linear contiguous memory starting | ||
678 | at the kernel runtime start address that the kernel needs before it | ||
679 | is capable of examining its memory map. This is not the same thing | ||
680 | as the total amount of memory the kernel needs to boot, but it can | ||
681 | be used by a relocating boot loader to help select a safe load | ||
682 | address for the kernel. | ||
683 | |||
684 | The kernel runtime start address is determined by the following algorithm: | ||
685 | |||
686 | if (relocatable_kernel) | ||
687 | runtime_start = align_up(load_address, kernel_alignment) | ||
688 | else | ||
689 | runtime_start = pref_address | ||
690 | |||
581 | 691 | ||
582 | **** THE IMAGE CHECKSUM | 692 | **** THE IMAGE CHECKSUM |
583 | 693 | ||
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt new file mode 100644 index 000000000000..607b1a016064 --- /dev/null +++ b/Documentation/x86/earlyprintk.txt | |||
@@ -0,0 +1,101 @@ | |||
1 | |||
2 | Mini-HOWTO for using the earlyprintk=dbgp boot option with a | ||
3 | USB2 Debug port key and a debug cable, on x86 systems. | ||
4 | |||
5 | You need two computers, the 'USB debug key' special gadget and | ||
6 | and two USB cables, connected like this: | ||
7 | |||
8 | [host/target] <-------> [USB debug key] <-------> [client/console] | ||
9 | |||
10 | 1. There are three specific hardware requirements: | ||
11 | |||
12 | a.) Host/target system needs to have USB debug port capability. | ||
13 | |||
14 | You can check this capability by looking at a 'Debug port' bit in | ||
15 | the lspci -vvv output: | ||
16 | |||
17 | # lspci -vvv | ||
18 | ... | ||
19 | 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI]) | ||
20 | Subsystem: Lenovo ThinkPad T61 | ||
21 | Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- | ||
22 | Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- | ||
23 | Latency: 0 | ||
24 | Interrupt: pin D routed to IRQ 19 | ||
25 | Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K] | ||
26 | Capabilities: [50] Power Management version 2 | ||
27 | Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+) | ||
28 | Status: D0 PME-Enable- DSel=0 DScale=0 PME+ | ||
29 | Capabilities: [58] Debug port: BAR=1 offset=00a0 | ||
30 | ^^^^^^^^^^^ <==================== [ HERE ] | ||
31 | Kernel driver in use: ehci_hcd | ||
32 | Kernel modules: ehci-hcd | ||
33 | ... | ||
34 | |||
35 | ( If your system does not list a debug port capability then you probably | ||
36 | wont be able to use the USB debug key. ) | ||
37 | |||
38 | b.) You also need a Netchip USB debug cable/key: | ||
39 | |||
40 | http://www.plxtech.com/products/NET2000/NET20DC/default.asp | ||
41 | |||
42 | This is a small blue plastic connector with two USB connections, | ||
43 | it draws power from its USB connections. | ||
44 | |||
45 | c.) Thirdly, you need a second client/console system with a regular USB port. | ||
46 | |||
47 | 2. Software requirements: | ||
48 | |||
49 | a.) On the host/target system: | ||
50 | |||
51 | You need to enable the following kernel config option: | ||
52 | |||
53 | CONFIG_EARLY_PRINTK_DBGP=y | ||
54 | |||
55 | And you need to add the boot command line: "earlyprintk=dbgp". | ||
56 | (If you are using Grub, append it to the 'kernel' line in | ||
57 | /etc/grub.conf) | ||
58 | |||
59 | NOTE: normally earlyprintk console gets turned off once the | ||
60 | regular console is alive - use "earlyprintk=dbgp,keep" to keep | ||
61 | this channel open beyond early bootup. This can be useful for | ||
62 | debugging crashes under Xorg, etc. | ||
63 | |||
64 | b.) On the client/console system: | ||
65 | |||
66 | You should enable the following kernel config option: | ||
67 | |||
68 | CONFIG_USB_SERIAL_DEBUG=y | ||
69 | |||
70 | On the next bootup with the modified kernel you should | ||
71 | get a /dev/ttyUSBx device(s). | ||
72 | |||
73 | Now this channel of kernel messages is ready to be used: start | ||
74 | your favorite terminal emulator (minicom, etc.) and set | ||
75 | it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to | ||
76 | see the raw output. | ||
77 | |||
78 | c.) On Nvidia Southbridge based systems: the kernel will try to probe | ||
79 | and find out which port has debug device connected. | ||
80 | |||
81 | 3. Testing that it works fine: | ||
82 | |||
83 | You can test the output by using earlyprintk=dbgp,keep and provoking | ||
84 | kernel messages on the host/target system. You can provoke a harmless | ||
85 | kernel message by for example doing: | ||
86 | |||
87 | echo h > /proc/sysrq-trigger | ||
88 | |||
89 | On the host/target system you should see this help line in "dmesg" output: | ||
90 | |||
91 | SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z) | ||
92 | |||
93 | On the client/console system do: | ||
94 | |||
95 | cat /dev/ttyUSB0 | ||
96 | |||
97 | And you should see the help line above displayed shortly after you've | ||
98 | provoked it on the host system. | ||
99 | |||
100 | If it does not work then please ask about it on the linux-kernel@vger.kernel.org | ||
101 | mailing list or contact the x86 maintainers. | ||
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 34c13040a718..29a6ff8bc7d3 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here. | |||
5 | 5 | ||
6 | Machine check | 6 | Machine check |
7 | 7 | ||
8 | mce=off disable machine check | 8 | Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. |
9 | mce=bootlog Enable logging of machine checks left over from booting. | 9 | |
10 | Disabled by default on AMD because some BIOS leave bogus ones. | 10 | mce=off |
11 | If your BIOS doesn't do that it's a good idea to enable though | 11 | Disable machine check |
12 | to make sure you log even machine check events that result | 12 | mce=no_cmci |
13 | in a reboot. On Intel systems it is enabled by default. | 13 | Disable CMCI(Corrected Machine Check Interrupt) that |
14 | Intel processor supports. Usually this disablement is | ||
15 | not recommended, but it might be handy if your hardware | ||
16 | is misbehaving. | ||
17 | Note that you'll get more problems without CMCI than with | ||
18 | due to the shared banks, i.e. you might get duplicated | ||
19 | error logs. | ||
20 | mce=dont_log_ce | ||
21 | Don't make logs for corrected errors. All events reported | ||
22 | as corrected are silently cleared by OS. | ||
23 | This option will be useful if you have no interest in any | ||
24 | of corrected errors. | ||
25 | mce=ignore_ce | ||
26 | Disable features for corrected errors, e.g. polling timer | ||
27 | and CMCI. All events reported as corrected are not cleared | ||
28 | by OS and remained in its error banks. | ||
29 | Usually this disablement is not recommended, however if | ||
30 | there is an agent checking/clearing corrected errors | ||
31 | (e.g. BIOS or hardware monitoring applications), conflicting | ||
32 | with OS's error handling, and you cannot deactivate the agent, | ||
33 | then this option will be a help. | ||
34 | mce=bootlog | ||
35 | Enable logging of machine checks left over from booting. | ||
36 | Disabled by default on AMD because some BIOS leave bogus ones. | ||
37 | If your BIOS doesn't do that it's a good idea to enable though | ||
38 | to make sure you log even machine check events that result | ||
39 | in a reboot. On Intel systems it is enabled by default. | ||
14 | mce=nobootlog | 40 | mce=nobootlog |
15 | Disable boot machine check logging. | 41 | Disable boot machine check logging. |
16 | mce=tolerancelevel (number) | 42 | mce=tolerancelevel[,monarchtimeout] (number,number) |
43 | tolerance levels: | ||
17 | 0: always panic on uncorrected errors, log corrected errors | 44 | 0: always panic on uncorrected errors, log corrected errors |
18 | 1: panic or SIGBUS on uncorrected errors, log corrected errors | 45 | 1: panic or SIGBUS on uncorrected errors, log corrected errors |
19 | 2: SIGBUS or log uncorrected errors, log corrected errors | 46 | 2: SIGBUS or log uncorrected errors, log corrected errors |
20 | 3: never panic or SIGBUS, log all errors (for testing only) | 47 | 3: never panic or SIGBUS, log all errors (for testing only) |
21 | Default is 1 | 48 | Default is 1 |
22 | Can be also set using sysfs which is preferable. | 49 | Can be also set using sysfs which is preferable. |
50 | monarchtimeout: | ||
51 | Sets the time in us to wait for other CPUs on machine checks. 0 | ||
52 | to disable. | ||
23 | 53 | ||
24 | nomce (for compatibility with i386): same as mce=off | 54 | nomce (for compatibility with i386): same as mce=off |
25 | 55 | ||
@@ -150,11 +180,6 @@ NUMA | |||
150 | Otherwise, the remaining system RAM is allocated to an | 180 | Otherwise, the remaining system RAM is allocated to an |
151 | additional node. | 181 | additional node. |
152 | 182 | ||
153 | numa=hotadd=percent | ||
154 | Only allow hotadd memory to preallocate page structures upto | ||
155 | percent of already available memory. | ||
156 | numa=hotadd=0 will disable hotadd memory. | ||
157 | |||
158 | ACPI | 183 | ACPI |
159 | 184 | ||
160 | acpi=off Don't enable ACPI | 185 | acpi=off Don't enable ACPI |
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets index 33bb56655991..0f11d9becb0b 100644 --- a/Documentation/x86/x86_64/fake-numa-for-cpusets +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets | |||
@@ -7,7 +7,8 @@ you can create fake NUMA nodes that represent contiguous chunks of memory and | |||
7 | assign them to cpusets and their attached tasks. This is a way of limiting the | 7 | assign them to cpusets and their attached tasks. This is a way of limiting the |
8 | amount of system memory that are available to a certain class of tasks. | 8 | amount of system memory that are available to a certain class of tasks. |
9 | 9 | ||
10 | For more information on the features of cpusets, see Documentation/cpusets.txt. | 10 | For more information on the features of cpusets, see |
11 | Documentation/cgroups/cpusets.txt. | ||
11 | There are a number of different configurations you can use for your needs. For | 12 | There are a number of different configurations you can use for your needs. For |
12 | more information on the numa=fake command line option and its various ways of | 13 | more information on the numa=fake command line option and its various ways of |
13 | configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. | 14 | configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. |
@@ -32,7 +33,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: | |||
32 | On node 3 totalpages: 131072 | 33 | On node 3 totalpages: 131072 |
33 | 34 | ||
34 | Now following the instructions for mounting the cpusets filesystem from | 35 | Now following the instructions for mounting the cpusets filesystem from |
35 | Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory | 36 | Documentation/cgroups/cpusets.txt, you can assign fake nodes (i.e. contiguous memory |
36 | address spaces) to individual cpusets: | 37 | address spaces) to individual cpusets: |
37 | 38 | ||
38 | [root@xroads /]# mkdir exampleset | 39 | [root@xroads /]# mkdir exampleset |
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck index a05e58e7b159..b1fb30273286 100644 --- a/Documentation/x86/x86_64/machinecheck +++ b/Documentation/x86/x86_64/machinecheck | |||
@@ -41,7 +41,9 @@ check_interval | |||
41 | the polling interval. When the poller stops finding MCEs, it | 41 | the polling interval. When the poller stops finding MCEs, it |
42 | triggers an exponential backoff (poll less often) on the polling | 42 | triggers an exponential backoff (poll less often) on the polling |
43 | interval. The check_interval variable is both the initial and | 43 | interval. The check_interval variable is both the initial and |
44 | maximum polling interval. | 44 | maximum polling interval. 0 means no polling for corrected machine |
45 | check errors (but some corrected errors might be still reported | ||
46 | in other ways) | ||
45 | 47 | ||
46 | tolerant | 48 | tolerant |
47 | Tolerance level. When a machine check exception occurs for a non | 49 | Tolerance level. When a machine check exception occurs for a non |
@@ -67,6 +69,10 @@ trigger | |||
67 | Program to run when a machine check event is detected. | 69 | Program to run when a machine check event is detected. |
68 | This is an alternative to running mcelog regularly from cron | 70 | This is an alternative to running mcelog regularly from cron |
69 | and allows to detect events faster. | 71 | and allows to detect events faster. |
72 | monarch_timeout | ||
73 | How long to wait for the other CPUs to machine check too on a | ||
74 | exception. 0 to disable waiting for other CPUs. | ||
75 | Unit: us | ||
70 | 76 | ||
71 | TBD document entries for AMD threshold interrupt configuration | 77 | TBD document entries for AMD threshold interrupt configuration |
72 | 78 | ||
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 29b52b14d0b4..d6498e3cd713 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables: | |||
6 | 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm | 6 | 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm |
7 | hole caused by [48:63] sign extension | 7 | hole caused by [48:63] sign extension |
8 | ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole | 8 | ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole |
9 | ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory | 9 | ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory |
10 | ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole | 10 | ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole |
11 | ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space | 11 | ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space |
12 | ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) | 12 | ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole |
13 | ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) | ||
13 | ... unused hole ... | 14 | ... unused hole ... |
14 | ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 | 15 | ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 |
15 | ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space | 16 | ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space |