diff options
Diffstat (limited to 'Documentation')
86 files changed, 5965 insertions, 1762 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 4bd9ea539129..44f52a4f5903 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block | |||
@@ -26,3 +26,37 @@ Description: | |||
26 | I/O statistics of partition <part>. The format is the | 26 | I/O statistics of partition <part>. The format is the |
27 | same as the above-written /sys/block/<disk>/stat | 27 | same as the above-written /sys/block/<disk>/stat |
28 | format. | 28 | format. |
29 | |||
30 | |||
31 | What: /sys/block/<disk>/integrity/format | ||
32 | Date: June 2008 | ||
33 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
34 | Description: | ||
35 | Metadata format for integrity capable block device. | ||
36 | E.g. T10-DIF-TYPE1-CRC. | ||
37 | |||
38 | |||
39 | What: /sys/block/<disk>/integrity/read_verify | ||
40 | Date: June 2008 | ||
41 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
42 | Description: | ||
43 | Indicates whether the block layer should verify the | ||
44 | integrity of read requests serviced by devices that | ||
45 | support sending integrity metadata. | ||
46 | |||
47 | |||
48 | What: /sys/block/<disk>/integrity/tag_size | ||
49 | Date: June 2008 | ||
50 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
51 | Description: | ||
52 | Number of bytes of integrity tag space available per | ||
53 | 512 bytes of data. | ||
54 | |||
55 | |||
56 | What: /sys/block/<disk>/integrity/write_generate | ||
57 | Date: June 2008 | ||
58 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
59 | Description: | ||
60 | Indicates whether the block layer should automatically | ||
61 | generate checksums for write requests bound for | ||
62 | devices that support receiving integrity metadata. | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css new file mode 100644 index 000000000000..b585ec258a08 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-css | |||
@@ -0,0 +1,35 @@ | |||
1 | What: /sys/bus/css/devices/.../type | ||
2 | Date: March 2008 | ||
3 | Contact: Cornelia Huck <cornelia.huck@de.ibm.com> | ||
4 | linux-s390@vger.kernel.org | ||
5 | Description: Contains the subchannel type, as reported by the hardware. | ||
6 | This attribute is present for all subchannel types. | ||
7 | |||
8 | What: /sys/bus/css/devices/.../modalias | ||
9 | Date: March 2008 | ||
10 | Contact: Cornelia Huck <cornelia.huck@de.ibm.com> | ||
11 | linux-s390@vger.kernel.org | ||
12 | Description: Contains the module alias as reported with uevents. | ||
13 | It is of the format css:t<type> and present for all | ||
14 | subchannel types. | ||
15 | |||
16 | What: /sys/bus/css/drivers/io_subchannel/.../chpids | ||
17 | Date: December 2002 | ||
18 | Contact: Cornelia Huck <cornelia.huck@de.ibm.com> | ||
19 | linux-s390@vger.kernel.org | ||
20 | Description: Contains the ids of the channel paths used by this | ||
21 | subchannel, as reported by the channel subsystem | ||
22 | during subchannel recognition. | ||
23 | Note: This is an I/O-subchannel specific attribute. | ||
24 | Users: s390-tools, HAL | ||
25 | |||
26 | What: /sys/bus/css/drivers/io_subchannel/.../pimpampom | ||
27 | Date: December 2002 | ||
28 | Contact: Cornelia Huck <cornelia.huck@de.ibm.com> | ||
29 | linux-s390@vger.kernel.org | ||
30 | Description: Contains the PIM/PAM/POM values, as reported by the | ||
31 | channel subsystem when last queried by the common I/O | ||
32 | layer (this implies that this attribute is not necessarily | ||
33 | in sync with the values current in the channel subsystem). | ||
34 | Note: This is an I/O-subchannel specific attribute. | ||
35 | Users: s390-tools, HAL | ||
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index 9470ed9afcc0..f27be7d1a49f 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi | |||
@@ -29,46 +29,46 @@ Description: | |||
29 | 29 | ||
30 | $ cd /sys/firmware/acpi/interrupts | 30 | $ cd /sys/firmware/acpi/interrupts |
31 | $ grep . * | 31 | $ grep . * |
32 | error:0 | 32 | error: 0 |
33 | ff_gbl_lock:0 | 33 | ff_gbl_lock: 0 enable |
34 | ff_pmtimer:0 | 34 | ff_pmtimer: 0 invalid |
35 | ff_pwr_btn:0 | 35 | ff_pwr_btn: 0 enable |
36 | ff_rt_clk:0 | 36 | ff_rt_clk: 2 disable |
37 | ff_slp_btn:0 | 37 | ff_slp_btn: 0 invalid |
38 | gpe00:0 | 38 | gpe00: 0 invalid |
39 | gpe01:0 | 39 | gpe01: 0 enable |
40 | gpe02:0 | 40 | gpe02: 108 enable |
41 | gpe03:0 | 41 | gpe03: 0 invalid |
42 | gpe04:0 | 42 | gpe04: 0 invalid |
43 | gpe05:0 | 43 | gpe05: 0 invalid |
44 | gpe06:0 | 44 | gpe06: 0 enable |
45 | gpe07:0 | 45 | gpe07: 0 enable |
46 | gpe08:0 | 46 | gpe08: 0 invalid |
47 | gpe09:174 | 47 | gpe09: 0 invalid |
48 | gpe0A:0 | 48 | gpe0A: 0 invalid |
49 | gpe0B:0 | 49 | gpe0B: 0 invalid |
50 | gpe0C:0 | 50 | gpe0C: 0 invalid |
51 | gpe0D:0 | 51 | gpe0D: 0 invalid |
52 | gpe0E:0 | 52 | gpe0E: 0 invalid |
53 | gpe0F:0 | 53 | gpe0F: 0 invalid |
54 | gpe10:0 | 54 | gpe10: 0 invalid |
55 | gpe11:60 | 55 | gpe11: 0 invalid |
56 | gpe12:0 | 56 | gpe12: 0 invalid |
57 | gpe13:0 | 57 | gpe13: 0 invalid |
58 | gpe14:0 | 58 | gpe14: 0 invalid |
59 | gpe15:0 | 59 | gpe15: 0 invalid |
60 | gpe16:0 | 60 | gpe16: 0 invalid |
61 | gpe17:0 | 61 | gpe17: 1084 enable |
62 | gpe18:0 | 62 | gpe18: 0 enable |
63 | gpe19:7 | 63 | gpe19: 0 invalid |
64 | gpe1A:0 | 64 | gpe1A: 0 invalid |
65 | gpe1B:0 | 65 | gpe1B: 0 invalid |
66 | gpe1C:0 | 66 | gpe1C: 0 invalid |
67 | gpe1D:0 | 67 | gpe1D: 0 invalid |
68 | gpe1E:0 | 68 | gpe1E: 0 invalid |
69 | gpe1F:0 | 69 | gpe1F: 0 invalid |
70 | gpe_all:241 | 70 | gpe_all: 1192 |
71 | sci:241 | 71 | sci: 1194 |
72 | 72 | ||
73 | sci - The total number of times the ACPI SCI | 73 | sci - The total number of times the ACPI SCI |
74 | has claimed an interrupt. | 74 | has claimed an interrupt. |
@@ -89,6 +89,13 @@ Description: | |||
89 | 89 | ||
90 | error - an interrupt that can't be accounted for above. | 90 | error - an interrupt that can't be accounted for above. |
91 | 91 | ||
92 | invalid: it's either a wakeup GPE or a GPE/Fixed Event that | ||
93 | doesn't have an event handler. | ||
94 | |||
95 | disable: the GPE/Fixed Event is valid but disabled. | ||
96 | |||
97 | enable: the GPE/Fixed Event is valid and enabled. | ||
98 | |||
92 | Root has permission to clear any of these counters. Eg. | 99 | Root has permission to clear any of these counters. Eg. |
93 | # echo 0 > gpe11 | 100 | # echo 0 > gpe11 |
94 | 101 | ||
@@ -97,3 +104,43 @@ Description: | |||
97 | 104 | ||
98 | None of these counters has an effect on the function | 105 | None of these counters has an effect on the function |
99 | of the system, they are simply statistics. | 106 | of the system, they are simply statistics. |
107 | |||
108 | Besides this, user can also write specific strings to these files | ||
109 | to enable/disable/clear ACPI interrupts in user space, which can be | ||
110 | used to debug some ACPI interrupt storm issues. | ||
111 | |||
112 | Note that only writing to VALID GPE/Fixed Event is allowed, | ||
113 | i.e. user can only change the status of runtime GPE and | ||
114 | Fixed Event with event handler installed. | ||
115 | |||
116 | Let's take power button fixed event for example, please kill acpid | ||
117 | and other user space applications so that the machine won't shutdown | ||
118 | when pressing the power button. | ||
119 | # cat ff_pwr_btn | ||
120 | 0 | ||
121 | # press the power button for 3 times; | ||
122 | # cat ff_pwr_btn | ||
123 | 3 | ||
124 | # echo disable > ff_pwr_btn | ||
125 | # cat ff_pwr_btn | ||
126 | disable | ||
127 | # press the power button for 3 times; | ||
128 | # cat ff_pwr_btn | ||
129 | disable | ||
130 | # echo enable > ff_pwr_btn | ||
131 | # cat ff_pwr_btn | ||
132 | 4 | ||
133 | /* | ||
134 | * this is because the status bit is set even if the enable bit is cleared, | ||
135 | * and it triggers an ACPI fixed event when the enable bit is set again | ||
136 | */ | ||
137 | # press the power button for 3 times; | ||
138 | # cat ff_pwr_btn | ||
139 | 7 | ||
140 | # echo disable > ff_pwr_btn | ||
141 | # press the power button for 3 times; | ||
142 | # echo clear > ff_pwr_btn /* clear the status bit */ | ||
143 | # echo disable > ff_pwr_btn | ||
144 | # cat ff_pwr_btn | ||
145 | 7 | ||
146 | |||
diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 0291ade44c17..619e8caf30db 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO | |||
@@ -377,7 +377,7 @@ Bug Reporting | |||
377 | bugzilla.kernel.org is where the Linux kernel developers track kernel | 377 | bugzilla.kernel.org is where the Linux kernel developers track kernel |
378 | bugs. Users are encouraged to report all bugs that they find in this | 378 | bugs. Users are encouraged to report all bugs that they find in this |
379 | tool. For details on how to use the kernel bugzilla, please see: | 379 | tool. For details on how to use the kernel bugzilla, please see: |
380 | http://test.kernel.org/bugzilla/faq.html | 380 | http://bugzilla.kernel.org/page.cgi?id=faq.html |
381 | 381 | ||
382 | The file REPORTING-BUGS in the main kernel source directory has a good | 382 | The file REPORTING-BUGS in the main kernel source directory has a good |
383 | template for how to report a possible kernel bug, and details what kind | 383 | template for how to report a possible kernel bug, and details what kind |
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index 938d7dd05490..b4a615b78403 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt | |||
@@ -1,17 +1,26 @@ | |||
1 | ChangeLog: | ||
2 | Started by Ingo Molnar <mingo@redhat.com> | ||
3 | Update by Max Krasnyansky <maxk@qualcomm.com> | ||
1 | 4 | ||
2 | SMP IRQ affinity, started by Ingo Molnar <mingo@redhat.com> | 5 | SMP IRQ affinity |
3 | |||
4 | 6 | ||
5 | /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted | 7 | /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted |
6 | for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed | 8 | for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed |
7 | to turn off all CPUs, and if an IRQ controller does not support IRQ | 9 | to turn off all CPUs, and if an IRQ controller does not support IRQ |
8 | affinity then the value will not change from the default 0xffffffff. | 10 | affinity then the value will not change from the default 0xffffffff. |
9 | 11 | ||
12 | /proc/irq/default_smp_affinity specifies default affinity mask that applies | ||
13 | to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask | ||
14 | will be set to the default mask. It can then be changed as described above. | ||
15 | Default mask is 0xffffffff. | ||
16 | |||
10 | Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting | 17 | Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting |
11 | the IRQ to CPU4-7 (this is an 8-CPU SMP box): | 18 | it to CPU4-7 (this is an 8-CPU SMP box): |
12 | 19 | ||
20 | [root@moon 44]# cd /proc/irq/44 | ||
13 | [root@moon 44]# cat smp_affinity | 21 | [root@moon 44]# cat smp_affinity |
14 | ffffffff | 22 | ffffffff |
23 | |||
15 | [root@moon 44]# echo 0f > smp_affinity | 24 | [root@moon 44]# echo 0f > smp_affinity |
16 | [root@moon 44]# cat smp_affinity | 25 | [root@moon 44]# cat smp_affinity |
17 | 0000000f | 26 | 0000000f |
@@ -21,17 +30,27 @@ PING hell (195.4.7.3): 56 data bytes | |||
21 | --- hell ping statistics --- | 30 | --- hell ping statistics --- |
22 | 6029 packets transmitted, 6027 packets received, 0% packet loss | 31 | 6029 packets transmitted, 6027 packets received, 0% packet loss |
23 | round-trip min/avg/max = 0.1/0.1/0.4 ms | 32 | round-trip min/avg/max = 0.1/0.1/0.4 ms |
24 | [root@moon 44]# cat /proc/interrupts | grep 44: | 33 | [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:' |
25 | 44: 0 1785 1785 1783 1783 1 | 34 | CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 |
26 | 1 0 IO-APIC-level eth1 | 35 | 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1 |
36 | |||
37 | As can be seen from the line above IRQ44 was delivered only to the first four | ||
38 | processors (0-3). | ||
39 | Now let's restrict that IRQ to CPU(4-7). | ||
40 | |||
27 | [root@moon 44]# echo f0 > smp_affinity | 41 | [root@moon 44]# echo f0 > smp_affinity |
42 | [root@moon 44]# cat smp_affinity | ||
43 | 000000f0 | ||
28 | [root@moon 44]# ping -f h | 44 | [root@moon 44]# ping -f h |
29 | PING hell (195.4.7.3): 56 data bytes | 45 | PING hell (195.4.7.3): 56 data bytes |
30 | .. | 46 | .. |
31 | --- hell ping statistics --- | 47 | --- hell ping statistics --- |
32 | 2779 packets transmitted, 2777 packets received, 0% packet loss | 48 | 2779 packets transmitted, 2777 packets received, 0% packet loss |
33 | round-trip min/avg/max = 0.1/0.5/585.4 ms | 49 | round-trip min/avg/max = 0.1/0.5/585.4 ms |
34 | [root@moon 44]# cat /proc/interrupts | grep 44: | 50 | [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:' |
35 | 44: 1068 1785 1785 1784 1784 1069 1070 1069 IO-APIC-level eth1 | 51 | CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 |
36 | [root@moon 44]# | 52 | 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1 |
53 | |||
54 | This time around IRQ44 was delivered only to the last four processors. | ||
55 | i.e. counters for the CPU0-3 did not change. | ||
37 | 56 | ||
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index c64158ecde43..a6d32e65d222 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt | |||
@@ -93,6 +93,9 @@ Since NMI handlers disable preemption, synchronize_sched() is guaranteed | |||
93 | not to return until all ongoing NMI handlers exit. It is therefore safe | 93 | not to return until all ongoing NMI handlers exit. It is therefore safe |
94 | to free up the handler's data as soon as synchronize_sched() returns. | 94 | to free up the handler's data as soon as synchronize_sched() returns. |
95 | 95 | ||
96 | Important note: for this to work, the architecture in question must | ||
97 | invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. | ||
98 | |||
96 | 99 | ||
97 | Answer to Quick Quiz | 100 | Answer to Quick Quiz |
98 | 101 | ||
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 39ad8f56783a..9f711d2df91b 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt | |||
@@ -52,6 +52,10 @@ of each iteration. Unfortunately, chaotic relaxation requires highly | |||
52 | structured data, such as the matrices used in scientific programs, and | 52 | structured data, such as the matrices used in scientific programs, and |
53 | is thus inapplicable to most data structures in operating-system kernels. | 53 | is thus inapplicable to most data structures in operating-system kernels. |
54 | 54 | ||
55 | In 1992, Henry (now Alexia) Massalin completed a dissertation advising | ||
56 | parallel programmers to defer processing when feasible to simplify | ||
57 | synchronization. RCU makes extremely heavy use of this advice. | ||
58 | |||
55 | In 1993, Jacobson [Jacobson93] verbally described what is perhaps the | 59 | In 1993, Jacobson [Jacobson93] verbally described what is perhaps the |
56 | simplest deferred-free technique: simply waiting a fixed amount of time | 60 | simplest deferred-free technique: simply waiting a fixed amount of time |
57 | before freeing blocks awaiting deferred free. Jacobson did not describe | 61 | before freeing blocks awaiting deferred free. Jacobson did not describe |
@@ -138,6 +142,13 @@ blocking in read-side critical sections appeared [PaulEMcKenney2006c], | |||
138 | Robert Olsson described an RCU-protected trie-hash combination | 142 | Robert Olsson described an RCU-protected trie-hash combination |
139 | [RobertOlsson2006a]. | 143 | [RobertOlsson2006a]. |
140 | 144 | ||
145 | 2007 saw the journal version of the award-winning RCU paper from 2006 | ||
146 | [ThomasEHart2007a], as well as a paper demonstrating use of Promela | ||
147 | and Spin to mechanically verify an optimization to Oleg Nesterov's | ||
148 | QRCU [PaulEMcKenney2007QRCUspin], a design document describing | ||
149 | preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part | ||
150 | LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally, | ||
151 | PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI]. | ||
141 | 152 | ||
142 | Bibtex Entries | 153 | Bibtex Entries |
143 | 154 | ||
@@ -202,6 +213,20 @@ Bibtex Entries | |||
202 | ,Year="1991" | 213 | ,Year="1991" |
203 | } | 214 | } |
204 | 215 | ||
216 | @phdthesis{HMassalinPhD | ||
217 | ,author="H. Massalin" | ||
218 | ,title="Synthesis: An Efficient Implementation of Fundamental Operating | ||
219 | System Services" | ||
220 | ,school="Columbia University" | ||
221 | ,address="New York, NY" | ||
222 | ,year="1992" | ||
223 | ,annotation=" | ||
224 | Mondo optimizing compiler. | ||
225 | Wait-free stuff. | ||
226 | Good advice: defer work to avoid synchronization. | ||
227 | " | ||
228 | } | ||
229 | |||
205 | @unpublished{Jacobson93 | 230 | @unpublished{Jacobson93 |
206 | ,author="Van Jacobson" | 231 | ,author="Van Jacobson" |
207 | ,title="Avoid Read-Side Locking Via Delayed Free" | 232 | ,title="Avoid Read-Side Locking Via Delayed Free" |
@@ -635,3 +660,86 @@ Revised: | |||
635 | " | 660 | " |
636 | } | 661 | } |
637 | 662 | ||
663 | @unpublished{PaulEMcKenney2007PreemptibleRCU | ||
664 | ,Author="Paul E. McKenney" | ||
665 | ,Title="The design of preemptible read-copy-update" | ||
666 | ,month="October" | ||
667 | ,day="8" | ||
668 | ,year="2007" | ||
669 | ,note="Available: | ||
670 | \url{http://lwn.net/Articles/253651/} | ||
671 | [Viewed October 25, 2007]" | ||
672 | ,annotation=" | ||
673 | LWN article describing the design of preemptible RCU. | ||
674 | " | ||
675 | } | ||
676 | |||
677 | ######################################################################## | ||
678 | # | ||
679 | # "What is RCU?" LWN series. | ||
680 | # | ||
681 | |||
682 | @unpublished{PaulEMcKenney2007WhatIsRCUFundamentally | ||
683 | ,Author="Paul E. McKenney and Jonathan Walpole" | ||
684 | ,Title="What is {RCU}, Fundamentally?" | ||
685 | ,month="December" | ||
686 | ,day="17" | ||
687 | ,year="2007" | ||
688 | ,note="Available: | ||
689 | \url{http://lwn.net/Articles/262464/} | ||
690 | [Viewed December 27, 2007]" | ||
691 | ,annotation=" | ||
692 | Lays out the three basic components of RCU: (1) publish-subscribe, | ||
693 | (2) wait for pre-existing readers to complete, and (3) maintain | ||
694 | multiple versions. | ||
695 | " | ||
696 | } | ||
697 | |||
698 | @unpublished{PaulEMcKenney2008WhatIsRCUUsage | ||
699 | ,Author="Paul E. McKenney" | ||
700 | ,Title="What is {RCU}? Part 2: Usage" | ||
701 | ,month="January" | ||
702 | ,day="4" | ||
703 | ,year="2008" | ||
704 | ,note="Available: | ||
705 | \url{http://lwn.net/Articles/263130/} | ||
706 | [Viewed January 4, 2008]" | ||
707 | ,annotation=" | ||
708 | Lays out six uses of RCU: | ||
709 | 1. RCU is a Reader-Writer Lock Replacement | ||
710 | 2. RCU is a Restricted Reference-Counting Mechanism | ||
711 | 3. RCU is a Bulk Reference-Counting Mechanism | ||
712 | 4. RCU is a Poor Man's Garbage Collector | ||
713 | 5. RCU is a Way of Providing Existence Guarantees | ||
714 | 6. RCU is a Way of Waiting for Things to Finish | ||
715 | " | ||
716 | } | ||
717 | |||
718 | @unpublished{PaulEMcKenney2008WhatIsRCUAPI | ||
719 | ,Author="Paul E. McKenney" | ||
720 | ,Title="{RCU} part 3: the {RCU} {API}" | ||
721 | ,month="January" | ||
722 | ,day="17" | ||
723 | ,year="2008" | ||
724 | ,note="Available: | ||
725 | \url{http://lwn.net/Articles/264090/} | ||
726 | [Viewed January 10, 2008]" | ||
727 | ,annotation=" | ||
728 | Gives an overview of the Linux-kernel RCU API and a brief annotated RCU | ||
729 | bibliography. | ||
730 | " | ||
731 | } | ||
732 | |||
733 | @article{DinakarGuniguntala2008IBMSysJ | ||
734 | ,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole" | ||
735 | ,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}" | ||
736 | ,Year="2008" | ||
737 | ,Month="April" | ||
738 | ,journal="IBM Systems Journal" | ||
739 | ,volume="47" | ||
740 | ,number="2" | ||
741 | ,pages="@@-@@" | ||
742 | ,annotation=" | ||
743 | RCU, realtime RCU, sleepable RCU, performance. | ||
744 | " | ||
745 | } | ||
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 42b01bc2e1b4..cf5562cbe356 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -13,10 +13,13 @@ over a rather long period of time, but improvements are always welcome! | |||
13 | detailed performance measurements show that RCU is nonetheless | 13 | detailed performance measurements show that RCU is nonetheless |
14 | the right tool for the job. | 14 | the right tool for the job. |
15 | 15 | ||
16 | The other exception would be where performance is not an issue, | 16 | Another exception is where performance is not an issue, and RCU |
17 | and RCU provides a simpler implementation. An example of this | 17 | provides a simpler implementation. An example of this situation |
18 | situation is the dynamic NMI code in the Linux 2.6 kernel, | 18 | is the dynamic NMI code in the Linux 2.6 kernel, at least on |
19 | at least on architectures where NMIs are rare. | 19 | architectures where NMIs are rare. |
20 | |||
21 | Yet another exception is where the low real-time latency of RCU's | ||
22 | read-side primitives is critically important. | ||
20 | 23 | ||
21 | 1. Does the update code have proper mutual exclusion? | 24 | 1. Does the update code have proper mutual exclusion? |
22 | 25 | ||
@@ -39,9 +42,10 @@ over a rather long period of time, but improvements are always welcome! | |||
39 | 42 | ||
40 | 2. Do the RCU read-side critical sections make proper use of | 43 | 2. Do the RCU read-side critical sections make proper use of |
41 | rcu_read_lock() and friends? These primitives are needed | 44 | rcu_read_lock() and friends? These primitives are needed |
42 | to suppress preemption (or bottom halves, in the case of | 45 | to prevent grace periods from ending prematurely, which |
43 | rcu_read_lock_bh()) in the read-side critical sections, | 46 | could result in data being unceremoniously freed out from |
44 | and are also an excellent aid to readability. | 47 | under your read-side code, which can greatly increase the |
48 | actuarial risk of your kernel. | ||
45 | 49 | ||
46 | As a rough rule of thumb, any dereference of an RCU-protected | 50 | As a rough rule of thumb, any dereference of an RCU-protected |
47 | pointer must be covered by rcu_read_lock() or rcu_read_lock_bh() | 51 | pointer must be covered by rcu_read_lock() or rcu_read_lock_bh() |
@@ -54,15 +58,30 @@ over a rather long period of time, but improvements are always welcome! | |||
54 | be running while updates are in progress. There are a number | 58 | be running while updates are in progress. There are a number |
55 | of ways to handle this concurrency, depending on the situation: | 59 | of ways to handle this concurrency, depending on the situation: |
56 | 60 | ||
57 | a. Make updates appear atomic to readers. For example, | 61 | a. Use the RCU variants of the list and hlist update |
62 | primitives to add, remove, and replace elements on an | ||
63 | RCU-protected list. Alternatively, use the RCU-protected | ||
64 | trees that have been added to the Linux kernel. | ||
65 | |||
66 | This is almost always the best approach. | ||
67 | |||
68 | b. Proceed as in (a) above, but also maintain per-element | ||
69 | locks (that are acquired by both readers and writers) | ||
70 | that guard per-element state. Of course, fields that | ||
71 | the readers refrain from accessing can be guarded by the | ||
72 | update-side lock. | ||
73 | |||
74 | This works quite well, also. | ||
75 | |||
76 | c. Make updates appear atomic to readers. For example, | ||
58 | pointer updates to properly aligned fields will appear | 77 | pointer updates to properly aligned fields will appear |
59 | atomic, as will individual atomic primitives. Operations | 78 | atomic, as will individual atomic primitives. Operations |
60 | performed under a lock and sequences of multiple atomic | 79 | performed under a lock and sequences of multiple atomic |
61 | primitives will -not- appear to be atomic. | 80 | primitives will -not- appear to be atomic. |
62 | 81 | ||
63 | This is almost always the best approach. | 82 | This can work, but is starting to get a bit tricky. |
64 | 83 | ||
65 | b. Carefully order the updates and the reads so that | 84 | d. Carefully order the updates and the reads so that |
66 | readers see valid data at all phases of the update. | 85 | readers see valid data at all phases of the update. |
67 | This is often more difficult than it sounds, especially | 86 | This is often more difficult than it sounds, especially |
68 | given modern CPUs' tendency to reorder memory references. | 87 | given modern CPUs' tendency to reorder memory references. |
@@ -123,18 +142,22 @@ over a rather long period of time, but improvements are always welcome! | |||
123 | when publicizing a pointer to a structure that can | 142 | when publicizing a pointer to a structure that can |
124 | be traversed by an RCU read-side critical section. | 143 | be traversed by an RCU read-side critical section. |
125 | 144 | ||
126 | 5. If call_rcu(), or a related primitive such as call_rcu_bh(), | 145 | 5. If call_rcu(), or a related primitive such as call_rcu_bh() or |
127 | is used, the callback function must be written to be called | 146 | call_rcu_sched(), is used, the callback function must be |
128 | from softirq context. In particular, it cannot block. | 147 | written to be called from softirq context. In particular, |
148 | it cannot block. | ||
129 | 149 | ||
130 | 6. Since synchronize_rcu() can block, it cannot be called from | 150 | 6. Since synchronize_rcu() can block, it cannot be called from |
131 | any sort of irq context. | 151 | any sort of irq context. Ditto for synchronize_sched() and |
152 | synchronize_srcu(). | ||
132 | 153 | ||
133 | 7. If the updater uses call_rcu(), then the corresponding readers | 154 | 7. If the updater uses call_rcu(), then the corresponding readers |
134 | must use rcu_read_lock() and rcu_read_unlock(). If the updater | 155 | must use rcu_read_lock() and rcu_read_unlock(). If the updater |
135 | uses call_rcu_bh(), then the corresponding readers must use | 156 | uses call_rcu_bh(), then the corresponding readers must use |
136 | rcu_read_lock_bh() and rcu_read_unlock_bh(). Mixing things up | 157 | rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater |
137 | will result in confusion and broken kernels. | 158 | uses call_rcu_sched(), then the corresponding readers must |
159 | disable preemption. Mixing things up will result in confusion | ||
160 | and broken kernels. | ||
138 | 161 | ||
139 | One exception to this rule: rcu_read_lock() and rcu_read_unlock() | 162 | One exception to this rule: rcu_read_lock() and rcu_read_unlock() |
140 | may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() | 163 | may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() |
@@ -143,9 +166,9 @@ over a rather long period of time, but improvements are always welcome! | |||
143 | such cases is a must, of course! And the jury is still out on | 166 | such cases is a must, of course! And the jury is still out on |
144 | whether the increased speed is worth it. | 167 | whether the increased speed is worth it. |
145 | 168 | ||
146 | 8. Although synchronize_rcu() is a bit slower than is call_rcu(), | 169 | 8. Although synchronize_rcu() is slower than is call_rcu(), it |
147 | it usually results in simpler code. So, unless update | 170 | usually results in simpler code. So, unless update performance |
148 | performance is critically important or the updaters cannot block, | 171 | is critically important or the updaters cannot block, |
149 | synchronize_rcu() should be used in preference to call_rcu(). | 172 | synchronize_rcu() should be used in preference to call_rcu(). |
150 | 173 | ||
151 | An especially important property of the synchronize_rcu() | 174 | An especially important property of the synchronize_rcu() |
@@ -187,23 +210,23 @@ over a rather long period of time, but improvements are always welcome! | |||
187 | number of updates per grace period. | 210 | number of updates per grace period. |
188 | 211 | ||
189 | 9. All RCU list-traversal primitives, which include | 212 | 9. All RCU list-traversal primitives, which include |
190 | list_for_each_rcu(), list_for_each_entry_rcu(), | 213 | rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), |
191 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), | 214 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), |
192 | must be within an RCU read-side critical section. RCU | 215 | must be either within an RCU read-side critical section or |
216 | must be protected by appropriate update-side locks. RCU | ||
193 | read-side critical sections are delimited by rcu_read_lock() | 217 | read-side critical sections are delimited by rcu_read_lock() |
194 | and rcu_read_unlock(), or by similar primitives such as | 218 | and rcu_read_unlock(), or by similar primitives such as |
195 | rcu_read_lock_bh() and rcu_read_unlock_bh(). | 219 | rcu_read_lock_bh() and rcu_read_unlock_bh(). |
196 | 220 | ||
197 | Use of the _rcu() list-traversal primitives outside of an | 221 | The reason that it is permissible to use RCU list-traversal |
198 | RCU read-side critical section causes no harm other than | 222 | primitives when the update-side lock is held is that doing so |
199 | a slight performance degradation on Alpha CPUs. It can | 223 | can be quite helpful in reducing code bloat when common code is |
200 | also be quite helpful in reducing code bloat when common | 224 | shared between readers and updaters. |
201 | code is shared between readers and updaters. | ||
202 | 225 | ||
203 | 10. Conversely, if you are in an RCU read-side critical section, | 226 | 10. Conversely, if you are in an RCU read-side critical section, |
204 | you -must- use the "_rcu()" variants of the list macros. | 227 | and you don't hold the appropriate update-side lock, you -must- |
205 | Failing to do so will break Alpha and confuse people reading | 228 | use the "_rcu()" variants of the list macros. Failing to do so |
206 | your code. | 229 | will break Alpha and confuse people reading your code. |
207 | 230 | ||
208 | 11. Note that synchronize_rcu() -only- guarantees to wait until | 231 | 11. Note that synchronize_rcu() -only- guarantees to wait until |
209 | all currently executing rcu_read_lock()-protected RCU read-side | 232 | all currently executing rcu_read_lock()-protected RCU read-side |
@@ -230,6 +253,14 @@ over a rather long period of time, but improvements are always welcome! | |||
230 | must use whatever locking or other synchronization is required | 253 | must use whatever locking or other synchronization is required |
231 | to safely access and/or modify that data structure. | 254 | to safely access and/or modify that data structure. |
232 | 255 | ||
256 | RCU callbacks are -usually- executed on the same CPU that executed | ||
257 | the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(), | ||
258 | but are by -no- means guaranteed to be. For example, if a given | ||
259 | CPU goes offline while having an RCU callback pending, then that | ||
260 | RCU callback will execute on some surviving CPU. (If this were | ||
261 | not the case, a self-spawning RCU callback would prevent the | ||
262 | victim CPU from ever going offline.) | ||
263 | |||
233 | 14. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) | 264 | 14. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) |
234 | may only be invoked from process context. Unlike other forms of | 265 | may only be invoked from process context. Unlike other forms of |
235 | RCU, it -is- permissible to block in an SRCU read-side critical | 266 | RCU, it -is- permissible to block in an SRCU read-side critical |
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 2967a65269d8..a342b6e1cc10 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -10,23 +10,30 @@ status messages via printk(), which can be examined via the dmesg | |||
10 | command (perhaps grepping for "torture"). The test is started | 10 | command (perhaps grepping for "torture"). The test is started |
11 | when the module is loaded, and stops when the module is unloaded. | 11 | when the module is loaded, and stops when the module is unloaded. |
12 | 12 | ||
13 | However, actually setting this config option to "y" results in the system | 13 | CONFIG_RCU_TORTURE_TEST_RUNNABLE |
14 | running the test immediately upon boot, and ending only when the system | 14 | |
15 | is taken down. Normally, one will instead want to build the system | 15 | It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will |
16 | with CONFIG_RCU_TORTURE_TEST=m and to use modprobe and rmmod to control | 16 | result in the tests being loaded into the base kernel. In this case, |
17 | the test, perhaps using a script similar to the one shown at the end of | 17 | the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify |
18 | this document. Note that you will need CONFIG_MODULE_UNLOAD in order | 18 | whether the RCU torture tests are to be started immediately during |
19 | to be able to end the test. | 19 | boot or whether the /proc/sys/kernel/rcutorture_runnable file is used |
20 | to enable them. This /proc file can be used to repeatedly pause and | ||
21 | restart the tests, regardless of the initial state specified by the | ||
22 | CONFIG_RCU_TORTURE_TEST_RUNNABLE config option. | ||
23 | |||
24 | You will normally -not- want to start the RCU torture tests during boot | ||
25 | (and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing | ||
26 | this can sometimes be useful in finding boot-time bugs. | ||
20 | 27 | ||
21 | 28 | ||
22 | MODULE PARAMETERS | 29 | MODULE PARAMETERS |
23 | 30 | ||
24 | This module has the following parameters: | 31 | This module has the following parameters: |
25 | 32 | ||
26 | nreaders This is the number of RCU reading threads supported. | 33 | irqreaders Says to invoke RCU readers from irq level. This is currently |
27 | The default is twice the number of CPUs. Why twice? | 34 | done via timers. Defaults to "1" for variants of RCU that |
28 | To properly exercise RCU implementations with preemptible | 35 | permit this. (Or, more accurately, variants of RCU that do |
29 | read-side critical sections. | 36 | -not- permit this know to ignore this variable.) |
30 | 37 | ||
31 | nfakewriters This is the number of RCU fake writer threads to run. Fake | 38 | nfakewriters This is the number of RCU fake writer threads to run. Fake |
32 | writer threads repeatedly use the synchronous "wait for | 39 | writer threads repeatedly use the synchronous "wait for |
@@ -37,6 +44,16 @@ nfakewriters This is the number of RCU fake writer threads to run. Fake | |||
37 | to trigger special cases caused by multiple writers, such as | 44 | to trigger special cases caused by multiple writers, such as |
38 | the synchronize_srcu() early return optimization. | 45 | the synchronize_srcu() early return optimization. |
39 | 46 | ||
47 | nreaders This is the number of RCU reading threads supported. | ||
48 | The default is twice the number of CPUs. Why twice? | ||
49 | To properly exercise RCU implementations with preemptible | ||
50 | read-side critical sections. | ||
51 | |||
52 | shuffle_interval | ||
53 | The number of seconds to keep the test threads affinitied | ||
54 | to a particular subset of the CPUs, defaults to 3 seconds. | ||
55 | Used in conjunction with test_no_idle_hz. | ||
56 | |||
40 | stat_interval The number of seconds between output of torture | 57 | stat_interval The number of seconds between output of torture |
41 | statistics (via printk()). Regardless of the interval, | 58 | statistics (via printk()). Regardless of the interval, |
42 | statistics are printed when the module is unloaded. | 59 | statistics are printed when the module is unloaded. |
@@ -44,10 +61,11 @@ stat_interval The number of seconds between output of torture | |||
44 | be printed -only- when the module is unloaded, and this | 61 | be printed -only- when the module is unloaded, and this |
45 | is the default. | 62 | is the default. |
46 | 63 | ||
47 | shuffle_interval | 64 | stutter The length of time to run the test before pausing for this |
48 | The number of seconds to keep the test threads affinitied | 65 | same period of time. Defaults to "stutter=5", so as |
49 | to a particular subset of the CPUs, defaults to 5 seconds. | 66 | to run and pause for (roughly) five-second intervals. |
50 | Used in conjunction with test_no_idle_hz. | 67 | Specifying "stutter=0" causes the test to run continuously |
68 | without pausing, which is the old default behavior. | ||
51 | 69 | ||
52 | test_no_idle_hz Whether or not to test the ability of RCU to operate in | 70 | test_no_idle_hz Whether or not to test the ability of RCU to operate in |
53 | a kernel that disables the scheduling-clock interrupt to | 71 | a kernel that disables the scheduling-clock interrupt to |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e0d6d99b8f9b..e04d643a9f57 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -1,3 +1,11 @@ | |||
1 | Please note that the "What is RCU?" LWN series is an excellent place | ||
2 | to start learning about RCU: | ||
3 | |||
4 | 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ | ||
5 | 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ | ||
6 | 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ | ||
7 | |||
8 | |||
1 | What is RCU? | 9 | What is RCU? |
2 | 10 | ||
3 | RCU is a synchronization mechanism that was added to the Linux kernel | 11 | RCU is a synchronization mechanism that was added to the Linux kernel |
@@ -772,26 +780,18 @@ Linux-kernel source code, but it helps to have a full list of the | |||
772 | APIs, since there does not appear to be a way to categorize them | 780 | APIs, since there does not appear to be a way to categorize them |
773 | in docbook. Here is the list, by category. | 781 | in docbook. Here is the list, by category. |
774 | 782 | ||
775 | Markers for RCU read-side critical sections: | ||
776 | |||
777 | rcu_read_lock | ||
778 | rcu_read_unlock | ||
779 | rcu_read_lock_bh | ||
780 | rcu_read_unlock_bh | ||
781 | srcu_read_lock | ||
782 | srcu_read_unlock | ||
783 | |||
784 | RCU pointer/list traversal: | 783 | RCU pointer/list traversal: |
785 | 784 | ||
786 | rcu_dereference | 785 | rcu_dereference |
786 | list_for_each_entry_rcu | ||
787 | hlist_for_each_entry_rcu | ||
788 | |||
787 | list_for_each_rcu (to be deprecated in favor of | 789 | list_for_each_rcu (to be deprecated in favor of |
788 | list_for_each_entry_rcu) | 790 | list_for_each_entry_rcu) |
789 | list_for_each_entry_rcu | ||
790 | list_for_each_continue_rcu (to be deprecated in favor of new | 791 | list_for_each_continue_rcu (to be deprecated in favor of new |
791 | list_for_each_entry_continue_rcu) | 792 | list_for_each_entry_continue_rcu) |
792 | hlist_for_each_entry_rcu | ||
793 | 793 | ||
794 | RCU pointer update: | 794 | RCU pointer/list update: |
795 | 795 | ||
796 | rcu_assign_pointer | 796 | rcu_assign_pointer |
797 | list_add_rcu | 797 | list_add_rcu |
@@ -799,16 +799,36 @@ RCU pointer update: | |||
799 | list_del_rcu | 799 | list_del_rcu |
800 | list_replace_rcu | 800 | list_replace_rcu |
801 | hlist_del_rcu | 801 | hlist_del_rcu |
802 | hlist_add_after_rcu | ||
803 | hlist_add_before_rcu | ||
802 | hlist_add_head_rcu | 804 | hlist_add_head_rcu |
805 | hlist_replace_rcu | ||
806 | list_splice_init_rcu() | ||
803 | 807 | ||
804 | RCU grace period: | 808 | RCU: Critical sections Grace period Barrier |
809 | |||
810 | rcu_read_lock synchronize_net rcu_barrier | ||
811 | rcu_read_unlock synchronize_rcu | ||
812 | call_rcu | ||
813 | |||
814 | |||
815 | bh: Critical sections Grace period Barrier | ||
816 | |||
817 | rcu_read_lock_bh call_rcu_bh rcu_barrier_bh | ||
818 | rcu_read_unlock_bh | ||
819 | |||
820 | |||
821 | sched: Critical sections Grace period Barrier | ||
822 | |||
823 | [preempt_disable] synchronize_sched rcu_barrier_sched | ||
824 | [and friends] call_rcu_sched | ||
825 | |||
826 | |||
827 | SRCU: Critical sections Grace period Barrier | ||
828 | |||
829 | srcu_read_lock synchronize_srcu N/A | ||
830 | srcu_read_unlock | ||
805 | 831 | ||
806 | synchronize_net | ||
807 | synchronize_sched | ||
808 | synchronize_rcu | ||
809 | synchronize_srcu | ||
810 | call_rcu | ||
811 | call_rcu_bh | ||
812 | 832 | ||
813 | See the comment headers in the source code (or the docbook generated | 833 | See the comment headers in the source code (or the docbook generated |
814 | from them) for more information. | 834 | from them) for more information. |
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt new file mode 100644 index 000000000000..e9dc8d86adc7 --- /dev/null +++ b/Documentation/block/data-integrity.txt | |||
@@ -0,0 +1,327 @@ | |||
1 | ---------------------------------------------------------------------- | ||
2 | 1. INTRODUCTION | ||
3 | |||
4 | Modern filesystems feature checksumming of data and metadata to | ||
5 | protect against data corruption. However, the detection of the | ||
6 | corruption is done at read time which could potentially be months | ||
7 | after the data was written. At that point the original data that the | ||
8 | application tried to write is most likely lost. | ||
9 | |||
10 | The solution is to ensure that the disk is actually storing what the | ||
11 | application meant it to. Recent additions to both the SCSI family | ||
12 | protocols (SBC Data Integrity Field, SCC protection proposal) as well | ||
13 | as SATA/T13 (External Path Protection) try to remedy this by adding | ||
14 | support for appending integrity metadata to an I/O. The integrity | ||
15 | metadata (or protection information in SCSI terminology) includes a | ||
16 | checksum for each sector as well as an incrementing counter that | ||
17 | ensures the individual sectors are written in the right order. And | ||
18 | for some protection schemes also that the I/O is written to the right | ||
19 | place on disk. | ||
20 | |||
21 | Current storage controllers and devices implement various protective | ||
22 | measures, for instance checksumming and scrubbing. But these | ||
23 | technologies are working in their own isolated domains or at best | ||
24 | between adjacent nodes in the I/O path. The interesting thing about | ||
25 | DIF and the other integrity extensions is that the protection format | ||
26 | is well defined and every node in the I/O path can verify the | ||
27 | integrity of the I/O and reject it if corruption is detected. This | ||
28 | allows not only corruption prevention but also isolation of the point | ||
29 | of failure. | ||
30 | |||
31 | ---------------------------------------------------------------------- | ||
32 | 2. THE DATA INTEGRITY EXTENSIONS | ||
33 | |||
34 | As written, the protocol extensions only protect the path between | ||
35 | controller and storage device. However, many controllers actually | ||
36 | allow the operating system to interact with the integrity metadata | ||
37 | (IMD). We have been working with several FC/SAS HBA vendors to enable | ||
38 | the protection information to be transferred to and from their | ||
39 | controllers. | ||
40 | |||
41 | The SCSI Data Integrity Field works by appending 8 bytes of protection | ||
42 | information to each sector. The data + integrity metadata is stored | ||
43 | in 520 byte sectors on disk. Data + IMD are interleaved when | ||
44 | transferred between the controller and target. The T13 proposal is | ||
45 | similar. | ||
46 | |||
47 | Because it is highly inconvenient for operating systems to deal with | ||
48 | 520 (and 4104) byte sectors, we approached several HBA vendors and | ||
49 | encouraged them to allow separation of the data and integrity metadata | ||
50 | scatter-gather lists. | ||
51 | |||
52 | The controller will interleave the buffers on write and split them on | ||
53 | read. This means that Linux can DMA the data buffers to and from | ||
54 | host memory without changes to the page cache. | ||
55 | |||
56 | Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs | ||
57 | is somewhat heavy to compute in software. Benchmarks found that | ||
58 | calculating this checksum had a significant impact on system | ||
59 | performance for a number of workloads. Some controllers allow a | ||
60 | lighter-weight checksum to be used when interfacing with the operating | ||
61 | system. Emulex, for instance, supports the TCP/IP checksum instead. | ||
62 | The IP checksum received from the OS is converted to the 16-bit CRC | ||
63 | when writing and vice versa. This allows the integrity metadata to be | ||
64 | generated by Linux or the application at very low cost (comparable to | ||
65 | software RAID5). | ||
66 | |||
67 | The IP checksum is weaker than the CRC in terms of detecting bit | ||
68 | errors. However, the strength is really in the separation of the data | ||
69 | buffers and the integrity metadata. These two distinct buffers must | ||
70 | match up for an I/O to complete. | ||
71 | |||
72 | The separation of the data and integrity metadata buffers as well as | ||
73 | the choice in checksums is referred to as the Data Integrity | ||
74 | Extensions. As these extensions are outside the scope of the protocol | ||
75 | bodies (T10, T13), Oracle and its partners are trying to standardize | ||
76 | them within the Storage Networking Industry Association. | ||
77 | |||
78 | ---------------------------------------------------------------------- | ||
79 | 3. KERNEL CHANGES | ||
80 | |||
81 | The data integrity framework in Linux enables protection information | ||
82 | to be pinned to I/Os and sent to/received from controllers that | ||
83 | support it. | ||
84 | |||
85 | The advantage to the integrity extensions in SCSI and SATA is that | ||
86 | they enable us to protect the entire path from application to storage | ||
87 | device. However, at the same time this is also the biggest | ||
88 | disadvantage. It means that the protection information must be in a | ||
89 | format that can be understood by the disk. | ||
90 | |||
91 | Generally Linux/POSIX applications are agnostic to the intricacies of | ||
92 | the storage devices they are accessing. The virtual filesystem switch | ||
93 | and the block layer make things like hardware sector size and | ||
94 | transport protocols completely transparent to the application. | ||
95 | |||
96 | However, this level of detail is required when preparing the | ||
97 | protection information to send to a disk. Consequently, the very | ||
98 | concept of an end-to-end protection scheme is a layering violation. | ||
99 | It is completely unreasonable for an application to be aware whether | ||
100 | it is accessing a SCSI or SATA disk. | ||
101 | |||
102 | The data integrity support implemented in Linux attempts to hide this | ||
103 | from the application. As far as the application (and to some extent | ||
104 | the kernel) is concerned, the integrity metadata is opaque information | ||
105 | that's attached to the I/O. | ||
106 | |||
107 | The current implementation allows the block layer to automatically | ||
108 | generate the protection information for any I/O. Eventually the | ||
109 | intent is to move the integrity metadata calculation to userspace for | ||
110 | user data. Metadata and other I/O that originates within the kernel | ||
111 | will still use the automatic generation interface. | ||
112 | |||
113 | Some storage devices allow each hardware sector to be tagged with a | ||
114 | 16-bit value. The owner of this tag space is the owner of the block | ||
115 | device. I.e. the filesystem in most cases. The filesystem can use | ||
116 | this extra space to tag sectors as they see fit. Because the tag | ||
117 | space is limited, the block interface allows tagging bigger chunks by | ||
118 | way of interleaving. This way, 8*16 bits of information can be | ||
119 | attached to a typical 4KB filesystem block. | ||
120 | |||
121 | This also means that applications such as fsck and mkfs will need | ||
122 | access to manipulate the tags from user space. A passthrough | ||
123 | interface for this is being worked on. | ||
124 | |||
125 | |||
126 | ---------------------------------------------------------------------- | ||
127 | 4. BLOCK LAYER IMPLEMENTATION DETAILS | ||
128 | |||
129 | 4.1 BIO | ||
130 | |||
131 | The data integrity patches add a new field to struct bio when | ||
132 | CONFIG_BLK_DEV_INTEGRITY is enabled. bio->bi_integrity is a pointer | ||
133 | to a struct bip which contains the bio integrity payload. Essentially | ||
134 | a bip is a trimmed down struct bio which holds a bio_vec containing | ||
135 | the integrity metadata and the required housekeeping information (bvec | ||
136 | pool, vector count, etc.) | ||
137 | |||
138 | A kernel subsystem can enable data integrity protection on a bio by | ||
139 | calling bio_integrity_alloc(bio). This will allocate and attach the | ||
140 | bip to the bio. | ||
141 | |||
142 | Individual pages containing integrity metadata can subsequently be | ||
143 | attached using bio_integrity_add_page(). | ||
144 | |||
145 | bio_free() will automatically free the bip. | ||
146 | |||
147 | |||
148 | 4.2 BLOCK DEVICE | ||
149 | |||
150 | Because the format of the protection data is tied to the physical | ||
151 | disk, each block device has been extended with a block integrity | ||
152 | profile (struct blk_integrity). This optional profile is registered | ||
153 | with the block layer using blk_integrity_register(). | ||
154 | |||
155 | The profile contains callback functions for generating and verifying | ||
156 | the protection data, as well as getting and setting application tags. | ||
157 | The profile also contains a few constants to aid in completing, | ||
158 | merging and splitting the integrity metadata. | ||
159 | |||
160 | Layered block devices will need to pick a profile that's appropriate | ||
161 | for all subdevices. blk_integrity_compare() can help with that. DM | ||
162 | and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6 | ||
163 | will require extra work due to the application tag. | ||
164 | |||
165 | |||
166 | ---------------------------------------------------------------------- | ||
167 | 5.0 BLOCK LAYER INTEGRITY API | ||
168 | |||
169 | 5.1 NORMAL FILESYSTEM | ||
170 | |||
171 | The normal filesystem is unaware that the underlying block device | ||
172 | is capable of sending/receiving integrity metadata. The IMD will | ||
173 | be automatically generated by the block layer at submit_bio() time | ||
174 | in case of a WRITE. A READ request will cause the I/O integrity | ||
175 | to be verified upon completion. | ||
176 | |||
177 | IMD generation and verification can be toggled using the | ||
178 | |||
179 | /sys/block/<bdev>/integrity/write_generate | ||
180 | |||
181 | and | ||
182 | |||
183 | /sys/block/<bdev>/integrity/read_verify | ||
184 | |||
185 | flags. | ||
186 | |||
187 | |||
188 | 5.2 INTEGRITY-AWARE FILESYSTEM | ||
189 | |||
190 | A filesystem that is integrity-aware can prepare I/Os with IMD | ||
191 | attached. It can also use the application tag space if this is | ||
192 | supported by the block device. | ||
193 | |||
194 | |||
195 | int bdev_integrity_enabled(block_device, int rw); | ||
196 | |||
197 | bdev_integrity_enabled() will return 1 if the block device | ||
198 | supports integrity metadata transfer for the data direction | ||
199 | specified in 'rw'. | ||
200 | |||
201 | bdev_integrity_enabled() honors the write_generate and | ||
202 | read_verify flags in sysfs and will respond accordingly. | ||
203 | |||
204 | |||
205 | int bio_integrity_prep(bio); | ||
206 | |||
207 | To generate IMD for WRITE and to set up buffers for READ, the | ||
208 | filesystem must call bio_integrity_prep(bio). | ||
209 | |||
210 | Prior to calling this function, the bio data direction and start | ||
211 | sector must be set, and the bio should have all data pages | ||
212 | added. It is up to the caller to ensure that the bio does not | ||
213 | change while I/O is in progress. | ||
214 | |||
215 | bio_integrity_prep() should only be called if | ||
216 | bio_integrity_enabled() returned 1. | ||
217 | |||
218 | |||
219 | int bio_integrity_tag_size(bio); | ||
220 | |||
221 | If the filesystem wants to use the application tag space it will | ||
222 | first have to find out how much storage space is available. | ||
223 | Because tag space is generally limited (usually 2 bytes per | ||
224 | sector regardless of sector size), the integrity framework | ||
225 | supports interleaving the information between the sectors in an | ||
226 | I/O. | ||
227 | |||
228 | Filesystems can call bio_integrity_tag_size(bio) to find out how | ||
229 | many bytes of storage are available for that particular bio. | ||
230 | |||
231 | Another option is bdev_get_tag_size(block_device) which will | ||
232 | return the number of available bytes per hardware sector. | ||
233 | |||
234 | |||
235 | int bio_integrity_set_tag(bio, void *tag_buf, len); | ||
236 | |||
237 | After a successful return from bio_integrity_prep(), | ||
238 | bio_integrity_set_tag() can be used to attach an opaque tag | ||
239 | buffer to a bio. Obviously this only makes sense if the I/O is | ||
240 | a WRITE. | ||
241 | |||
242 | |||
243 | int bio_integrity_get_tag(bio, void *tag_buf, len); | ||
244 | |||
245 | Similarly, at READ I/O completion time the filesystem can | ||
246 | retrieve the tag buffer using bio_integrity_get_tag(). | ||
247 | |||
248 | |||
249 | 5.3 PASSING EXISTING INTEGRITY METADATA | ||
250 | |||
251 | Filesystems that either generate their own integrity metadata or | ||
252 | are capable of transferring IMD from user space can use the | ||
253 | following calls: | ||
254 | |||
255 | |||
256 | struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages); | ||
257 | |||
258 | Allocates the bio integrity payload and hangs it off of the bio. | ||
259 | nr_pages indicates how many pages of protection data need to be | ||
260 | stored in the integrity bio_vec list (similar to bio_alloc()). | ||
261 | |||
262 | The integrity payload will be freed at bio_free() time. | ||
263 | |||
264 | |||
265 | int bio_integrity_add_page(bio, page, len, offset); | ||
266 | |||
267 | Attaches a page containing integrity metadata to an existing | ||
268 | bio. The bio must have an existing bip, | ||
269 | i.e. bio_integrity_alloc() must have been called. For a WRITE, | ||
270 | the integrity metadata in the pages must be in a format | ||
271 | understood by the target device with the notable exception that | ||
272 | the sector numbers will be remapped as the request traverses the | ||
273 | I/O stack. This implies that the pages added using this call | ||
274 | will be modified during I/O! The first reference tag in the | ||
275 | integrity metadata must have a value of bip->bip_sector. | ||
276 | |||
277 | Pages can be added using bio_integrity_add_page() as long as | ||
278 | there is room in the bip bio_vec array (nr_pages). | ||
279 | |||
280 | Upon completion of a READ operation, the attached pages will | ||
281 | contain the integrity metadata received from the storage device. | ||
282 | It is up to the receiver to process them and verify data | ||
283 | integrity upon completion. | ||
284 | |||
285 | |||
286 | 5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY | ||
287 | METADATA | ||
288 | |||
289 | To enable integrity exchange on a block device the gendisk must be | ||
290 | registered as capable: | ||
291 | |||
292 | int blk_integrity_register(gendisk, blk_integrity); | ||
293 | |||
294 | The blk_integrity struct is a template and should contain the | ||
295 | following: | ||
296 | |||
297 | static struct blk_integrity my_profile = { | ||
298 | .name = "STANDARDSBODY-TYPE-VARIANT-CSUM", | ||
299 | .generate_fn = my_generate_fn, | ||
300 | .verify_fn = my_verify_fn, | ||
301 | .get_tag_fn = my_get_tag_fn, | ||
302 | .set_tag_fn = my_set_tag_fn, | ||
303 | .tuple_size = sizeof(struct my_tuple_size), | ||
304 | .tag_size = <tag bytes per hw sector>, | ||
305 | }; | ||
306 | |||
307 | 'name' is a text string which will be visible in sysfs. This is | ||
308 | part of the userland API so choose it carefully and never change | ||
309 | it. The format is standards body-type-variant. | ||
310 | E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC. | ||
311 | |||
312 | 'generate_fn' generates appropriate integrity metadata (for WRITE). | ||
313 | |||
314 | 'verify_fn' verifies that the data buffer matches the integrity | ||
315 | metadata. | ||
316 | |||
317 | 'tuple_size' must be set to match the size of the integrity | ||
318 | metadata per sector. I.e. 8 for DIF and EPP. | ||
319 | |||
320 | 'tag_size' must be set to identify how many bytes of tag space | ||
321 | are available per hardware sector. For DIF this is either 2 or | ||
322 | 0 depending on the value of the Control Mode Page ATO bit. | ||
323 | |||
324 | See 5.2 for a description of get_tag_fn and set_tag_fn. | ||
325 | |||
326 | ---------------------------------------------------------------------- | ||
327 | 2007-12-24 Martin K. Petersen <martin.petersen@oracle.com> | ||
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index b61cb9564023..bd699da24666 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt | |||
@@ -14,9 +14,8 @@ represent the thread siblings to cpu X in the same physical package; | |||
14 | To implement it in an architecture-neutral way, a new source file, | 14 | To implement it in an architecture-neutral way, a new source file, |
15 | drivers/base/topology.c, is to export the 4 attributes. | 15 | drivers/base/topology.c, is to export the 4 attributes. |
16 | 16 | ||
17 | If one architecture wants to support this feature, it just needs to | 17 | For an architecture to support this feature, it must define some of |
18 | implement 4 defines, typically in file include/asm-XXX/topology.h. | 18 | these macros in include/asm-XXX/topology.h: |
19 | The 4 defines are: | ||
20 | #define topology_physical_package_id(cpu) | 19 | #define topology_physical_package_id(cpu) |
21 | #define topology_core_id(cpu) | 20 | #define topology_core_id(cpu) |
22 | #define topology_thread_siblings(cpu) | 21 | #define topology_thread_siblings(cpu) |
@@ -25,17 +24,10 @@ The 4 defines are: | |||
25 | The type of **_id is int. | 24 | The type of **_id is int. |
26 | The type of siblings is cpumask_t. | 25 | The type of siblings is cpumask_t. |
27 | 26 | ||
28 | To be consistent on all architectures, the 4 attributes should have | 27 | To be consistent on all architectures, include/linux/topology.h |
29 | default values if their values are unavailable. Below is the rule. | 28 | provides default definitions for any of the above macros that are |
30 | 1) physical_package_id: If cpu has no physical package id, -1 is the | 29 | not defined by include/asm-XXX/topology.h: |
31 | default value. | 30 | 1) physical_package_id: -1 |
32 | 2) core_id: If cpu doesn't support multi-core, its core id is 0. | 31 | 2) core_id: 0 |
33 | 3) thread_siblings: Just include itself, if the cpu doesn't support | 32 | 3) thread_siblings: just the given CPU |
34 | HT/multi-thread. | 33 | 4) core_siblings: just the given CPU |
35 | 4) core_siblings: Just include itself, if the cpu doesn't support | ||
36 | multi-core and HT/Multi-thread. | ||
37 | |||
38 | So be careful when declaring the 4 defines in include/asm-XXX/topology.h. | ||
39 | |||
40 | If an attribute isn't defined on an architecture, it won't be exported. | ||
41 | |||
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 46ece3fba6f9..86334b6f8238 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -222,13 +222,6 @@ Who: Thomas Gleixner <tglx@linutronix.de> | |||
222 | 222 | ||
223 | --------------------------- | 223 | --------------------------- |
224 | 224 | ||
225 | What: i2c-i810, i2c-prosavage and i2c-savage4 | ||
226 | When: May 2008 | ||
227 | Why: These drivers are superseded by i810fb, intelfb and savagefb. | ||
228 | Who: Jean Delvare <khali@linux-fr.org> | ||
229 | |||
230 | --------------------------- | ||
231 | |||
232 | What (Why): | 225 | What (Why): |
233 | - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files | 226 | - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files |
234 | (superseded by xt_TOS/xt_tos target & match) | 227 | (superseded by xt_TOS/xt_tos target & match) |
@@ -315,9 +308,31 @@ Who: Matthew Wilcox <willy@linux.intel.com> | |||
315 | 308 | ||
316 | --------------------------- | 309 | --------------------------- |
317 | 310 | ||
311 | What: SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD, | ||
312 | SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD | ||
313 | When: June 2009 | ||
314 | Why: A newer version of the options was introduced in 2005 that | ||
315 | removes the limitations of the old API. The sctp library has been | ||
316 | converted to use these new options at the same time. Any user | ||
317 | space app that directly uses the old options should convert to using | ||
318 | the new options. | ||
319 | Who: Vlad Yasevich <vladislav.yasevich@hp.com> | ||
320 | |||
321 | --------------------------- | ||
322 | |||
318 | What: CONFIG_THERMAL_HWMON | 323 | What: CONFIG_THERMAL_HWMON |
319 | When: January 2009 | 324 | When: January 2009 |
320 | Why: This option was introduced just to allow older lm-sensors userspace | 325 | Why: This option was introduced just to allow older lm-sensors userspace |
321 | to keep working over the upgrade to 2.6.26. At the scheduled time of | 326 | to keep working over the upgrade to 2.6.26. At the scheduled time of |
322 | removal fixed lm-sensors (2.x or 3.x) should be readily available. | 327 | removal fixed lm-sensors (2.x or 3.x) should be readily available. |
323 | Who: Rene Herman <rene.herman@gmail.com> | 328 | Who: Rene Herman <rene.herman@gmail.com> |
329 | |||
330 | --------------------------- | ||
331 | |||
332 | What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS | ||
333 | (in net/core/net-sysfs.c) | ||
334 | When: After the only user (hal) has seen a release with the patches | ||
335 | for enough time, probably some time in 2010. | ||
336 | Why: Over 1K .text/.data size reduction, data is available in other | ||
337 | ways (ioctls) | ||
338 | Who: Johannes Berg <johannes@sipsolutions.net> | ||
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c index 25151fd5c2c6..039648791701 100644 --- a/Documentation/filesystems/configfs/configfs_example.c +++ b/Documentation/filesystems/configfs/configfs_example.c | |||
@@ -279,7 +279,7 @@ static struct config_item *simple_children_make_item(struct config_group *group, | |||
279 | 279 | ||
280 | simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); | 280 | simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); |
281 | if (!simple_child) | 281 | if (!simple_child) |
282 | return NULL; | 282 | return ERR_PTR(-ENOMEM); |
283 | 283 | ||
284 | 284 | ||
285 | config_item_init_type_name(&simple_child->item, name, | 285 | config_item_init_type_name(&simple_child->item, name, |
@@ -366,7 +366,7 @@ static struct config_group *group_children_make_group(struct config_group *group | |||
366 | simple_children = kzalloc(sizeof(struct simple_children), | 366 | simple_children = kzalloc(sizeof(struct simple_children), |
367 | GFP_KERNEL); | 367 | GFP_KERNEL); |
368 | if (!simple_children) | 368 | if (!simple_children) |
369 | return NULL; | 369 | return ERR_PTR(-ENOMEM); |
370 | 370 | ||
371 | 371 | ||
372 | config_group_init_type_name(&simple_children->group, name, | 372 | config_group_init_type_name(&simple_children->group, name, |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 0c5086db8352..80e193d82e2e 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -13,72 +13,93 @@ Mailing list: linux-ext4@vger.kernel.org | |||
13 | 1. Quick usage instructions: | 13 | 1. Quick usage instructions: |
14 | =========================== | 14 | =========================== |
15 | 15 | ||
16 | - Grab updated e2fsprogs from | 16 | - Compile and install the latest version of e2fsprogs (as of this |
17 | ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/ | 17 | writing version 1.41) from: |
18 | This is a patchset on top of e2fsprogs-1.39, which can be found at | 18 | |
19 | http://sourceforge.net/project/showfiles.php?group_id=2406 | ||
20 | |||
21 | or | ||
22 | |||
19 | ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ | 23 | ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ |
20 | 24 | ||
21 | - It's still mke2fs -j /dev/hda1 | 25 | or grab the latest git repository from: |
26 | |||
27 | git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git | ||
28 | |||
29 | - Create a new filesystem using the ext4dev filesystem type: | ||
30 | |||
31 | # mke2fs -t ext4dev /dev/hda1 | ||
32 | |||
33 | Or configure an existing ext3 filesystem to support extents and set | ||
34 | the test_fs flag to indicate that it's ok for an in-development | ||
35 | filesystem to touch this filesystem: | ||
22 | 36 | ||
23 | - mount /dev/hda1 /wherever -t ext4dev | 37 | # tune2fs -O extents -E test_fs /dev/hda1 |
24 | 38 | ||
25 | - To enable extents, | 39 | If the filesystem was created with 128 byte inodes, it can be |
40 | converted to use 256 byte for greater efficiency via: | ||
26 | 41 | ||
27 | mount /dev/hda1 /wherever -t ext4dev -o extents | 42 | # tune2fs -I 256 /dev/hda1 |
28 | 43 | ||
29 | - The filesystem is compatible with the ext3 driver until you add a file | 44 | (Note: we currently do not have tools to convert an ext4dev |
30 | which has extents (ie: `mount -o extents', then create a file). | 45 | filesystem back to ext3; so please do not try this on production |
46 | filesystems.) | ||
31 | 47 | ||
32 | NOTE: The "extents" mount flag is temporary. It will soon go away and | 48 | - Mounting: |
33 | extents will be enabled by the "-o extents" flag to mke2fs or tune2fs | 49 | |
50 | # mount -t ext4dev /dev/hda1 /wherever | ||
34 | 51 | ||
35 | - When comparing performance with other filesystems, remember that | 52 | - When comparing performance with other filesystems, remember that |
36 | ext3/4 by default offers higher data integrity guarantees than most. So | 53 | ext3/4 by default offers higher data integrity guarantees than most. |
37 | when comparing with a metadata-only journalling filesystem, use `mount -o | 54 | So when comparing with a metadata-only journalling filesystem, such |
38 | data=writeback'. And you might as well use `mount -o nobh' too along | 55 | as ext3, use `mount -o data=writeback'. And you might as well use |
39 | with it. Making the journal larger than the mke2fs default often helps | 56 | `mount -o nobh' too along with it. Making the journal larger than |
40 | performance with metadata-intensive workloads. | 57 | the mke2fs default often helps performance with metadata-intensive |
58 | workloads. | ||
41 | 59 | ||
42 | 2. Features | 60 | 2. Features |
43 | =========== | 61 | =========== |
44 | 62 | ||
45 | 2.1 Currently available | 63 | 2.1 Currently available |
46 | 64 | ||
47 | * ability to use filesystems > 16TB | 65 | * ability to use filesystems > 16TB (e2fsprogs support not available yet) |
48 | * extent format reduces metadata overhead (RAM, IO for access, transactions) | 66 | * extent format reduces metadata overhead (RAM, IO for access, transactions) |
49 | * extent format more robust in face of on-disk corruption due to magics, | 67 | * extent format more robust in face of on-disk corruption due to magics, |
50 | * internal redundancy in tree | 68 | * internal redundancy in tree |
51 | 69 | * improved file allocation (multi-block alloc) | |
52 | 2.1 Previously available, soon to be enabled by default by "mkefs.ext4": | 70 | * fix 32000 subdirectory limit |
53 | 71 | * nsec timestamps for mtime, atime, ctime, create time | |
54 | * dir_index and resize inode will be on by default | 72 | * inode version field on disk (NFSv4, Lustre) |
55 | * large inodes will be used by default for fast EAs, nsec timestamps, etc | 73 | * reduced e2fsck time via uninit_bg feature |
74 | * journal checksumming for robustness, performance | ||
75 | * persistent file preallocation (e.g for streaming media, databases) | ||
76 | * ability to pack bitmaps and inode tables into larger virtual groups via the | ||
77 | flex_bg feature | ||
78 | * large file support | ||
79 | * Inode allocation using large virtual block groups via flex_bg | ||
80 | * delayed allocation | ||
81 | * large block (up to pagesize) support | ||
82 | * efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force | ||
83 | the ordering) | ||
56 | 84 | ||
57 | 2.2 Candidate features for future inclusion | 85 | 2.2 Candidate features for future inclusion |
58 | 86 | ||
59 | There are several under discussion, whether they all make it in is | 87 | * Online defrag (patches available but not well tested) |
60 | partly a function of how much time everyone has to work on them: | 88 | * reduced mke2fs time via lazy itable initialization in conjunction with |
89 | the uninit_bg feature (capability to do this is available in e2fsprogs | ||
90 | but a kernel thread to do lazy zeroing of unused inode table blocks | ||
91 | after filesystem is first mounted is required for safety) | ||
61 | 92 | ||
62 | * improved file allocation (multi-block alloc, delayed alloc; basically done) | 93 | There are several others under discussion, whether they all make it in is |
63 | * fix 32000 subdirectory limit (patch exists, needs some e2fsck work) | 94 | partly a function of how much time everyone has to work on them. Features like |
64 | * nsec timestamps for mtime, atime, ctime, create time (patch exists, | 95 | metadata checksumming have been discussed and planned for a bit but no patches |
65 | needs some e2fsck work) | 96 | exist yet so I'm not sure they're in the near-term roadmap. |
66 | * inode version field on disk (NFSv4, Lustre; prototype exists) | ||
67 | * reduced mke2fs/e2fsck time via uninitialized groups (prototype exists) | ||
68 | * journal checksumming for robustness, performance (prototype exists) | ||
69 | * persistent file preallocation (e.g for streaming media, databases) | ||
70 | 97 | ||
71 | Features like metadata checksumming have been discussed and planned for | 98 | The big performance win will come with mballoc, delalloc and flex_bg |
72 | a bit but no patches exist yet so I'm not sure they're in the near-term | 99 | grouping of bitmaps and inode tables. Some test results available here: |
73 | roadmap. | ||
74 | 100 | ||
75 | The big performance win will come with mballoc and delalloc. CFS has | 101 | - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html |
76 | been using mballoc for a few years already with Lustre, and IBM + Bull | 102 | - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html |
77 | did a lot of benchmarking on it. The reason it isn't in the first set of | ||
78 | patches is partly a manageability issue, and partly because it doesn't | ||
79 | directly affect the on-disk format (outside of much better allocation) | ||
80 | so it isn't critical to get into the first round of changes. I believe | ||
81 | Alex is working on a new set of patches right now. | ||
82 | 103 | ||
83 | 3. Options | 104 | 3. Options |
84 | ========== | 105 | ========== |
@@ -222,9 +243,11 @@ stripe=n Number of filesystem blocks that mballoc will try | |||
222 | to use for allocation size and alignment. For RAID5/6 | 243 | to use for allocation size and alignment. For RAID5/6 |
223 | systems this should be the number of data | 244 | systems this should be the number of data |
224 | disks * RAID chunk size in file system blocks. | 245 | disks * RAID chunk size in file system blocks. |
225 | 246 | delalloc (*) Deferring block allocation until write-out time. | |
247 | nodelalloc Disable delayed allocation. Blocks are allocated | ||
248 | when data is copied from user to page cache. | ||
226 | Data Mode | 249 | Data Mode |
227 | --------- | 250 | ========= |
228 | There are 3 different data modes: | 251 | There are 3 different data modes: |
229 | 252 | ||
230 | * writeback mode | 253 | * writeback mode |
@@ -236,10 +259,10 @@ typically provide the best ext4 performance. | |||
236 | 259 | ||
237 | * ordered mode | 260 | * ordered mode |
238 | In data=ordered mode, ext4 only officially journals metadata, but it logically | 261 | In data=ordered mode, ext4 only officially journals metadata, but it logically |
239 | groups metadata and data blocks into a single unit called a transaction. When | 262 | groups metadata information related to data changes with the data blocks into a |
240 | it's time to write the new metadata out to disk, the associated data blocks | 263 | single unit called a transaction. When it's time to write the new metadata |
241 | are written first. In general, this mode performs slightly slower than | 264 | out to disk, the associated data blocks are written first. In general, |
242 | writeback but significantly faster than journal mode. | 265 | this mode performs slightly slower than writeback but significantly faster than journal mode. |
243 | 266 | ||
244 | * journal mode | 267 | * journal mode |
245 | data=journal mode provides full data and metadata journaling. All new data is | 268 | data=journal mode provides full data and metadata journaling. All new data is |
@@ -247,7 +270,8 @@ written to the journal first, and then to its final location. | |||
247 | In the event of a crash, the journal can be replayed, bringing both data and | 270 | In the event of a crash, the journal can be replayed, bringing both data and |
248 | metadata into a consistent state. This mode is the slowest except when data | 271 | metadata into a consistent state. This mode is the slowest except when data |
249 | needs to be read from and written to disk at the same time where it | 272 | needs to be read from and written to disk at the same time where it |
250 | outperforms all other modes. | 273 | outperforms all other modes. Currently ext4 does not have delayed |
274 | allocation support if this data journalling mode is selected. | ||
251 | 275 | ||
252 | References | 276 | References |
253 | ========== | 277 | ========== |
@@ -256,7 +280,8 @@ kernel source: <file:fs/ext4/> | |||
256 | <file:fs/jbd2/> | 280 | <file:fs/jbd2/> |
257 | 281 | ||
258 | programs: http://e2fsprogs.sourceforge.net/ | 282 | programs: http://e2fsprogs.sourceforge.net/ |
259 | http://ext2resize.sourceforge.net | ||
260 | 283 | ||
261 | useful links: http://fedoraproject.org/wiki/ext3-devel | 284 | useful links: http://fedoraproject.org/wiki/ext3-devel |
262 | http://www.bullopensource.org/ext4/ | 285 | http://www.bullopensource.org/ext4/ |
286 | http://ext4.wiki.kernel.org/index.php/Main_Page | ||
287 | http://fedoraproject.org/wiki/Features/Ext4 | ||
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt new file mode 100644 index 000000000000..4dae9a3840bf --- /dev/null +++ b/Documentation/filesystems/gfs2-glocks.txt | |||
@@ -0,0 +1,114 @@ | |||
1 | Glock internal locking rules | ||
2 | ------------------------------ | ||
3 | |||
4 | This documents the basic principles of the glock state machine | ||
5 | internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h) | ||
6 | has two main (internal) locks: | ||
7 | |||
8 | 1. A spinlock (gl_spin) which protects the internal state such | ||
9 | as gl_state, gl_target and the list of holders (gl_holders) | ||
10 | 2. A non-blocking bit lock, GLF_LOCK, which is used to prevent other | ||
11 | threads from making calls to the DLM, etc. at the same time. If a | ||
12 | thread takes this lock, it must then call run_queue (usually via the | ||
13 | workqueue) when it releases it in order to ensure any pending tasks | ||
14 | are completed. | ||
15 | |||
16 | The gl_holders list contains all the queued lock requests (not | ||
17 | just the holders) associated with the glock. If there are any | ||
18 | held locks, then they will be contiguous entries at the head | ||
19 | of the list. Locks are granted in strictly the order that they | ||
20 | are queued, except for those marked LM_FLAG_PRIORITY which are | ||
21 | used only during recovery, and even then only for journal locks. | ||
22 | |||
23 | There are three lock states that users of the glock layer can request, | ||
24 | namely shared (SH), deferred (DF) and exclusive (EX). Those translate | ||
25 | to the following DLM lock modes: | ||
26 | |||
27 | Glock mode | DLM lock mode | ||
28 | ------------------------------ | ||
29 | UN | IV/NL Unlocked (no DLM lock associated with glock) or NL | ||
30 | SH | PR (Protected read) | ||
31 | DF | CW (Concurrent write) | ||
32 | EX | EX (Exclusive) | ||
33 | |||
34 | Thus DF is basically a shared mode which is incompatible with the "normal" | ||
35 | shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O | ||
36 | operations. The glocks are basically a lock plus some routines which deal | ||
37 | with cache management. The following rules apply for the cache: | ||
38 | |||
39 | Glock mode | Cache data | Cache Metadata | Dirty Data | Dirty Metadata | ||
40 | -------------------------------------------------------------------------- | ||
41 | UN | No | No | No | No | ||
42 | SH | Yes | Yes | No | No | ||
43 | DF | No | Yes | No | No | ||
44 | EX | Yes | Yes | Yes | Yes | ||
45 | |||
46 | These rules are implemented using the various glock operations which | ||
47 | are defined for each type of glock. Not all types of glocks use | ||
48 | all the modes. Only inode glocks use the DF mode for example. | ||
49 | |||
50 | Table of glock operations and per type constants: | ||
51 | |||
52 | Field | Purpose | ||
53 | ---------------------------------------------------------------------------- | ||
54 | go_xmote_th | Called before remote state change (e.g. to sync dirty data) | ||
55 | go_xmote_bh | Called after remote state change (e.g. to refill cache) | ||
56 | go_inval | Called if remote state change requires invalidating the cache | ||
57 | go_demote_ok | Returns boolean value of whether it's ok to demote a glock | ||
58 | | (e.g. checks timeout, and that there is no cached data) | ||
59 | go_lock | Called for the first local holder of a lock | ||
60 | go_unlock | Called on the final local unlock of a lock | ||
61 | go_dump | Called to print content of object for debugfs file, or on | ||
62 | | error to dump glock to the log. | ||
63 | go_type | The type of the glock, LM_TYPE_..... | ||
64 | go_min_hold_time | The minimum hold time | ||
65 | |||
66 | The minimum hold time for each lock is the time after a remote lock | ||
67 | grant for which we ignore remote demote requests. This is in order to | ||
68 | prevent a situation where locks are being bounced around the cluster | ||
69 | from node to node with none of the nodes making any progress. This | ||
70 | tends to show up most with shared mmaped files which are being written | ||
71 | to by multiple nodes. By delaying the demotion in response to a | ||
72 | remote callback, that gives the userspace program time to make | ||
73 | some progress before the pages are unmapped. | ||
74 | |||
75 | There is a plan to try and remove the go_lock and go_unlock callbacks | ||
76 | if possible, in order to try and speed up the fast path through the locking. | ||
77 | Also, eventually we hope to make the glock "EX" mode locally shared | ||
78 | such that any local locking will be done with the i_mutex as required | ||
79 | rather than via the glock. | ||
80 | |||
81 | Locking rules for glock operations: | ||
82 | |||
83 | Operation | GLF_LOCK bit lock held | gl_spin spinlock held | ||
84 | ----------------------------------------------------------------- | ||
85 | go_xmote_th | Yes | No | ||
86 | go_xmote_bh | Yes | No | ||
87 | go_inval | Yes | No | ||
88 | go_demote_ok | Sometimes | Yes | ||
89 | go_lock | Yes | No | ||
90 | go_unlock | Yes | No | ||
91 | go_dump | Sometimes | Yes | ||
92 | |||
93 | N.B. Operations must not drop either the bit lock or the spinlock | ||
94 | if it's held on entry. go_dump and go_demote_ok must never block. | ||
95 | Note that go_dump will only be called if the glock's state | ||
96 | indicates that it is caching uptodate data. | ||
97 | |||
98 | Glock locking order within GFS2: | ||
99 | |||
100 | 1. i_mutex (if required) | ||
101 | 2. Rename glock (for rename only) | ||
102 | 3. Inode glock(s) | ||
103 | (Parents before children, inodes at "same level" with same parent in | ||
104 | lock number order) | ||
105 | 4. Rgrp glock(s) (for (de)allocation operations) | ||
106 | 5. Transaction glock (via gfs2_trans_begin) for non-read operations | ||
107 | 6. Page lock (always last, very important!) | ||
108 | |||
109 | There are two glocks per inode. One deals with access to the inode | ||
110 | itself (locking order as above), and the other, known as the iopen | ||
111 | glock is used in conjunction with the i_nlink field in the inode to | ||
112 | determine the lifetime of the inode in question. Locking of inodes | ||
113 | is on a per-inode basis. Locking of rgrps is on a per rgrp basis. | ||
114 | |||
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt index d0ec45ae4e7d..44bd766f2e5d 100644 --- a/Documentation/filesystems/nfs-rdma.txt +++ b/Documentation/filesystems/nfs-rdma.txt | |||
@@ -5,7 +5,7 @@ | |||
5 | ################################################################################ | 5 | ################################################################################ |
6 | 6 | ||
7 | Author: NetApp and Open Grid Computing | 7 | Author: NetApp and Open Grid Computing |
8 | Date: April 15, 2008 | 8 | Date: May 29, 2008 |
9 | 9 | ||
10 | Table of Contents | 10 | Table of Contents |
11 | ~~~~~~~~~~~~~~~~~ | 11 | ~~~~~~~~~~~~~~~~~ |
@@ -60,16 +60,18 @@ Installation | |||
60 | The procedures described in this document have been tested with | 60 | The procedures described in this document have been tested with |
61 | distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). | 61 | distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). |
62 | 62 | ||
63 | - Install nfs-utils-1.1.1 or greater on the client | 63 | - Install nfs-utils-1.1.2 or greater on the client |
64 | 64 | ||
65 | An NFS/RDMA mount point can only be obtained by using the mount.nfs | 65 | An NFS/RDMA mount point can be obtained by using the mount.nfs command in |
66 | command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs | 66 | nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils |
67 | you are using, type: | 67 | version with support for NFS/RDMA mounts, but for various reasons we |
68 | recommend using nfs-utils-1.1.2 or greater). To see which version of | ||
69 | mount.nfs you are using, type: | ||
68 | 70 | ||
69 | > /sbin/mount.nfs -V | 71 | $ /sbin/mount.nfs -V |
70 | 72 | ||
71 | If the version is less than 1.1.1 or the command does not exist, | 73 | If the version is less than 1.1.2 or the command does not exist, |
72 | then you will need to install the latest version of nfs-utils. | 74 | you should install the latest version of nfs-utils. |
73 | 75 | ||
74 | Download the latest package from: | 76 | Download the latest package from: |
75 | 77 | ||
@@ -77,22 +79,33 @@ Installation | |||
77 | 79 | ||
78 | Uncompress the package and follow the installation instructions. | 80 | Uncompress the package and follow the installation instructions. |
79 | 81 | ||
80 | If you will not be using GSS and NFSv4, the installation process | 82 | If you will not need the idmapper and gssd executables (you do not need |
81 | can be simplified by disabling these features when running configure: | 83 | these to create an NFS/RDMA enabled mount command), the installation |
84 | process can be simplified by disabling these features when running | ||
85 | configure: | ||
82 | 86 | ||
83 | > ./configure --disable-gss --disable-nfsv4 | 87 | $ ./configure --disable-gss --disable-nfsv4 |
84 | 88 | ||
85 | For more information on this see the package's README and INSTALL files. | 89 | To build nfs-utils you will need the tcp_wrappers package installed. For |
90 | more information on this see the package's README and INSTALL files. | ||
86 | 91 | ||
87 | After building the nfs-utils package, there will be a mount.nfs binary in | 92 | After building the nfs-utils package, there will be a mount.nfs binary in |
88 | the utils/mount directory. This binary can be used to initiate NFS v2, v3, | 93 | the utils/mount directory. This binary can be used to initiate NFS v2, v3, |
89 | or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. | 94 | or v4 mounts. To initiate a v4 mount, the binary must be called |
90 | The standard technique is to create a symlink called mount.nfs4 to mount.nfs. | 95 | mount.nfs4. The standard technique is to create a symlink called |
96 | mount.nfs4 to mount.nfs. | ||
91 | 97 | ||
92 | NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed | 98 | This mount.nfs binary should be installed at /sbin/mount.nfs as follows: |
99 | |||
100 | $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs | ||
101 | |||
102 | In this location, mount.nfs will be invoked automatically for NFS mounts | ||
103 | by the system mount command. | ||
104 | |||
105 | NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed | ||
93 | on the NFS client machine. You do not need this specific version of | 106 | on the NFS client machine. You do not need this specific version of |
94 | nfs-utils on the server. Furthermore, only the mount.nfs command from | 107 | nfs-utils on the server. Furthermore, only the mount.nfs command from |
95 | nfs-utils-1.1.1 is needed on the client. | 108 | nfs-utils-1.1.2 is needed on the client. |
96 | 109 | ||
97 | - Install a Linux kernel with NFS/RDMA | 110 | - Install a Linux kernel with NFS/RDMA |
98 | 111 | ||
@@ -156,8 +169,8 @@ Check RDMA and NFS Setup | |||
156 | this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel | 169 | this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel |
157 | card: | 170 | card: |
158 | 171 | ||
159 | > modprobe ib_mthca | 172 | $ modprobe ib_mthca |
160 | > modprobe ib_ipoib | 173 | $ modprobe ib_ipoib |
161 | 174 | ||
162 | If you are using InfiniBand, make sure there is a Subnet Manager (SM) | 175 | If you are using InfiniBand, make sure there is a Subnet Manager (SM) |
163 | running on the network. If your IB switch has an embedded SM, you can | 176 | running on the network. If your IB switch has an embedded SM, you can |
@@ -166,7 +179,7 @@ Check RDMA and NFS Setup | |||
166 | 179 | ||
167 | If an SM is running on your network, you should see the following: | 180 | If an SM is running on your network, you should see the following: |
168 | 181 | ||
169 | > cat /sys/class/infiniband/driverX/ports/1/state | 182 | $ cat /sys/class/infiniband/driverX/ports/1/state |
170 | 4: ACTIVE | 183 | 4: ACTIVE |
171 | 184 | ||
172 | where driverX is mthca0, ipath5, ehca3, etc. | 185 | where driverX is mthca0, ipath5, ehca3, etc. |
@@ -174,10 +187,10 @@ Check RDMA and NFS Setup | |||
174 | To further test the InfiniBand software stack, use IPoIB (this | 187 | To further test the InfiniBand software stack, use IPoIB (this |
175 | assumes you have two IB hosts named host1 and host2): | 188 | assumes you have two IB hosts named host1 and host2): |
176 | 189 | ||
177 | host1> ifconfig ib0 a.b.c.x | 190 | host1$ ifconfig ib0 a.b.c.x |
178 | host2> ifconfig ib0 a.b.c.y | 191 | host2$ ifconfig ib0 a.b.c.y |
179 | host1> ping a.b.c.y | 192 | host1$ ping a.b.c.y |
180 | host2> ping a.b.c.x | 193 | host2$ ping a.b.c.x |
181 | 194 | ||
182 | For other device types, follow the appropriate procedures. | 195 | For other device types, follow the appropriate procedures. |
183 | 196 | ||
@@ -202,11 +215,11 @@ NFS/RDMA Setup | |||
202 | /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash) | 215 | /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash) |
203 | /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash) | 216 | /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash) |
204 | 217 | ||
205 | The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the | 218 | The IP address(es) is(are) the client's IPoIB address for an InfiniBand |
206 | client's iWARP address(es) for an RNIC. | 219 | HCA or the client's iWARP address(es) for an RNIC. |
207 | 220 | ||
208 | NOTE: The "insecure" option must be used because the NFS/RDMA client does not | 221 | NOTE: The "insecure" option must be used because the NFS/RDMA client does |
209 | use a reserved port. | 222 | not use a reserved port. |
210 | 223 | ||
211 | Each time a machine boots: | 224 | Each time a machine boots: |
212 | 225 | ||
@@ -214,43 +227,45 @@ NFS/RDMA Setup | |||
214 | 227 | ||
215 | For InfiniBand using a Mellanox adapter: | 228 | For InfiniBand using a Mellanox adapter: |
216 | 229 | ||
217 | > modprobe ib_mthca | 230 | $ modprobe ib_mthca |
218 | > modprobe ib_ipoib | 231 | $ modprobe ib_ipoib |
219 | > ifconfig ib0 a.b.c.d | 232 | $ ifconfig ib0 a.b.c.d |
220 | 233 | ||
221 | NOTE: use unique addresses for the client and server | 234 | NOTE: use unique addresses for the client and server |
222 | 235 | ||
223 | - Start the NFS server | 236 | - Start the NFS server |
224 | 237 | ||
225 | If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), | 238 | If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in |
226 | load the RDMA transport module: | 239 | kernel config), load the RDMA transport module: |
227 | 240 | ||
228 | > modprobe svcrdma | 241 | $ modprobe svcrdma |
229 | 242 | ||
230 | Regardless of how the server was built (module or built-in), start the server: | 243 | Regardless of how the server was built (module or built-in), start the |
244 | server: | ||
231 | 245 | ||
232 | > /etc/init.d/nfs start | 246 | $ /etc/init.d/nfs start |
233 | 247 | ||
234 | or | 248 | or |
235 | 249 | ||
236 | > service nfs start | 250 | $ service nfs start |
237 | 251 | ||
238 | Instruct the server to listen on the RDMA transport: | 252 | Instruct the server to listen on the RDMA transport: |
239 | 253 | ||
240 | > echo rdma 2050 > /proc/fs/nfsd/portlist | 254 | $ echo rdma 2050 > /proc/fs/nfsd/portlist |
241 | 255 | ||
242 | - On the client system | 256 | - On the client system |
243 | 257 | ||
244 | If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), | 258 | If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in |
245 | load the RDMA client module: | 259 | kernel config), load the RDMA client module: |
246 | 260 | ||
247 | > modprobe xprtrdma.ko | 261 | $ modprobe xprtrdma.ko |
248 | 262 | ||
249 | Regardless of how the client was built (module or built-in), issue the mount.nfs command: | 263 | Regardless of how the client was built (module or built-in), use this |
264 | command to mount the NFS/RDMA server: | ||
250 | 265 | ||
251 | > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050 | 266 | $ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt |
252 | 267 | ||
253 | To verify that the mount is using RDMA, run "cat /proc/mounts" and check the | 268 | To verify that the mount is using RDMA, run "cat /proc/mounts" and check |
254 | "proto" field for the given mount. | 269 | the "proto" field for the given mount. |
255 | 270 | ||
256 | Congratulations! You're using NFS/RDMA! | 271 | Congratulations! You're using NFS/RDMA! |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index dbc3c6a3650f..7f268f327d75 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -380,28 +380,35 @@ i386 and x86_64 platforms support the new IRQ vector displays. | |||
380 | Of some interest is the introduction of the /proc/irq directory to 2.4. | 380 | Of some interest is the introduction of the /proc/irq directory to 2.4. |
381 | It could be used to set IRQ to CPU affinity, this means that you can "hook" an | 381 | It could be used to set IRQ to CPU affinity, this means that you can "hook" an |
382 | IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the | 382 | IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the |
383 | irq subdir is one subdir for each IRQ, and one file; prof_cpu_mask | 383 | irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and |
384 | prof_cpu_mask. | ||
384 | 385 | ||
385 | For example | 386 | For example |
386 | > ls /proc/irq/ | 387 | > ls /proc/irq/ |
387 | 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask | 388 | 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask |
388 | 1 11 13 15 17 19 3 5 7 9 | 389 | 1 11 13 15 17 19 3 5 7 9 default_smp_affinity |
389 | > ls /proc/irq/0/ | 390 | > ls /proc/irq/0/ |
390 | smp_affinity | 391 | smp_affinity |
391 | 392 | ||
392 | The contents of the prof_cpu_mask file and each smp_affinity file for each IRQ | 393 | smp_affinity is a bitmask, in which you can specify which CPUs can handle the |
393 | is the same by default: | 394 | IRQ, you can set it by doing: |
394 | 395 | ||
395 | > cat /proc/irq/0/smp_affinity | 396 | > echo 1 > /proc/irq/10/smp_affinity |
396 | ffffffff | 397 | |
398 | This means that only the first CPU will handle the IRQ, but you can also echo | ||
399 | 5 which means that only the first and third CPU can handle the IRQ. | ||
397 | 400 | ||
398 | It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can | 401 | The contents of each smp_affinity file is the same by default: |
399 | set it by doing: | 402 | |
403 | > cat /proc/irq/0/smp_affinity | ||
404 | ffffffff | ||
400 | 405 | ||
401 | > echo 1 > /proc/irq/prof_cpu_mask | 406 | The default_smp_affinity mask applies to all non-active IRQs, which are the |
407 | IRQs which have not yet been allocated/activated, and hence which lack a | ||
408 | /proc/irq/[0-9]* directory. | ||
402 | 409 | ||
403 | This means that only the first CPU will handle the IRQ, but you can also echo 5 | 410 | prof_cpu_mask specifies which CPUs are to be profiled by the system wide |
404 | which means that only the first and fourth CPU can handle the IRQ. | 411 | profiler. Default value is ffffffff (all cpus). |
405 | 412 | ||
406 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin | 413 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin |
407 | between all the CPUs which are allowed to handle it. As usual the kernel has | 414 | between all the CPUs which are allowed to handle it. As usual the kernel has |
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt new file mode 100644 index 000000000000..540e9e7f59c5 --- /dev/null +++ b/Documentation/filesystems/ubifs.txt | |||
@@ -0,0 +1,164 @@ | |||
1 | Introduction | ||
2 | ============= | ||
3 | |||
4 | UBIFS file-system stands for UBI File System. UBI stands for "Unsorted | ||
5 | Block Images". UBIFS is a flash file system, which means it is designed | ||
6 | to work with flash devices. It is important to understand, that UBIFS | ||
7 | is completely different to any traditional file-system in Linux, like | ||
8 | Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems | ||
9 | which work with MTD devices, not block devices. The other Linux | ||
10 | file-system of this class is JFFS2. | ||
11 | |||
12 | To make it more clear, here is a small comparison of MTD devices and | ||
13 | block devices. | ||
14 | |||
15 | 1 MTD devices represent flash devices and they consist of eraseblocks of | ||
16 | rather large size, typically about 128KiB. Block devices consist of | ||
17 | small blocks, typically 512 bytes. | ||
18 | 2 MTD devices support 3 main operations - read from some offset within an | ||
19 | eraseblock, write to some offset within an eraseblock, and erase a whole | ||
20 | eraseblock. Block devices support 2 main operations - read a whole | ||
21 | block and write a whole block. | ||
22 | 3 The whole eraseblock has to be erased before it becomes possible to | ||
23 | re-write its contents. Blocks may be just re-written. | ||
24 | 4 Eraseblocks become worn out after some number of erase cycles - | ||
25 | typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC | ||
26 | NAND flashes. Blocks do not have the wear-out property. | ||
27 | 5 Eraseblocks may become bad (only on NAND flashes) and software should | ||
28 | deal with this. Blocks on hard drives typically do not become bad, | ||
29 | because hardware has mechanisms to substitute bad blocks, at least in | ||
30 | modern LBA disks. | ||
31 | |||
32 | It should be quite obvious why UBIFS is very different to traditional | ||
33 | file-systems. | ||
34 | |||
35 | UBIFS works on top of UBI. UBI is a separate software layer which may be | ||
36 | found in drivers/mtd/ubi. UBI is basically a volume management and | ||
37 | wear-leveling layer. It provides so-called UBI volumes, which are a higher | ||
38 | level abstraction than MTD devices. The programming model of UBI devices | ||
39 | is very similar to MTD devices - they still consist of large eraseblocks, | ||
40 | they have read/write/erase operations, but UBI devices are devoid of | ||
41 | limitations like wear and bad blocks (items 4 and 5 in the above list). | ||
42 | |||
43 | In a sense, UBIFS is a next generation of JFFS2 file-system, but it is | ||
44 | very different and incompatible to JFFS2. The following are the main | ||
45 | differences. | ||
46 | |||
47 | * JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on | ||
48 | top of UBI volumes. | ||
49 | * JFFS2 does not have on-media index and has to build it while mounting, | ||
50 | which requires full media scan. UBIFS maintains the FS indexing | ||
51 | information on the flash media and does not require full media scan, | ||
52 | so it mounts many times faster than JFFS2. | ||
53 | * JFFS2 is a write-through file-system, while UBIFS supports write-back, | ||
54 | which makes UBIFS much faster on writes. | ||
55 | |||
56 | Similarly to JFFS2, UBIFS supports on-the-fly compression which makes | ||
57 | it possible to fit quite a lot of data to the flash. | ||
58 | |||
59 | Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. | ||
60 | It does not need stuff like fsck.ext2. UBIFS automatically replays its | ||
61 | journal and recovers from crashes, ensuring that the on-flash data | ||
62 | structures are consistent. | ||
63 | |||
64 | UBIFS scales logarithmically (most of the data structures it uses are | ||
65 | trees), so the mount time and memory consumption do not linearly depend | ||
66 | on the flash size, like in case of JFFS2. This is because UBIFS | ||
67 | maintains the FS index on the flash media. However, UBIFS depends on | ||
68 | UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. | ||
69 | Nevertheless, UBI/UBIFS scales considerably better than JFFS2. | ||
70 | |||
71 | The authors of UBIFS believe, that it is possible to develop UBI2 which | ||
72 | would scale logarithmically as well. UBI2 would support the same API as UBI, | ||
73 | but it would be binary incompatible to UBI. So UBIFS would not need to be | ||
74 | changed to use UBI2. | ||
75 | |||
76 | |||
77 | Mount options | ||
78 | ============= | ||
79 | |||
80 | (*) == default. | ||
81 | |||
82 | norm_unmount (*) commit on unmount; the journal is committed | ||
83 | when the file-system is unmounted so that the | ||
84 | next mount does not have to replay the journal | ||
85 | and it becomes very fast; | ||
86 | fast_unmount do not commit on unmount; this option makes | ||
87 | unmount faster, but the next mount slower | ||
88 | because of the need to replay the journal. | ||
89 | |||
90 | |||
91 | Quick usage instructions | ||
92 | ======================== | ||
93 | |||
94 | The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, | ||
95 | where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is | ||
96 | UBI volume name. | ||
97 | |||
98 | Mount volume 0 on UBI device 0 to /mnt/ubifs: | ||
99 | $ mount -t ubifs ubi0_0 /mnt/ubifs | ||
100 | |||
101 | Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume | ||
102 | name): | ||
103 | $ mount -t ubifs ubi0:rootfs /mnt/ubifs | ||
104 | |||
105 | The following is an example of the kernel boot arguments to attach mtd0 | ||
106 | to UBI and mount volume "rootfs": | ||
107 | ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs | ||
108 | |||
109 | |||
110 | Module Parameters for Debugging | ||
111 | =============================== | ||
112 | |||
113 | When UBIFS has been compiled with debugging enabled, there are 3 module | ||
114 | parameters that are available to control aspects of testing and debugging. | ||
115 | The parameters are unsigned integers where each bit controls an option. | ||
116 | The parameters are: | ||
117 | |||
118 | debug_msgs Selects which debug messages to display, as follows: | ||
119 | |||
120 | Message Type Flag value | ||
121 | |||
122 | General messages 1 | ||
123 | Journal messages 2 | ||
124 | Mount messages 4 | ||
125 | Commit messages 8 | ||
126 | LEB search messages 16 | ||
127 | Budgeting messages 32 | ||
128 | Garbage collection messages 64 | ||
129 | Tree Node Cache (TNC) messages 128 | ||
130 | LEB properties (lprops) messages 256 | ||
131 | Input/output messages 512 | ||
132 | Log messages 1024 | ||
133 | Scan messages 2048 | ||
134 | Recovery messages 4096 | ||
135 | |||
136 | debug_chks Selects extra checks that UBIFS can do while running: | ||
137 | |||
138 | Check Flag value | ||
139 | |||
140 | General checks 1 | ||
141 | Check Tree Node Cache (TNC) 2 | ||
142 | Check indexing tree size 4 | ||
143 | Check orphan area 8 | ||
144 | Check old indexing tree 16 | ||
145 | Check LEB properties (lprops) 32 | ||
146 | Check leaf nodes and inodes 64 | ||
147 | |||
148 | debug_tsts Selects a mode of testing, as follows: | ||
149 | |||
150 | Test mode Flag value | ||
151 | |||
152 | Force in-the-gaps method 2 | ||
153 | Failure mode for recovery testing 4 | ||
154 | |||
155 | For example, set debug_msgs to 5 to display General messages and Mount | ||
156 | messages. | ||
157 | |||
158 | |||
159 | References | ||
160 | ========== | ||
161 | |||
162 | UBIFS documentation and FAQ/HOWTO at the MTD web site: | ||
163 | http://www.linux-mtd.infradead.org/doc/ubifs.html | ||
164 | http://www.linux-mtd.infradead.org/faq/ubifs.html | ||
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt new file mode 100644 index 000000000000..f218f616ff6b --- /dev/null +++ b/Documentation/ftrace.txt | |||
@@ -0,0 +1,1360 @@ | |||
1 | ftrace - Function Tracer | ||
2 | ======================== | ||
3 | |||
4 | Copyright 2008 Red Hat Inc. | ||
5 | Author: Steven Rostedt <srostedt@redhat.com> | ||
6 | License: The GNU Free Documentation License, Version 1.2 | ||
7 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | ||
8 | John Kacur, and David Teigland. | ||
9 | |||
10 | Written for: 2.6.27-rc1 | ||
11 | |||
12 | Introduction | ||
13 | ------------ | ||
14 | |||
15 | Ftrace is an internal tracer designed to help out developers and | ||
16 | designers of systems to find what is going on inside the kernel. | ||
17 | It can be used for debugging or analyzing latencies and performance | ||
18 | issues that take place outside of user-space. | ||
19 | |||
20 | Although ftrace is the function tracer, it also includes an | ||
21 | infrastructure that allows for other types of tracing. Some of the | ||
22 | tracers that are currently in ftrace include a tracer to trace | ||
23 | context switches, the time it takes for a high priority task to | ||
24 | run after it was woken up, the time interrupts are disabled, and | ||
25 | more (ftrace allows for tracer plugins, which means that the list of | ||
26 | tracers can always grow). | ||
27 | |||
28 | |||
29 | The File System | ||
30 | --------------- | ||
31 | |||
32 | Ftrace uses the debugfs file system to hold the control files as well | ||
33 | as the files to display output. | ||
34 | |||
35 | To mount the debugfs system: | ||
36 | |||
37 | # mkdir /debug | ||
38 | # mount -t debugfs nodev /debug | ||
39 | |||
40 | (Note: it is more common to mount at /sys/kernel/debug, but for simplicity | ||
41 | this document will use /debug) | ||
42 | |||
43 | That's it! (assuming that you have ftrace configured into your kernel) | ||
44 | |||
45 | After mounting the debugfs, you can see a directory called | ||
46 | "tracing". This directory contains the control and output files | ||
47 | of ftrace. Here is a list of some of the key files: | ||
48 | |||
49 | |||
50 | Note: all time values are in microseconds. | ||
51 | |||
52 | current_tracer : This is used to set or display the current tracer | ||
53 | that is configured. | ||
54 | |||
55 | available_tracers : This holds the different types of tracers that | ||
56 | have been compiled into the kernel. The tracers | ||
57 | listed here can be configured by echoing their name | ||
58 | into current_tracer. | ||
59 | |||
60 | tracing_enabled : This sets or displays whether the current_tracer | ||
61 | is activated and tracing or not. Echo 0 into this | ||
62 | file to disable the tracer or 1 to enable it. | ||
63 | |||
64 | trace : This file holds the output of the trace in a human readable | ||
65 | format (described below). | ||
66 | |||
67 | latency_trace : This file shows the same trace but the information | ||
68 | is organized more to display possible latencies | ||
69 | in the system (described below). | ||
70 | |||
71 | trace_pipe : The output is the same as the "trace" file but this | ||
72 | file is meant to be streamed with live tracing. | ||
73 | Reads from this file will block until new data | ||
74 | is retrieved. Unlike the "trace" and "latency_trace" | ||
75 | files, this file is a consumer. This means reading | ||
76 | from this file causes sequential reads to display | ||
77 | more current data. Once data is read from this | ||
78 | file, it is consumed, and will not be read | ||
79 | again with a sequential read. The "trace" and | ||
80 | "latency_trace" files are static, and if the | ||
81 | tracer is not adding more data, they will display | ||
82 | the same information every time they are read. | ||
83 | |||
84 | iter_ctrl : This file lets the user control the amount of data | ||
85 | that is displayed in one of the above output | ||
86 | files. | ||
87 | |||
88 | tracing_max_latency : Some of the tracers record the max latency. | ||
89 | For example, the time interrupts are disabled. | ||
90 | This time is saved in this file. The max trace | ||
91 | will also be stored, and displayed by either | ||
92 | "trace" or "latency_trace". A new max trace will | ||
93 | only be recorded if the latency is greater than | ||
94 | the value in this file. (in microseconds) | ||
95 | |||
96 | trace_entries : This sets or displays the number of trace | ||
97 | entries each CPU buffer can hold. The tracer buffers | ||
98 | are the same size for each CPU. The displayed number | ||
99 | is the size of the CPU buffer and not total size. The | ||
100 | trace buffers are allocated in pages (blocks of memory | ||
101 | that the kernel uses for allocation, usually 4 KB in size). | ||
102 | Since each entry is smaller than a page, if the last | ||
103 | allocated page has room for more entries than were | ||
104 | requested, the rest of the page is used to allocate | ||
105 | entries. | ||
106 | |||
107 | This can only be updated when the current_tracer | ||
108 | is set to "none". | ||
109 | |||
110 | NOTE: It is planned on changing the allocated buffers | ||
111 | from being the number of possible CPUS to | ||
112 | the number of online CPUS. | ||
113 | |||
114 | tracing_cpumask : This is a mask that lets the user only trace | ||
115 | on specified CPUS. The format is a hex string | ||
116 | representing the CPUS. | ||
117 | |||
118 | set_ftrace_filter : When dynamic ftrace is configured in (see the | ||
119 | section below "dynamic ftrace"), the code is dynamically | ||
120 | modified (code text rewrite) to disable calling of the | ||
121 | function profiler (mcount). This lets tracing be configured | ||
122 | in with practically no overhead in performance. This also | ||
123 | has a side effect of enabling or disabling specific functions | ||
124 | to be traced. Echoing names of functions into this file | ||
125 | will limit the trace to only those functions. | ||
126 | |||
127 | set_ftrace_notrace: This has an effect opposite to that of | ||
128 | set_ftrace_filter. Any function that is added here will not | ||
129 | be traced. If a function exists in both set_ftrace_filter | ||
130 | and set_ftrace_notrace, the function will _not_ be traced. | ||
131 | |||
132 | available_filter_functions : When a function is encountered the first | ||
133 | time by the dynamic tracer, it is recorded and | ||
134 | later the call is converted into a nop. This file | ||
135 | lists the functions that have been recorded | ||
136 | by the dynamic tracer and these functions can | ||
137 | be used to set the ftrace filter by the above | ||
138 | "set_ftrace_filter" file. (See the section "dynamic ftrace" | ||
139 | below for more details). | ||
140 | |||
141 | |||
142 | The Tracers | ||
143 | ----------- | ||
144 | |||
145 | Here is the list of current tracers that may be configured. | ||
146 | |||
147 | ftrace - function tracer that uses mcount to trace all functions. | ||
148 | |||
149 | sched_switch - traces the context switches between tasks. | ||
150 | |||
151 | irqsoff - traces the areas that disable interrupts and saves | ||
152 | the trace with the longest max latency. | ||
153 | See tracing_max_latency. When a new max is recorded, | ||
154 | it replaces the old trace. It is best to view this | ||
155 | trace via the latency_trace file. | ||
156 | |||
157 | preemptoff - Similar to irqsoff but traces and records the amount of | ||
158 | time for which preemption is disabled. | ||
159 | |||
160 | preemptirqsoff - Similar to irqsoff and preemptoff, but traces and | ||
161 | records the largest time for which irqs and/or preemption | ||
162 | is disabled. | ||
163 | |||
164 | wakeup - Traces and records the max latency that it takes for | ||
165 | the highest priority task to get scheduled after | ||
166 | it has been woken up. | ||
167 | |||
168 | none - This is not a tracer. To remove all tracers from tracing | ||
169 | simply echo "none" into current_tracer. | ||
170 | |||
171 | |||
172 | Examples of using the tracer | ||
173 | ---------------------------- | ||
174 | |||
175 | Here are typical examples of using the tracers when controlling them only | ||
176 | with the debugfs interface (without using any user-land utilities). | ||
177 | |||
178 | Output format: | ||
179 | -------------- | ||
180 | |||
181 | Here is an example of the output format of the file "trace" | ||
182 | |||
183 | -------- | ||
184 | # tracer: ftrace | ||
185 | # | ||
186 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
187 | # | | | | | | ||
188 | bash-4251 [01] 10152.583854: path_put <-path_walk | ||
189 | bash-4251 [01] 10152.583855: dput <-path_put | ||
190 | bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | ||
191 | -------- | ||
192 | |||
193 | A header is printed with the tracer name that is represented by the trace. | ||
194 | In this case the tracer is "ftrace". Then a header showing the format. Task | ||
195 | name "bash", the task PID "4251", the CPU that it was running on | ||
196 | "01", the timestamp in <secs>.<usecs> format, the function name that was | ||
197 | traced "path_put" and the parent function that called this function | ||
198 | "path_walk". The timestamp is the time at which the function was | ||
199 | entered. | ||
200 | |||
201 | The sched_switch tracer also includes tracing of task wakeups and | ||
202 | context switches. | ||
203 | |||
204 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S | ||
205 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S | ||
206 | ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R | ||
207 | events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R | ||
208 | kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R | ||
209 | ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R | ||
210 | |||
211 | Wake ups are represented by a "+" and the context switches are shown as | ||
212 | "==>". The format is: | ||
213 | |||
214 | Context switches: | ||
215 | |||
216 | Previous task Next Task | ||
217 | |||
218 | <pid>:<prio>:<state> ==> <pid>:<prio>:<state> | ||
219 | |||
220 | Wake ups: | ||
221 | |||
222 | Current task Task waking up | ||
223 | |||
224 | <pid>:<prio>:<state> + <pid>:<prio>:<state> | ||
225 | |||
226 | The prio is the internal kernel priority, which is the inverse of the | ||
227 | priority that is usually displayed by user-space tools. Zero represents | ||
228 | the highest priority (99). Prio 100 starts the "nice" priorities with | ||
229 | 100 being equal to nice -20 and 139 being nice 19. The prio "140" is | ||
230 | reserved for the idle task which is the lowest priority thread (pid 0). | ||
231 | |||
232 | |||
233 | Latency trace format | ||
234 | -------------------- | ||
235 | |||
236 | For traces that display latency times, the latency_trace file gives | ||
237 | somewhat more information to see why a latency happened. Here is a typical | ||
238 | trace. | ||
239 | |||
240 | # tracer: irqsoff | ||
241 | # | ||
242 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
243 | -------------------------------------------------------------------- | ||
244 | latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
245 | ----------------- | ||
246 | | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | ||
247 | ----------------- | ||
248 | => started at: apic_timer_interrupt | ||
249 | => ended at: do_softirq | ||
250 | |||
251 | # _------=> CPU# | ||
252 | # / _-----=> irqs-off | ||
253 | # | / _----=> need-resched | ||
254 | # || / _---=> hardirq/softirq | ||
255 | # ||| / _--=> preempt-depth | ||
256 | # |||| / | ||
257 | # ||||| delay | ||
258 | # cmd pid ||||| time | caller | ||
259 | # \ / ||||| \ | / | ||
260 | <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
261 | <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | ||
262 | <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | ||
263 | |||
264 | |||
265 | |||
266 | This shows that the current tracer is "irqsoff" tracing the time for which | ||
267 | interrupts were disabled. It gives the trace version and the version | ||
268 | of the kernel upon which this was executed (2.6.26-rc8). Then it displays | ||
269 | the max latency in microsecs (97 us). The number of trace entries displayed | ||
270 | and the total number recorded (both are three: #3/3). The type of | ||
271 | preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero | ||
272 | and are reserved for later use. #P is the number of online CPUS (#P:2). | ||
273 | |||
274 | The task is the process that was running when the latency occurred. | ||
275 | (swapper pid: 0). | ||
276 | |||
277 | The start and stop (the functions in which the interrupts were disabled and | ||
278 | enabled respectively) that caused the latencies: | ||
279 | |||
280 | apic_timer_interrupt is where the interrupts were disabled. | ||
281 | do_softirq is where they were enabled again. | ||
282 | |||
283 | The next lines after the header are the trace itself. The header | ||
284 | explains which is which. | ||
285 | |||
286 | cmd: The name of the process in the trace. | ||
287 | |||
288 | pid: The PID of that process. | ||
289 | |||
290 | CPU#: The CPU which the process was running on. | ||
291 | |||
292 | irqs-off: 'd' interrupts are disabled. '.' otherwise. | ||
293 | |||
294 | need-resched: 'N' task need_resched is set, '.' otherwise. | ||
295 | |||
296 | hardirq/softirq: | ||
297 | 'H' - hard irq occurred inside a softirq. | ||
298 | 'h' - hard irq is running | ||
299 | 's' - soft irq is running | ||
300 | '.' - normal context. | ||
301 | |||
302 | preempt-depth: The level of preempt_disabled | ||
303 | |||
304 | The above is mostly meaningful for kernel developers. | ||
305 | |||
306 | time: This differs from the trace file output. The trace file output | ||
307 | includes an absolute timestamp. The timestamp used by the | ||
308 | latency_trace file is relative to the start of the trace. | ||
309 | |||
310 | delay: This is just to help catch your eye a bit better. And | ||
311 | needs to be fixed to be only relative to the same CPU. | ||
312 | The marks are determined by the difference between this | ||
313 | current trace and the next trace. | ||
314 | '!' - greater than preempt_mark_thresh (default 100) | ||
315 | '+' - greater than 1 microsecond | ||
316 | ' ' - less than or equal to 1 microsecond. | ||
317 | |||
318 | The rest is the same as the 'trace' file. | ||
319 | |||
320 | |||
321 | iter_ctrl | ||
322 | --------- | ||
323 | |||
324 | The iter_ctrl file is used to control what gets printed in the trace | ||
325 | output. To see what is available, simply cat the file: | ||
326 | |||
327 | cat /debug/tracing/iter_ctrl | ||
328 | print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | ||
329 | noblock nostacktrace nosched-tree | ||
330 | |||
331 | To disable one of the options, echo in the option prepended with "no". | ||
332 | |||
333 | echo noprint-parent > /debug/tracing/iter_ctrl | ||
334 | |||
335 | To enable an option, leave off the "no". | ||
336 | |||
337 | echo sym-offset > /debug/tracing/iter_ctrl | ||
338 | |||
339 | Here are the available options: | ||
340 | |||
341 | print-parent - On function traces, display the calling function | ||
342 | as well as the function being traced. | ||
343 | |||
344 | print-parent: | ||
345 | bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul | ||
346 | |||
347 | noprint-parent: | ||
348 | bash-4000 [01] 1477.606694: simple_strtoul | ||
349 | |||
350 | |||
351 | sym-offset - Display not only the function name, but also the offset | ||
352 | in the function. For example, instead of seeing just | ||
353 | "ktime_get", you will see "ktime_get+0xb/0x20". | ||
354 | |||
355 | sym-offset: | ||
356 | bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 | ||
357 | |||
358 | sym-addr - this will also display the function address as well as | ||
359 | the function name. | ||
360 | |||
361 | sym-addr: | ||
362 | bash-4000 [01] 1477.606694: simple_strtoul <c0339346> | ||
363 | |||
364 | verbose - This deals with the latency_trace file. | ||
365 | |||
366 | bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ | ||
367 | (+0.000ms): simple_strtoul (strict_strtoul) | ||
368 | |||
369 | raw - This will display raw numbers. This option is best for use with | ||
370 | user applications that can translate the raw numbers better than | ||
371 | having it done in the kernel. | ||
372 | |||
373 | hex - Similar to raw, but the numbers will be in a hexadecimal format. | ||
374 | |||
375 | bin - This will print out the formats in raw binary. | ||
376 | |||
377 | block - TBD (needs update) | ||
378 | |||
379 | stacktrace - This is one of the options that changes the trace itself. | ||
380 | When a trace is recorded, so is the stack of functions. | ||
381 | This allows for back traces of trace sites. | ||
382 | |||
383 | sched-tree - TBD (any users??) | ||
384 | |||
385 | |||
386 | sched_switch | ||
387 | ------------ | ||
388 | |||
389 | This tracer simply records schedule switches. Here is an example | ||
390 | of how to use it. | ||
391 | |||
392 | # echo sched_switch > /debug/tracing/current_tracer | ||
393 | # echo 1 > /debug/tracing/tracing_enabled | ||
394 | # sleep 1 | ||
395 | # echo 0 > /debug/tracing/tracing_enabled | ||
396 | # cat /debug/tracing/trace | ||
397 | |||
398 | # tracer: sched_switch | ||
399 | # | ||
400 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
401 | # | | | | | | ||
402 | bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R | ||
403 | bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R | ||
404 | sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R | ||
405 | bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S | ||
406 | bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R | ||
407 | sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R | ||
408 | bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D | ||
409 | bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R | ||
410 | <idle>-0 [00] 240.132589: 0:140:R + 4:115:S | ||
411 | <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R | ||
412 | ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R | ||
413 | <idle>-0 [00] 240.132598: 0:140:R + 4:115:S | ||
414 | <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R | ||
415 | ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R | ||
416 | sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R | ||
417 | [...] | ||
418 | |||
419 | |||
420 | As we have discussed previously about this format, the header shows | ||
421 | the name of the trace and points to the options. The "FUNCTION" | ||
422 | is a misnomer since here it represents the wake ups and context | ||
423 | switches. | ||
424 | |||
425 | The sched_switch file only lists the wake ups (represented with '+') | ||
426 | and context switches ('==>') with the previous task or current task | ||
427 | first followed by the next task or task waking up. The format for both | ||
428 | of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO | ||
429 | is the inverse of the actual priority with zero (0) being the highest | ||
430 | priority and the nice values starting at 100 (nice -20). Below is | ||
431 | a quick chart to map the kernel priority to user land priorities. | ||
432 | |||
433 | Kernel priority: 0 to 99 ==> user RT priority 99 to 0 | ||
434 | Kernel priority: 100 to 139 ==> user nice -20 to 19 | ||
435 | Kernel priority: 140 ==> idle task priority | ||
436 | |||
437 | The task states are: | ||
438 | |||
439 | R - running : wants to run, may not actually be running | ||
440 | S - sleep : process is waiting to be woken up (handles signals) | ||
441 | D - disk sleep (uninterruptible sleep) : process must be woken up | ||
442 | (ignores signals) | ||
443 | T - stopped : process suspended | ||
444 | t - traced : process is being traced (with something like gdb) | ||
445 | Z - zombie : process waiting to be cleaned up | ||
446 | X - unknown | ||
447 | |||
448 | |||
449 | ftrace_enabled | ||
450 | -------------- | ||
451 | |||
452 | The following tracers (listed below) give different output depending | ||
453 | on whether or not the sysctl ftrace_enabled is set. To set ftrace_enabled, | ||
454 | one can either use the sysctl function or set it via the proc | ||
455 | file system interface. | ||
456 | |||
457 | sysctl kernel.ftrace_enabled=1 | ||
458 | |||
459 | or | ||
460 | |||
461 | echo 1 > /proc/sys/kernel/ftrace_enabled | ||
462 | |||
463 | To disable ftrace_enabled simply replace the '1' with '0' in | ||
464 | the above commands. | ||
465 | |||
466 | When ftrace_enabled is set the tracers will also record the functions | ||
467 | that are within the trace. The descriptions of the tracers | ||
468 | will also show an example with ftrace enabled. | ||
469 | |||
470 | |||
471 | irqsoff | ||
472 | ------- | ||
473 | |||
474 | When interrupts are disabled, the CPU can not react to any other | ||
475 | external event (besides NMIs and SMIs). This prevents the timer | ||
476 | interrupt from triggering or the mouse interrupt from letting the | ||
477 | kernel know of a new mouse event. The result is a latency with the | ||
478 | reaction time. | ||
479 | |||
480 | The irqsoff tracer tracks the time for which interrupts are disabled. | ||
481 | When a new maximum latency is hit, the tracer saves the trace leading up | ||
482 | to that latency point so that every time a new maximum is reached, the old | ||
483 | saved trace is discarded and the new trace is saved. | ||
484 | |||
485 | To reset the maximum, echo 0 into tracing_max_latency. Here is an | ||
486 | example: | ||
487 | |||
488 | # echo irqsoff > /debug/tracing/current_tracer | ||
489 | # echo 0 > /debug/tracing/tracing_max_latency | ||
490 | # echo 1 > /debug/tracing/tracing_enabled | ||
491 | # ls -ltr | ||
492 | [...] | ||
493 | # echo 0 > /debug/tracing/tracing_enabled | ||
494 | # cat /debug/tracing/latency_trace | ||
495 | # tracer: irqsoff | ||
496 | # | ||
497 | irqsoff latency trace v1.1.5 on 2.6.26 | ||
498 | -------------------------------------------------------------------- | ||
499 | latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
500 | ----------------- | ||
501 | | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) | ||
502 | ----------------- | ||
503 | => started at: sys_setpgid | ||
504 | => ended at: sys_setpgid | ||
505 | |||
506 | # _------=> CPU# | ||
507 | # / _-----=> irqs-off | ||
508 | # | / _----=> need-resched | ||
509 | # || / _---=> hardirq/softirq | ||
510 | # ||| / _--=> preempt-depth | ||
511 | # |||| / | ||
512 | # ||||| delay | ||
513 | # cmd pid ||||| time | caller | ||
514 | # \ / ||||| \ | / | ||
515 | bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) | ||
516 | bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) | ||
517 | bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) | ||
518 | |||
519 | |||
520 | Here we see that we had a latency of 12 microsecs (which is | ||
521 | very good). The _write_lock_irq in sys_setpgid disabled interrupts. | ||
522 | The difference between the 12 and the displayed timestamp 14us occurred | ||
523 | because the clock was incremented between the time of recording the max | ||
524 | latency and the time of recording the function that had that latency. | ||
525 | |||
526 | Note the above example had ftrace_enabled not set. If we set the | ||
527 | ftrace_enabled, we get a much larger output: | ||
528 | |||
529 | # tracer: irqsoff | ||
530 | # | ||
531 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
532 | -------------------------------------------------------------------- | ||
533 | latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
534 | ----------------- | ||
535 | | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | ||
536 | ----------------- | ||
537 | => started at: __alloc_pages_internal | ||
538 | => ended at: __alloc_pages_internal | ||
539 | |||
540 | # _------=> CPU# | ||
541 | # / _-----=> irqs-off | ||
542 | # | / _----=> need-resched | ||
543 | # || / _---=> hardirq/softirq | ||
544 | # ||| / _--=> preempt-depth | ||
545 | # |||| / | ||
546 | # ||||| delay | ||
547 | # cmd pid ||||| time | caller | ||
548 | # \ / ||||| \ | / | ||
549 | ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | ||
550 | ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | ||
551 | ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | ||
552 | ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | ||
553 | ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | ||
554 | ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | ||
555 | ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | ||
556 | ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | ||
557 | ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | ||
558 | ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
559 | ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
560 | ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
561 | [...] | ||
562 | ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | ||
563 | ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | ||
564 | ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | ||
565 | ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | ||
566 | ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | ||
567 | ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | ||
568 | ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | ||
569 | ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | ||
570 | ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | ||
571 | |||
572 | |||
573 | |||
574 | Here we traced a 50 microsecond latency. But we also see all the | ||
575 | functions that were called during that time. Note that by enabling | ||
576 | function tracing, we incur an added overhead. This overhead may | ||
577 | extend the latency times. But nevertheless, this trace has provided | ||
578 | some very helpful debugging information. | ||
579 | |||
580 | |||
581 | preemptoff | ||
582 | ---------- | ||
583 | |||
584 | When preemption is disabled, we may be able to receive interrupts but | ||
585 | the task cannot be preempted and a higher priority task must wait | ||
586 | for preemption to be enabled again before it can preempt a lower | ||
587 | priority task. | ||
588 | |||
589 | The preemptoff tracer traces the places that disable preemption. | ||
590 | Like the irqsoff tracer, it records the maximum latency for which preemption | ||
591 | was disabled. The control of preemptoff tracer is much like the irqsoff | ||
592 | tracer. | ||
593 | |||
594 | # echo preemptoff > /debug/tracing/current_tracer | ||
595 | # echo 0 > /debug/tracing/tracing_max_latency | ||
596 | # echo 1 > /debug/tracing/tracing_enabled | ||
597 | # ls -ltr | ||
598 | [...] | ||
599 | # echo 0 > /debug/tracing/tracing_enabled | ||
600 | # cat /debug/tracing/latency_trace | ||
601 | # tracer: preemptoff | ||
602 | # | ||
603 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
604 | -------------------------------------------------------------------- | ||
605 | latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
606 | ----------------- | ||
607 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
608 | ----------------- | ||
609 | => started at: do_IRQ | ||
610 | => ended at: __do_softirq | ||
611 | |||
612 | # _------=> CPU# | ||
613 | # / _-----=> irqs-off | ||
614 | # | / _----=> need-resched | ||
615 | # || / _---=> hardirq/softirq | ||
616 | # ||| / _--=> preempt-depth | ||
617 | # |||| / | ||
618 | # ||||| delay | ||
619 | # cmd pid ||||| time | caller | ||
620 | # \ / ||||| \ | / | ||
621 | sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | ||
622 | sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | ||
623 | sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | ||
624 | |||
625 | |||
626 | This has some more changes. Preemption was disabled when an interrupt | ||
627 | came in (notice the 'h'), and was enabled while doing a softirq | ||
628 | (notice the 's'). But we also see that interrupts have been disabled | ||
629 | when entering the preempt off section and leaving it (the 'd'). | ||
630 | We do not know if interrupts were enabled in the mean time. | ||
631 | |||
632 | # tracer: preemptoff | ||
633 | # | ||
634 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
635 | -------------------------------------------------------------------- | ||
636 | latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
637 | ----------------- | ||
638 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
639 | ----------------- | ||
640 | => started at: remove_wait_queue | ||
641 | => ended at: __do_softirq | ||
642 | |||
643 | # _------=> CPU# | ||
644 | # / _-----=> irqs-off | ||
645 | # | / _----=> need-resched | ||
646 | # || / _---=> hardirq/softirq | ||
647 | # ||| / _--=> preempt-depth | ||
648 | # |||| / | ||
649 | # ||||| delay | ||
650 | # cmd pid ||||| time | caller | ||
651 | # \ / ||||| \ | / | ||
652 | sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | ||
653 | sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | ||
654 | sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | ||
655 | sshd-4261 0d..1 2us : irq_enter (do_IRQ) | ||
656 | sshd-4261 0d..1 2us : idle_cpu (irq_enter) | ||
657 | sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
658 | sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
659 | sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
660 | [...] | ||
661 | sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | ||
662 | sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | ||
663 | sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | ||
664 | sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | ||
665 | sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | ||
666 | sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | ||
667 | sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | ||
668 | sshd-4261 0d..2 15us : do_softirq (irq_exit) | ||
669 | sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
670 | sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
671 | sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
672 | sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
673 | sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
674 | sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
675 | [...] | ||
676 | sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | ||
677 | sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | ||
678 | sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | ||
679 | sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | ||
680 | sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | ||
681 | sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | ||
682 | sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | ||
683 | sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | ||
684 | [...] | ||
685 | sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | ||
686 | sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | ||
687 | |||
688 | |||
689 | The above is an example of the preemptoff trace with ftrace_enabled | ||
690 | set. Here we see that interrupts were disabled the entire time. | ||
691 | The irq_enter code lets us know that we entered an interrupt 'h'. | ||
692 | Before that, the functions being traced still show that it is not | ||
693 | in an interrupt, but we can see from the functions themselves that | ||
694 | this is not the case. | ||
695 | |||
696 | Notice that __do_softirq when called does not have a preempt_count. | ||
697 | It may seem that we missed a preempt enabling. What really happened | ||
698 | is that the preempt count is held on the thread's stack and we | ||
699 | switched to the softirq stack (4K stacks in effect). The code | ||
700 | does not copy the preempt count, but because interrupts are disabled, | ||
701 | we do not need to worry about it. Having a tracer like this is good | ||
702 | for letting people know what really happens inside the kernel. | ||
703 | |||
704 | |||
705 | preemptirqsoff | ||
706 | -------------- | ||
707 | |||
708 | Knowing the locations that have interrupts disabled or preemption | ||
709 | disabled for the longest times is helpful. But sometimes we would | ||
710 | like to know when either preemption and/or interrupts are disabled. | ||
711 | |||
712 | Consider the following code: | ||
713 | |||
714 | local_irq_disable(); | ||
715 | call_function_with_irqs_off(); | ||
716 | preempt_disable(); | ||
717 | call_function_with_irqs_and_preemption_off(); | ||
718 | local_irq_enable(); | ||
719 | call_function_with_preemption_off(); | ||
720 | preempt_enable(); | ||
721 | |||
722 | The irqsoff tracer will record the total length of | ||
723 | call_function_with_irqs_off() and | ||
724 | call_function_with_irqs_and_preemption_off(). | ||
725 | |||
726 | The preemptoff tracer will record the total length of | ||
727 | call_function_with_irqs_and_preemption_off() and | ||
728 | call_function_with_preemption_off(). | ||
729 | |||
730 | But neither will trace the time that interrupts and/or preemption | ||
731 | is disabled. This total time is the time that we can not schedule. | ||
732 | To record this time, use the preemptirqsoff tracer. | ||
733 | |||
734 | Again, using this trace is much like the irqsoff and preemptoff tracers. | ||
735 | |||
736 | # echo preemptirqsoff > /debug/tracing/current_tracer | ||
737 | # echo 0 > /debug/tracing/tracing_max_latency | ||
738 | # echo 1 > /debug/tracing/tracing_enabled | ||
739 | # ls -ltr | ||
740 | [...] | ||
741 | # echo 0 > /debug/tracing/tracing_enabled | ||
742 | # cat /debug/tracing/latency_trace | ||
743 | # tracer: preemptirqsoff | ||
744 | # | ||
745 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
746 | -------------------------------------------------------------------- | ||
747 | latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
748 | ----------------- | ||
749 | | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | ||
750 | ----------------- | ||
751 | => started at: apic_timer_interrupt | ||
752 | => ended at: __do_softirq | ||
753 | |||
754 | # _------=> CPU# | ||
755 | # / _-----=> irqs-off | ||
756 | # | / _----=> need-resched | ||
757 | # || / _---=> hardirq/softirq | ||
758 | # ||| / _--=> preempt-depth | ||
759 | # |||| / | ||
760 | # ||||| delay | ||
761 | # cmd pid ||||| time | caller | ||
762 | # \ / ||||| \ | / | ||
763 | ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
764 | ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | ||
765 | ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | ||
766 | |||
767 | |||
768 | |||
769 | The trace_hardirqs_off_thunk is called from assembly on x86 when | ||
770 | interrupts are disabled in the assembly code. Without the function | ||
771 | tracing, we do not know if interrupts were enabled within the preemption | ||
772 | points. We do see that it started with preemption enabled. | ||
773 | |||
774 | Here is a trace with ftrace_enabled set: | ||
775 | |||
776 | |||
777 | # tracer: preemptirqsoff | ||
778 | # | ||
779 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
780 | -------------------------------------------------------------------- | ||
781 | latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
782 | ----------------- | ||
783 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
784 | ----------------- | ||
785 | => started at: write_chan | ||
786 | => ended at: __do_softirq | ||
787 | |||
788 | # _------=> CPU# | ||
789 | # / _-----=> irqs-off | ||
790 | # | / _----=> need-resched | ||
791 | # || / _---=> hardirq/softirq | ||
792 | # ||| / _--=> preempt-depth | ||
793 | # |||| / | ||
794 | # ||||| delay | ||
795 | # cmd pid ||||| time | caller | ||
796 | # \ / ||||| \ | / | ||
797 | ls-4473 0.N.. 0us : preempt_schedule (write_chan) | ||
798 | ls-4473 0dN.1 1us : _spin_lock (schedule) | ||
799 | ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | ||
800 | ls-4473 0d..2 2us : put_prev_task_fair (schedule) | ||
801 | [...] | ||
802 | ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | ||
803 | ls-4473 0d..2 13us : __switch_to (schedule) | ||
804 | sshd-4261 0d..2 14us : finish_task_switch (schedule) | ||
805 | sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | ||
806 | sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | ||
807 | sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | ||
808 | sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | ||
809 | sshd-4261 0d..2 17us : irq_enter (do_IRQ) | ||
810 | sshd-4261 0d..2 17us : idle_cpu (irq_enter) | ||
811 | sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | ||
812 | sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | ||
813 | sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | ||
814 | sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | ||
815 | sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | ||
816 | sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | ||
817 | sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | ||
818 | [...] | ||
819 | sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | ||
820 | sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | ||
821 | sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | ||
822 | sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | ||
823 | sshd-4261 0d..3 30us : do_softirq (irq_exit) | ||
824 | sshd-4261 0d... 30us : __do_softirq (do_softirq) | ||
825 | sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | ||
826 | sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | ||
827 | sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | ||
828 | [...] | ||
829 | sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | ||
830 | sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | ||
831 | sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | ||
832 | sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | ||
833 | sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | ||
834 | sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | ||
835 | sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | ||
836 | sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | ||
837 | sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | ||
838 | [...] | ||
839 | sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | ||
840 | sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | ||
841 | sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | ||
842 | sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | ||
843 | sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | ||
844 | sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | ||
845 | sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
846 | sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
847 | sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
848 | sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
849 | sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
850 | [...] | ||
851 | sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | ||
852 | sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | ||
853 | sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | ||
854 | sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | ||
855 | sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | ||
856 | sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | ||
857 | sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | ||
858 | |||
859 | |||
860 | This is a very interesting trace. It started with the preemption of | ||
861 | the ls task. We see that the task had the "need_resched" bit set | ||
862 | via the 'N' in the trace. Interrupts were disabled before the spin_lock | ||
863 | at the beginning of the trace. We see that a schedule took place to run | ||
864 | sshd. When the interrupts were enabled, we took an interrupt. | ||
865 | On return from the interrupt handler, the softirq ran. We took another | ||
866 | interrupt while running the softirq as we see from the capital 'H'. | ||
867 | |||
868 | |||
869 | wakeup | ||
870 | ------ | ||
871 | |||
872 | In a Real-Time environment it is very important to know the time | ||
873 | that elapses from when the highest priority task is woken up to the | ||
874 | time that it executes. This is also known as "schedule latency". | ||
875 | I stress the point that this is about RT tasks. It is also important | ||
876 | to know the scheduling latency of non-RT tasks, but the average | ||
877 | schedule latency is better for non-RT tasks. Tools like | ||
878 | LatencyTop are more appropriate for such measurements. | ||
879 | |||
880 | Real-Time environments are interested in the worst case latency. | ||
881 | That is the longest latency it takes for something to happen, and | ||
882 | not the average. We can have a very fast scheduler that may only | ||
883 | have a large latency once in a while, but that would not work well | ||
884 | with Real-Time tasks. The wakeup tracer was designed to record | ||
885 | the worst case wakeups of RT tasks. Non-RT tasks are not recorded | ||
886 | because the tracer only records one worst case and tracing non-RT | ||
887 | tasks that are unpredictable will overwrite the worst case latency | ||
888 | of RT tasks. | ||
889 | |||
890 | Since this tracer only deals with RT tasks, we will run this slightly | ||
891 | differently than we did with the previous tracers. Instead of performing | ||
892 | an 'ls', we will run 'sleep 1' under 'chrt' which changes the | ||
893 | priority of the task. | ||
894 | |||
895 | # echo wakeup > /debug/tracing/current_tracer | ||
896 | # echo 0 > /debug/tracing/tracing_max_latency | ||
897 | # echo 1 > /debug/tracing/tracing_enabled | ||
898 | # chrt -f 5 sleep 1 | ||
899 | # echo 0 > /debug/tracing/tracing_enabled | ||
900 | # cat /debug/tracing/latency_trace | ||
901 | # tracer: wakeup | ||
902 | # | ||
903 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
904 | -------------------------------------------------------------------- | ||
905 | latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
906 | ----------------- | ||
907 | | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | ||
908 | ----------------- | ||
909 | |||
910 | # _------=> CPU# | ||
911 | # / _-----=> irqs-off | ||
912 | # | / _----=> need-resched | ||
913 | # || / _---=> hardirq/softirq | ||
914 | # ||| / _--=> preempt-depth | ||
915 | # |||| / | ||
916 | # ||||| delay | ||
917 | # cmd pid ||||| time | caller | ||
918 | # \ / ||||| \ | / | ||
919 | <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | ||
920 | <idle>-0 1d..4 4us : schedule (cpu_idle) | ||
921 | |||
922 | |||
923 | |||
924 | Running this on an idle system, we see that it only took 4 microseconds | ||
925 | to perform the task switch. Note, since the trace marker in the | ||
926 | schedule is before the actual "switch", we stop the tracing when | ||
927 | the recorded task is about to schedule in. This may change if | ||
928 | we add a new marker at the end of the scheduler. | ||
929 | |||
930 | Notice that the recorded task is 'sleep' with the PID of 4901 and it | ||
931 | has an rt_prio of 5. This priority is user-space priority and not | ||
932 | the internal kernel priority. The policy is 1 for SCHED_FIFO and 2 | ||
933 | for SCHED_RR. | ||
934 | |||
935 | Doing the same with chrt -r 5 and ftrace_enabled set. | ||
936 | |||
937 | # tracer: wakeup | ||
938 | # | ||
939 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
940 | -------------------------------------------------------------------- | ||
941 | latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
942 | ----------------- | ||
943 | | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | ||
944 | ----------------- | ||
945 | |||
946 | # _------=> CPU# | ||
947 | # / _-----=> irqs-off | ||
948 | # | / _----=> need-resched | ||
949 | # || / _---=> hardirq/softirq | ||
950 | # ||| / _--=> preempt-depth | ||
951 | # |||| / | ||
952 | # ||||| delay | ||
953 | # cmd pid ||||| time | caller | ||
954 | # \ / ||||| \ | / | ||
955 | ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | ||
956 | ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | ||
957 | ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | ||
958 | ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | ||
959 | ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | ||
960 | ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | ||
961 | ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | ||
962 | ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | ||
963 | [...] | ||
964 | ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | ||
965 | ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | ||
966 | ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | ||
967 | ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | ||
968 | [...] | ||
969 | ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | ||
970 | ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | ||
971 | ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | ||
972 | ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | ||
973 | ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | ||
974 | ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | ||
975 | ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | ||
976 | ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | ||
977 | ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | ||
978 | ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | ||
979 | ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | ||
980 | ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | ||
981 | ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | ||
982 | ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | ||
983 | [...] | ||
984 | ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | ||
985 | ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | ||
986 | ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | ||
987 | ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | ||
988 | ksoftirq-7 1d..4 50us : schedule (__cond_resched) | ||
989 | |||
990 | The interrupt went off while running ksoftirqd. This task runs at | ||
991 | SCHED_OTHER. Why did we not see the 'N' set early? This may be | ||
992 | a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K stacks | ||
993 | configured, the interrupt and softirq run with their own stack. | ||
994 | Some information is held on the top of the task's stack (need_resched | ||
995 | and preempt_count are both stored there). The setting of the NEED_RESCHED | ||
996 | bit is done directly to the task's stack, but the reading of the | ||
997 | NEED_RESCHED is done by looking at the current stack, which in this case | ||
998 | is the stack for the hard interrupt. This hides the fact that NEED_RESCHED | ||
999 | has been set. We do not see the 'N' until we switch back to the task's | ||
1000 | assigned stack. | ||
1001 | |||
1002 | ftrace | ||
1003 | ------ | ||
1004 | |||
1005 | ftrace is not only the name of the tracing infrastructure, but it | ||
1006 | is also a name of one of the tracers. The tracer is the function | ||
1007 | tracer. Enabling the function tracer can be done from the | ||
1008 | debug file system. Make sure ftrace_enabled is set; otherwise | ||
1009 | this tracer is a nop. | ||
1010 | |||
1011 | # sysctl kernel.ftrace_enabled=1 | ||
1012 | # echo ftrace > /debug/tracing/current_tracer | ||
1013 | # echo 1 > /debug/tracing/tracing_enabled | ||
1014 | # usleep 1 | ||
1015 | # echo 0 > /debug/tracing/tracing_enabled | ||
1016 | # cat /debug/tracing/trace | ||
1017 | # tracer: ftrace | ||
1018 | # | ||
1019 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1020 | # | | | | | | ||
1021 | bash-4003 [00] 123.638713: finish_task_switch <-schedule | ||
1022 | bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | ||
1023 | bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | ||
1024 | bash-4003 [00] 123.638715: hrtick_set <-schedule | ||
1025 | bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | ||
1026 | bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | ||
1027 | bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | ||
1028 | bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | ||
1029 | bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | ||
1030 | bash-4003 [00] 123.638718: sub_preempt_count <-schedule | ||
1031 | bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | ||
1032 | bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | ||
1033 | bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | ||
1034 | bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | ||
1035 | bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | ||
1036 | [...] | ||
1037 | |||
1038 | |||
1039 | Note: ftrace uses ring buffers to store the above entries. The newest data | ||
1040 | may overwrite the oldest data. Sometimes using echo to stop the trace | ||
1041 | is not sufficient because the tracing could have overwritten the data | ||
1042 | that you wanted to record. For this reason, it is sometimes better to | ||
1043 | disable tracing directly from a program. This allows you to stop the | ||
1044 | tracing at the point that you hit the part that you are interested in. | ||
1045 | To disable the tracing directly from a C program, something like the following | ||
1046 | code snippet can be used: | ||
1047 | |||
1048 | int trace_fd; | ||
1049 | [...] | ||
1050 | int main(int argc, char *argv[]) { | ||
1051 | [...] | ||
1052 | trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY); | ||
1053 | [...] | ||
1054 | if (condition_hit()) { | ||
1055 | write(trace_fd, "0", 1); | ||
1056 | } | ||
1057 | [...] | ||
1058 | } | ||
1059 | |||
1060 | Note: Here we hard coded the path name. The debugfs mount is not | ||
1061 | guaranteed to be at /debug (and is more commonly at /sys/kernel/debug). | ||
1062 | For simple one time traces, the above is sufficient. For anything else, | ||
1063 | a search through /proc/mounts may be needed to find where the debugfs | ||
1064 | file-system is mounted. | ||
1065 | |||
1066 | dynamic ftrace | ||
1067 | -------------- | ||
1068 | |||
1069 | If CONFIG_DYNAMIC_FTRACE is set, the system will run with | ||
1070 | virtually no overhead when function tracing is disabled. The way | ||
1071 | this works is the mcount function call (placed at the start of | ||
1072 | every kernel function, produced by the -pg switch in gcc), starts | ||
1073 | off pointing to a simple return. (Enabling FTRACE will include the | ||
1074 | -pg switch in the compiling of the kernel.) | ||
1075 | |||
1076 | When dynamic ftrace is initialized, it calls kstop_machine to make | ||
1077 | the machine act like a uniprocessor so that it can freely modify code | ||
1078 | without worrying about other processors executing that same code. At | ||
1079 | initialization, the mcount calls are changed to call a "record_ip" | ||
1080 | function. After this, the first time a kernel function is called, | ||
1081 | it has the calling address saved in a hash table. | ||
1082 | |||
1083 | Later on the ftraced kernel thread is awoken and will again call | ||
1084 | kstop_machine if new functions have been recorded. The ftraced thread | ||
1085 | will change all calls to mcount to "nop". Just calling mcount | ||
1086 | and having mcount return has shown a 10% overhead. By converting | ||
1087 | it to a nop, there is no measurable overhead to the system. | ||
1088 | |||
1089 | One special side-effect to the recording of the functions being | ||
1090 | traced is that we can now selectively choose which functions we | ||
1091 | wish to trace and which ones we want the mcount calls to remain as | ||
1092 | nops. | ||
1093 | |||
1094 | Two files are used, one for enabling and one for disabling the tracing | ||
1095 | of specified functions. They are: | ||
1096 | |||
1097 | set_ftrace_filter | ||
1098 | |||
1099 | and | ||
1100 | |||
1101 | set_ftrace_notrace | ||
1102 | |||
1103 | A list of available functions that you can add to these files is listed | ||
1104 | in: | ||
1105 | |||
1106 | available_filter_functions | ||
1107 | |||
1108 | # cat /debug/tracing/available_filter_functions | ||
1109 | put_prev_task_idle | ||
1110 | kmem_cache_create | ||
1111 | pick_next_task_rt | ||
1112 | get_online_cpus | ||
1113 | pick_next_task_fair | ||
1114 | mutex_lock | ||
1115 | [...] | ||
1116 | |||
1117 | If I am only interested in sys_nanosleep and hrtimer_interrupt: | ||
1118 | |||
1119 | # echo sys_nanosleep hrtimer_interrupt \ | ||
1120 | > /debug/tracing/set_ftrace_filter | ||
1121 | # echo ftrace > /debug/tracing/current_tracer | ||
1122 | # echo 1 > /debug/tracing/tracing_enabled | ||
1123 | # usleep 1 | ||
1124 | # echo 0 > /debug/tracing/tracing_enabled | ||
1125 | # cat /debug/tracing/trace | ||
1126 | # tracer: ftrace | ||
1127 | # | ||
1128 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1129 | # | | | | | | ||
1130 | usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1131 | usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | ||
1132 | <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1133 | |||
1134 | To see which functions are being traced, you can cat the file: | ||
1135 | |||
1136 | # cat /debug/tracing/set_ftrace_filter | ||
1137 | hrtimer_interrupt | ||
1138 | sys_nanosleep | ||
1139 | |||
1140 | |||
1141 | Perhaps this is not enough. The filters also allow simple wild cards. | ||
1142 | Only the following are currently available | ||
1143 | |||
1144 | <match>* - will match functions that begin with <match> | ||
1145 | *<match> - will match functions that end with <match> | ||
1146 | *<match>* - will match functions that have <match> in it | ||
1147 | |||
1148 | These are the only wild cards which are supported. | ||
1149 | |||
1150 | <match>*<match> will not work. | ||
1151 | |||
1152 | # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter | ||
1153 | |||
1154 | Produces: | ||
1155 | |||
1156 | # tracer: ftrace | ||
1157 | # | ||
1158 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1159 | # | | | | | | ||
1160 | bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | ||
1161 | bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | ||
1162 | bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | ||
1163 | bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1164 | <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1165 | <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1166 | <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1167 | <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1168 | <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1169 | |||
1170 | |||
1171 | Notice that we lost the sys_nanosleep. | ||
1172 | |||
1173 | # cat /debug/tracing/set_ftrace_filter | ||
1174 | hrtimer_run_queues | ||
1175 | hrtimer_run_pending | ||
1176 | hrtimer_init | ||
1177 | hrtimer_cancel | ||
1178 | hrtimer_try_to_cancel | ||
1179 | hrtimer_forward | ||
1180 | hrtimer_start | ||
1181 | hrtimer_reprogram | ||
1182 | hrtimer_force_reprogram | ||
1183 | hrtimer_get_next_event | ||
1184 | hrtimer_interrupt | ||
1185 | hrtimer_nanosleep | ||
1186 | hrtimer_wakeup | ||
1187 | hrtimer_get_remaining | ||
1188 | hrtimer_get_res | ||
1189 | hrtimer_init_sleeper | ||
1190 | |||
1191 | |||
1192 | This is because the '>' and '>>' act just like they do in bash. | ||
1193 | To rewrite the filters, use '>' | ||
1194 | To append to the filters, use '>>' | ||
1195 | |||
1196 | To clear out a filter so that all functions will be recorded again: | ||
1197 | |||
1198 | # echo > /debug/tracing/set_ftrace_filter | ||
1199 | # cat /debug/tracing/set_ftrace_filter | ||
1200 | # | ||
1201 | |||
1202 | Again, now we want to append. | ||
1203 | |||
1204 | # echo sys_nanosleep > /debug/tracing/set_ftrace_filter | ||
1205 | # cat /debug/tracing/set_ftrace_filter | ||
1206 | sys_nanosleep | ||
1207 | # echo hrtimer_* >> /debug/tracing/set_ftrace_filter | ||
1208 | # cat /debug/tracing/set_ftrace_filter | ||
1209 | hrtimer_run_queues | ||
1210 | hrtimer_run_pending | ||
1211 | hrtimer_init | ||
1212 | hrtimer_cancel | ||
1213 | hrtimer_try_to_cancel | ||
1214 | hrtimer_forward | ||
1215 | hrtimer_start | ||
1216 | hrtimer_reprogram | ||
1217 | hrtimer_force_reprogram | ||
1218 | hrtimer_get_next_event | ||
1219 | hrtimer_interrupt | ||
1220 | sys_nanosleep | ||
1221 | hrtimer_nanosleep | ||
1222 | hrtimer_wakeup | ||
1223 | hrtimer_get_remaining | ||
1224 | hrtimer_get_res | ||
1225 | hrtimer_init_sleeper | ||
1226 | |||
1227 | |||
1228 | The set_ftrace_notrace prevents those functions from being traced. | ||
1229 | |||
1230 | # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace | ||
1231 | |||
1232 | Produces: | ||
1233 | |||
1234 | # tracer: ftrace | ||
1235 | # | ||
1236 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1237 | # | | | | | | ||
1238 | bash-4043 [01] 115.281644: finish_task_switch <-schedule | ||
1239 | bash-4043 [01] 115.281645: hrtick_set <-schedule | ||
1240 | bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | ||
1241 | bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | ||
1242 | bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | ||
1243 | bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | ||
1244 | bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | ||
1245 | bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | ||
1246 | bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | ||
1247 | |||
1248 | We can see that there's no more lock or preempt tracing. | ||
1249 | |||
1250 | ftraced | ||
1251 | ------- | ||
1252 | |||
1253 | As mentioned above, when dynamic ftrace is configured in, a kernel | ||
1254 | thread wakes up once a second and checks to see if there are mcount | ||
1255 | calls that need to be converted into nops. If there are not any, then | ||
1256 | it simply goes back to sleep. But if there are some, it will call | ||
1257 | kstop_machine to convert the calls to nops. | ||
1258 | |||
1259 | There may be a case in which you do not want this added latency. | ||
1260 | Perhaps you are doing some audio recording and this activity might | ||
1261 | cause skips in the playback. There is an interface to disable | ||
1262 | and enable the "ftraced" kernel thread. | ||
1263 | |||
1264 | # echo 0 > /debug/tracing/ftraced_enabled | ||
1265 | |||
1266 | This will disable the calling of kstop_machine to update the | ||
1267 | mcount calls to nops. Remember that there is a large overhead | ||
1268 | to calling mcount. Without this kernel thread, that overhead will | ||
1269 | exist. | ||
1270 | |||
1271 | If there are recorded calls to mcount, any write to the ftraced_enabled | ||
1272 | file will cause the kstop_machine to run. This means that a | ||
1273 | user can manually perform the updates when they want to by simply | ||
1274 | echoing a '0' into the ftraced_enabled file. | ||
1275 | |||
1276 | The updates are also done at the beginning of enabling a tracer | ||
1277 | that uses ftrace function recording. | ||
1278 | |||
1279 | |||
1280 | trace_pipe | ||
1281 | ---------- | ||
1282 | |||
1283 | The trace_pipe outputs the same content as the trace file, but the effect | ||
1284 | on the tracing is different. Every read from trace_pipe is consumed. | ||
1285 | This means that subsequent reads will be different. The trace | ||
1286 | is live. | ||
1287 | |||
1288 | # echo ftrace > /debug/tracing/current_tracer | ||
1289 | # cat /debug/tracing/trace_pipe > /tmp/trace.out & | ||
1290 | [1] 4153 | ||
1291 | # echo 1 > /debug/tracing/tracing_enabled | ||
1292 | # usleep 1 | ||
1293 | # echo 0 > /debug/tracing/tracing_enabled | ||
1294 | # cat /debug/tracing/trace | ||
1295 | # tracer: ftrace | ||
1296 | # | ||
1297 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
1298 | # | | | | | | ||
1299 | |||
1300 | # | ||
1301 | # cat /tmp/trace.out | ||
1302 | bash-4043 [00] 41.267106: finish_task_switch <-schedule | ||
1303 | bash-4043 [00] 41.267106: hrtick_set <-schedule | ||
1304 | bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | ||
1305 | bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | ||
1306 | bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | ||
1307 | bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | ||
1308 | bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | ||
1309 | bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | ||
1310 | bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | ||
1311 | bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1312 | |||
1313 | |||
1314 | Note, reading the trace_pipe file will block until more input is added. | ||
1315 | By changing the tracer, trace_pipe will issue an EOF. We needed | ||
1316 | to set the ftrace tracer _before_ running cat on the trace_pipe file. | ||
1317 | |||
1318 | |||
1319 | trace entries | ||
1320 | ------------- | ||
1321 | |||
1322 | Having too much or not enough data can be troublesome in diagnosing | ||
1323 | an issue in the kernel. The file trace_entries is used to modify | ||
1324 | the size of the internal trace buffers. The number listed | ||
1325 | is the number of entries that can be recorded per CPU. To know | ||
1326 | the full size, multiply the number of possible CPUs by the | ||
1327 | number of entries. | ||
1328 | |||
1329 | # cat /debug/tracing/trace_entries | ||
1330 | 65620 | ||
1331 | |||
1332 | Note, to modify this, you must have tracing completely disabled. To do that, | ||
1333 | echo "none" into the current_tracer. If the current_tracer is not set | ||
1334 | to "none", an EINVAL error will be returned. | ||
1335 | |||
1336 | # echo none > /debug/tracing/current_tracer | ||
1337 | # echo 100000 > /debug/tracing/trace_entries | ||
1338 | # cat /debug/tracing/trace_entries | ||
1339 | 100045 | ||
1340 | |||
1341 | |||
1342 | Notice that we echoed in 100,000 but the size is 100,045. The entries | ||
1343 | are held in individual pages. It allocates the number of pages it takes | ||
1344 | to fulfill the request. If more entries may fit on the last page | ||
1345 | then they will be added. | ||
1346 | |||
1347 | # echo 1 > /debug/tracing/trace_entries | ||
1348 | # cat /debug/tracing/trace_entries | ||
1349 | 85 | ||
1350 | |||
1351 | This shows us that 85 entries can fit in a single page. | ||
1352 | |||
1353 | The number of pages which will be allocated is limited to a percentage | ||
1354 | of available memory. Allocating too much will produce an error. | ||
1355 | |||
1356 | # echo 1000000000000 > /debug/tracing/trace_entries | ||
1357 | -bash: echo: write error: Cannot allocate memory | ||
1358 | # cat /debug/tracing/trace_entries | ||
1359 | 85 | ||
1360 | |||
diff --git a/Documentation/i2c/busses/i2c-i810 b/Documentation/i2c/busses/i2c-i810 deleted file mode 100644 index 778210ee1583..000000000000 --- a/Documentation/i2c/busses/i2c-i810 +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | Kernel driver i2c-i810 | ||
2 | |||
3 | Supported adapters: | ||
4 | * Intel 82810, 82810-DC100, 82810E, and 82815 (GMCH) | ||
5 | * Intel 82845G (GMCH) | ||
6 | |||
7 | Authors: | ||
8 | Frodo Looijaard <frodol@dds.nl>, | ||
9 | Philip Edelbrock <phil@netroedge.com>, | ||
10 | Kyösti Mälkki <kmalkki@cc.hut.fi>, | ||
11 | Ralph Metzler <rjkm@thp.uni-koeln.de>, | ||
12 | Mark D. Studebaker <mdsxyz123@yahoo.com> | ||
13 | |||
14 | Main contact: Mark Studebaker <mdsxyz123@yahoo.com> | ||
15 | |||
16 | Description | ||
17 | ----------- | ||
18 | |||
19 | WARNING: If you have an '810' or '815' motherboard, your standard I2C | ||
20 | temperature sensors are most likely on the 801's I2C bus. You want the | ||
21 | i2c-i801 driver for those, not this driver. | ||
22 | |||
23 | Now for the i2c-i810... | ||
24 | |||
25 | The GMCH chip contains two I2C interfaces. | ||
26 | |||
27 | The first interface is used for DDC (Data Display Channel) which is a | ||
28 | serial channel through the VGA monitor connector to a DDC-compliant | ||
29 | monitor. This interface is defined by the Video Electronics Standards | ||
30 | Association (VESA). The standards are available for purchase at | ||
31 | http://www.vesa.org . | ||
32 | |||
33 | The second interface is a general-purpose I2C bus. It may be connected to a | ||
34 | TV-out chip such as the BT869 or possibly to a digital flat-panel display. | ||
35 | |||
36 | Features | ||
37 | -------- | ||
38 | |||
39 | Both busses use the i2c-algo-bit driver for 'bit banging' | ||
40 | and support for specific transactions is provided by i2c-algo-bit. | ||
41 | |||
42 | Issues | ||
43 | ------ | ||
44 | |||
45 | If you enable bus testing in i2c-algo-bit (insmod i2c-algo-bit bit_test=1), | ||
46 | the test may fail; if so, the i2c-i810 driver won't be inserted. However, | ||
47 | we think this has been fixed. | ||
diff --git a/Documentation/i2c/busses/i2c-prosavage b/Documentation/i2c/busses/i2c-prosavage deleted file mode 100644 index 703687902511..000000000000 --- a/Documentation/i2c/busses/i2c-prosavage +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | Kernel driver i2c-prosavage | ||
2 | |||
3 | Supported adapters: | ||
4 | |||
5 | S3/VIA KM266/VT8375 aka ProSavage8 | ||
6 | S3/VIA KM133/VT8365 aka Savage4 | ||
7 | |||
8 | Author: Henk Vergonet <henk@god.dyndns.org> | ||
9 | |||
10 | Description | ||
11 | ----------- | ||
12 | |||
13 | The Savage4 chips contain two I2C interfaces (aka a I2C 'master' or | ||
14 | 'host'). | ||
15 | |||
16 | The first interface is used for DDC (Data Display Channel) which is a | ||
17 | serial channel through the VGA monitor connector to a DDC-compliant | ||
18 | monitor. This interface is defined by the Video Electronics Standards | ||
19 | Association (VESA). The standards are available for purchase at | ||
20 | http://www.vesa.org . The second interface is a general-purpose I2C bus. | ||
21 | |||
22 | Usefull for gaining access to the TV Encoder chips. | ||
23 | |||
diff --git a/Documentation/i2c/busses/i2c-savage4 b/Documentation/i2c/busses/i2c-savage4 deleted file mode 100644 index 6ecceab618d3..000000000000 --- a/Documentation/i2c/busses/i2c-savage4 +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | Kernel driver i2c-savage4 | ||
2 | |||
3 | Supported adapters: | ||
4 | * Savage4 | ||
5 | * Savage2000 | ||
6 | |||
7 | Authors: | ||
8 | Alexander Wold <awold@bigfoot.com>, | ||
9 | Mark D. Studebaker <mdsxyz123@yahoo.com> | ||
10 | |||
11 | Description | ||
12 | ----------- | ||
13 | |||
14 | The Savage4 chips contain two I2C interfaces (aka a I2C 'master' | ||
15 | or 'host'). | ||
16 | |||
17 | The first interface is used for DDC (Data Display Channel) which is a | ||
18 | serial channel through the VGA monitor connector to a DDC-compliant | ||
19 | monitor. This interface is defined by the Video Electronics Standards | ||
20 | Association (VESA). The standards are available for purchase at | ||
21 | http://www.vesa.org . The DDC bus is not yet supported because its register | ||
22 | is not directly memory-mapped. | ||
23 | |||
24 | The second interface is a general-purpose I2C bus. This is the only | ||
25 | interface supported by the driver at the moment. | ||
26 | |||
diff --git a/Documentation/i2c/chips/max6875 b/Documentation/i2c/chips/max6875 index a0cd8af2f408..10ca43cd1a72 100644 --- a/Documentation/i2c/chips/max6875 +++ b/Documentation/i2c/chips/max6875 | |||
@@ -49,7 +49,7 @@ $ modprobe max6875 force=0,0x50 | |||
49 | 49 | ||
50 | The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple | 50 | The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple |
51 | addresses. For example, for address 0x50, it also reserves 0x51. | 51 | addresses. For example, for address 0x50, it also reserves 0x51. |
52 | The even-address instance is called 'max6875', the odd one is 'max6875 subclient'. | 52 | The even-address instance is called 'max6875', the odd one is 'dummy'. |
53 | 53 | ||
54 | 54 | ||
55 | Programming the chip using i2c-dev | 55 | Programming the chip using i2c-dev |
diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539 index 1d81c530c4a5..6aff890088b1 100644 --- a/Documentation/i2c/chips/pca9539 +++ b/Documentation/i2c/chips/pca9539 | |||
@@ -7,7 +7,7 @@ drivers/gpio/pca9539.c instead. | |||
7 | Supported chips: | 7 | Supported chips: |
8 | * Philips PCA9539 | 8 | * Philips PCA9539 |
9 | Prefix: 'pca9539' | 9 | Prefix: 'pca9539' |
10 | Addresses scanned: 0x74 - 0x77 | 10 | Addresses scanned: none |
11 | Datasheet: | 11 | Datasheet: |
12 | http://www.semiconductors.philips.com/acrobat/datasheets/PCA9539_2.pdf | 12 | http://www.semiconductors.philips.com/acrobat/datasheets/PCA9539_2.pdf |
13 | 13 | ||
@@ -23,6 +23,14 @@ The input sense can also be inverted. | |||
23 | The 16 lines are split between two bytes. | 23 | The 16 lines are split between two bytes. |
24 | 24 | ||
25 | 25 | ||
26 | Detection | ||
27 | --------- | ||
28 | |||
29 | The PCA9539 is difficult to detect and not commonly found in PC machines, | ||
30 | so you have to pass the I2C bus and address of the installed PCA9539 | ||
31 | devices explicitly to the driver at load time via the force=... parameter. | ||
32 | |||
33 | |||
26 | Sysfs entries | 34 | Sysfs entries |
27 | ------------- | 35 | ------------- |
28 | 36 | ||
diff --git a/Documentation/i2c/chips/pcf8574 b/Documentation/i2c/chips/pcf8574 index 5c1ad1376b62..235815c075ff 100644 --- a/Documentation/i2c/chips/pcf8574 +++ b/Documentation/i2c/chips/pcf8574 | |||
@@ -4,13 +4,13 @@ Kernel driver pcf8574 | |||
4 | Supported chips: | 4 | Supported chips: |
5 | * Philips PCF8574 | 5 | * Philips PCF8574 |
6 | Prefix: 'pcf8574' | 6 | Prefix: 'pcf8574' |
7 | Addresses scanned: I2C 0x20 - 0x27 | 7 | Addresses scanned: none |
8 | Datasheet: Publicly available at the Philips Semiconductors website | 8 | Datasheet: Publicly available at the Philips Semiconductors website |
9 | http://www.semiconductors.philips.com/pip/PCF8574P.html | 9 | http://www.semiconductors.philips.com/pip/PCF8574P.html |
10 | 10 | ||
11 | * Philips PCF8574A | 11 | * Philips PCF8574A |
12 | Prefix: 'pcf8574a' | 12 | Prefix: 'pcf8574a' |
13 | Addresses scanned: I2C 0x38 - 0x3f | 13 | Addresses scanned: none |
14 | Datasheet: Publicly available at the Philips Semiconductors website | 14 | Datasheet: Publicly available at the Philips Semiconductors website |
15 | http://www.semiconductors.philips.com/pip/PCF8574P.html | 15 | http://www.semiconductors.philips.com/pip/PCF8574P.html |
16 | 16 | ||
@@ -38,12 +38,10 @@ For more informations see the datasheet. | |||
38 | Accessing PCF8574(A) via /sys interface | 38 | Accessing PCF8574(A) via /sys interface |
39 | ------------------------------------- | 39 | ------------------------------------- |
40 | 40 | ||
41 | ! Be careful ! | ||
42 | The PCF8574(A) is plainly impossible to detect ! Stupid chip. | 41 | The PCF8574(A) is plainly impossible to detect ! Stupid chip. |
43 | So every chip with address in the interval [20..27] and [38..3f] are | 42 | So, you have to pass the I2C bus and address of the installed PCF8574 |
44 | detected as PCF8574(A). If you have other chips in this address | 43 | and PCF8574A devices explicitly to the driver at load time via the |
45 | range, the workaround is to load this module after the one | 44 | force=... parameter. |
46 | for your others chips. | ||
47 | 45 | ||
48 | On detection (i.e. insmod, modprobe et al.), directories are being | 46 | On detection (i.e. insmod, modprobe et al.), directories are being |
49 | created for each detected PCF8574(A): | 47 | created for each detected PCF8574(A): |
diff --git a/Documentation/i2c/chips/pcf8575 b/Documentation/i2c/chips/pcf8575 index 25f5698a61cf..40b268eb276f 100644 --- a/Documentation/i2c/chips/pcf8575 +++ b/Documentation/i2c/chips/pcf8575 | |||
@@ -40,12 +40,9 @@ Detection | |||
40 | --------- | 40 | --------- |
41 | 41 | ||
42 | There is no method known to detect whether a chip on a given I2C address is | 42 | There is no method known to detect whether a chip on a given I2C address is |
43 | a PCF8575 or whether it is any other I2C device. So there are two alternatives | 43 | a PCF8575 or whether it is any other I2C device, so you have to pass the I2C |
44 | to let the driver find the installed PCF8575 devices: | 44 | bus and address of the installed PCF8575 devices explicitly to the driver at |
45 | - Load this driver after any other I2C driver for I2C devices with addresses | 45 | load time via the force=... parameter. |
46 | in the range 0x20 .. 0x27. | ||
47 | - Pass the I2C bus and address of the installed PCF8575 devices explicitly to | ||
48 | the driver at load time via the probe=... or force=... parameters. | ||
49 | 46 | ||
50 | /sys interface | 47 | /sys interface |
51 | -------------- | 48 | -------------- |
diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes new file mode 100644 index 000000000000..045765c0b9b5 --- /dev/null +++ b/Documentation/i2c/fault-codes | |||
@@ -0,0 +1,127 @@ | |||
1 | This is a summary of the most important conventions for use of fault | ||
2 | codes in the I2C/SMBus stack. | ||
3 | |||
4 | |||
5 | A "Fault" is not always an "Error" | ||
6 | ---------------------------------- | ||
7 | Not all fault reports imply errors; "page faults" should be a familiar | ||
8 | example. Software often retries idempotent operations after transient | ||
9 | faults. There may be fancier recovery schemes that are appropriate in | ||
10 | some cases, such as re-initializing (and maybe resetting). After such | ||
11 | recovery, triggered by a fault report, there is no error. | ||
12 | |||
13 | In a similar way, sometimes a "fault" code just reports one defined | ||
14 | result for an operation ... it doesn't indicate that anything is wrong | ||
15 | at all, just that the outcome wasn't on the "golden path". | ||
16 | |||
17 | In short, your I2C driver code may need to know these codes in order | ||
18 | to respond correctly. Other code may need to rely on YOUR code reporting | ||
19 | the right fault code, so that it can (in turn) behave correctly. | ||
20 | |||
21 | |||
22 | I2C and SMBus fault codes | ||
23 | ------------------------- | ||
24 | These are returned as negative numbers from most calls, with zero or | ||
25 | some positive number indicating a non-fault return. The specific | ||
26 | numbers associated with these symbols differ between architectures, | ||
27 | though most Linux systems use <asm-generic/errno*.h> numbering. | ||
28 | |||
29 | Note that the descriptions here are not exhaustive. There are other | ||
30 | codes that may be returned, and other cases where these codes should | ||
31 | be returned. However, drivers should not return other codes for these | ||
32 | cases (unless the hardware doesn't provide unique fault reports). | ||
33 | |||
34 | Also, codes returned by adapter probe methods follow rules which are | ||
35 | specific to their host bus (such as PCI, or the platform bus). | ||
36 | |||
37 | |||
38 | EAGAIN | ||
39 | Returned by I2C adapters when they lose arbitration in master | ||
40 | transmit mode: some other master was transmitting different | ||
41 | data at the same time. | ||
42 | |||
43 | Also returned when trying to invoke an I2C operation in an | ||
44 | atomic context, when some task is already using that I2C bus | ||
45 | to execute some other operation. | ||
46 | |||
47 | EBADMSG | ||
48 | Returned by SMBus logic when an invalid Packet Error Code byte | ||
49 | is received. This code is a CRC covering all bytes in the | ||
50 | transaction, and is sent before the terminating STOP. This | ||
51 | fault is only reported on read transactions; the SMBus slave | ||
52 | may have a way to report PEC mismatches on writes from the | ||
53 | host. Note that even if PECs are in use, you should not rely | ||
54 | on these as the only way to detect incorrect data transfers. | ||
55 | |||
56 | EBUSY | ||
57 | Returned by SMBus adapters when the bus was busy for longer | ||
58 | than allowed. This usually indicates some device (maybe the | ||
59 | SMBus adapter) needs some fault recovery (such as resetting), | ||
60 | or that the reset was attempted but failed. | ||
61 | |||
62 | EINVAL | ||
63 | This rather vague error means an invalid parameter has been | ||
64 | detected before any I/O operation was started. Use a more | ||
65 | specific fault code when you can. | ||
66 | |||
67 | One example would be a driver trying an SMBus Block Write | ||
68 | with block size outside the range of 1-32 bytes. | ||
69 | |||
70 | EIO | ||
71 | This rather vague error means something went wrong when | ||
72 | performing an I/O operation. Use a more specific fault | ||
73 | code when you can. | ||
74 | |||
75 | ENODEV | ||
76 | Returned by driver probe() methods. This is a bit more | ||
77 | specific than ENXIO, implying the problem isn't with the | ||
78 | address, but with the device found there. Driver probes | ||
79 | may verify the device returns *correct* responses, and | ||
80 | return this as appropriate. (The driver core will warn | ||
81 | about probe faults other than ENXIO and ENODEV.) | ||
82 | |||
83 | ENOMEM | ||
84 | Returned by any component that can't allocate memory when | ||
85 | it needs to do so. | ||
86 | |||
87 | ENXIO | ||
88 | Returned by I2C adapters to indicate that the address phase | ||
89 | of a transfer didn't get an ACK. While it might just mean | ||
90 | an I2C device was temporarily not responding, usually it | ||
91 | means there's nothing listening at that address. | ||
92 | |||
93 | Returned by driver probe() methods to indicate that they | ||
94 | found no device to bind to. (ENODEV may also be used.) | ||
95 | |||
96 | EOPNOTSUPP | ||
97 | Returned by an adapter when asked to perform an operation | ||
98 | that it doesn't, or can't, support. | ||
99 | |||
100 | For example, this would be returned when an adapter that | ||
101 | doesn't support SMBus block transfers is asked to execute | ||
102 | one. In that case, the driver making that request should | ||
103 | have verified that functionality was supported before it | ||
104 | made that block transfer request. | ||
105 | |||
106 | Similarly, if an I2C adapter can't execute all legal I2C | ||
107 | messages, it should return this when asked to perform a | ||
108 | transaction it can't. (These limitations can't be seen in | ||
109 | the adapter's functionality mask, since the assumption is | ||
110 | that if an adapter supports I2C it supports all of I2C.) | ||
111 | |||
112 | EPROTO | ||
113 | Returned when slave does not conform to the relevant I2C | ||
114 | or SMBus (or chip-specific) protocol specifications. One | ||
115 | case is when the length of an SMBus block data response | ||
116 | (from the SMBus slave) is outside the range 1-32 bytes. | ||
117 | |||
118 | ETIMEDOUT | ||
119 | This is returned by drivers when an operation took too much | ||
120 | time, and was aborted before it completed. | ||
121 | |||
122 | SMBus adapters may return it when an operation took more | ||
123 | time than allowed by the SMBus specification; for example, | ||
124 | when a slave stretches clocks too far. I2C has no such | ||
125 | timeouts, but it's normal for I2C adapters to impose some | ||
126 | arbitrary limits (much longer than SMBus!) too. | ||
127 | |||
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol index 03f08fb491cc..24bfb65da17d 100644 --- a/Documentation/i2c/smbus-protocol +++ b/Documentation/i2c/smbus-protocol | |||
@@ -42,8 +42,8 @@ Count (8 bits): A data byte containing the length of a block operation. | |||
42 | [..]: Data sent by I2C device, as opposed to data sent by the host adapter. | 42 | [..]: Data sent by I2C device, as opposed to data sent by the host adapter. |
43 | 43 | ||
44 | 44 | ||
45 | SMBus Quick Command: i2c_smbus_write_quick() | 45 | SMBus Quick Command |
46 | ============================================= | 46 | =================== |
47 | 47 | ||
48 | This sends a single bit to the device, at the place of the Rd/Wr bit. | 48 | This sends a single bit to the device, at the place of the Rd/Wr bit. |
49 | 49 | ||
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index d4cd4126d1ad..6b61b3a2e90b 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients | |||
@@ -44,6 +44,10 @@ static struct i2c_driver foo_driver = { | |||
44 | .id_table = foo_ids, | 44 | .id_table = foo_ids, |
45 | .probe = foo_probe, | 45 | .probe = foo_probe, |
46 | .remove = foo_remove, | 46 | .remove = foo_remove, |
47 | /* if device autodetection is needed: */ | ||
48 | .class = I2C_CLASS_SOMETHING, | ||
49 | .detect = foo_detect, | ||
50 | .address_data = &addr_data, | ||
47 | 51 | ||
48 | /* else, driver uses "legacy" binding model: */ | 52 | /* else, driver uses "legacy" binding model: */ |
49 | .attach_adapter = foo_attach_adapter, | 53 | .attach_adapter = foo_attach_adapter, |
@@ -217,6 +221,31 @@ in the I2C bus driver. You may want to save the returned i2c_client | |||
217 | reference for later use. | 221 | reference for later use. |
218 | 222 | ||
219 | 223 | ||
224 | Device Detection (Standard driver model) | ||
225 | ---------------------------------------- | ||
226 | |||
227 | Sometimes you do not know in advance which I2C devices are connected to | ||
228 | a given I2C bus. This is for example the case of hardware monitoring | ||
229 | devices on a PC's SMBus. In that case, you may want to let your driver | ||
230 | detect supported devices automatically. This is how the legacy model | ||
231 | was working, and is now available as an extension to the standard | ||
232 | driver model (so that we can finally get rid of the legacy model.) | ||
233 | |||
234 | You simply have to define a detect callback which will attempt to | ||
235 | identify supported devices (returning 0 for supported ones and -ENODEV | ||
236 | for unsupported ones), a list of addresses to probe, and a device type | ||
237 | (or class) so that only I2C buses which may have that type of device | ||
238 | connected (and not otherwise enumerated) will be probed. The i2c | ||
239 | core will then call you back as needed and will instantiate a device | ||
240 | for you for every successful detection. | ||
241 | |||
242 | Note that this mechanism is purely optional and not suitable for all | ||
243 | devices. You need some reliable way to identify the supported devices | ||
244 | (typically using device-specific, dedicated identification registers), | ||
245 | otherwise misdetections are likely to occur and things can go wrong | ||
246 | quickly. | ||
247 | |||
248 | |||
220 | Device Deletion (Standard driver model) | 249 | Device Deletion (Standard driver model) |
221 | --------------------------------------- | 250 | --------------------------------------- |
222 | 251 | ||
@@ -569,7 +598,6 @@ SMBus communication | |||
569 | in terms of it. Never use this function directly! | 598 | in terms of it. Never use this function directly! |
570 | 599 | ||
571 | 600 | ||
572 | extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value); | ||
573 | extern s32 i2c_smbus_read_byte(struct i2c_client * client); | 601 | extern s32 i2c_smbus_read_byte(struct i2c_client * client); |
574 | extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); | 602 | extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); |
575 | extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); | 603 | extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); |
@@ -578,30 +606,31 @@ SMBus communication | |||
578 | extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command); | 606 | extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command); |
579 | extern s32 i2c_smbus_write_word_data(struct i2c_client * client, | 607 | extern s32 i2c_smbus_write_word_data(struct i2c_client * client, |
580 | u8 command, u16 value); | 608 | u8 command, u16 value); |
609 | extern s32 i2c_smbus_read_block_data(struct i2c_client * client, | ||
610 | u8 command, u8 *values); | ||
581 | extern s32 i2c_smbus_write_block_data(struct i2c_client * client, | 611 | extern s32 i2c_smbus_write_block_data(struct i2c_client * client, |
582 | u8 command, u8 length, | 612 | u8 command, u8 length, |
583 | u8 *values); | 613 | u8 *values); |
584 | extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, | 614 | extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, |
585 | u8 command, u8 length, u8 *values); | 615 | u8 command, u8 length, u8 *values); |
586 | |||
587 | These ones were removed in Linux 2.6.10 because they had no users, but could | ||
588 | be added back later if needed: | ||
589 | |||
590 | extern s32 i2c_smbus_read_block_data(struct i2c_client * client, | ||
591 | u8 command, u8 *values); | ||
592 | extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, | 616 | extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, |
593 | u8 command, u8 length, | 617 | u8 command, u8 length, |
594 | u8 *values); | 618 | u8 *values); |
619 | |||
620 | These ones were removed from i2c-core because they had no users, but could | ||
621 | be added back later if needed: | ||
622 | |||
623 | extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value); | ||
595 | extern s32 i2c_smbus_process_call(struct i2c_client * client, | 624 | extern s32 i2c_smbus_process_call(struct i2c_client * client, |
596 | u8 command, u16 value); | 625 | u8 command, u16 value); |
597 | extern s32 i2c_smbus_block_process_call(struct i2c_client *client, | 626 | extern s32 i2c_smbus_block_process_call(struct i2c_client *client, |
598 | u8 command, u8 length, | 627 | u8 command, u8 length, |
599 | u8 *values) | 628 | u8 *values) |
600 | 629 | ||
601 | All these transactions return -1 on failure. The 'write' transactions | 630 | All these transactions return a negative errno value on failure. The 'write' |
602 | return 0 on success; the 'read' transactions return the read value, except | 631 | transactions return 0 on success; the 'read' transactions return the read |
603 | for read_block, which returns the number of values read. The block buffers | 632 | value, except for block transactions, which return the number of values |
604 | need not be longer than 32 bytes. | 633 | read. The block buffers need not be longer than 32 bytes. |
605 | 634 | ||
606 | You can read the file `smbus-protocol' for more information about the | 635 | You can read the file `smbus-protocol' for more information about the |
607 | actual SMBus protocol. | 636 | actual SMBus protocol. |
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index 240ce7a56c40..3bb5f466a90d 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt | |||
@@ -117,6 +117,7 @@ Code Seq# Include File Comments | |||
117 | <mailto:natalia@nikhefk.nikhef.nl> | 117 | <mailto:natalia@nikhefk.nikhef.nl> |
118 | 'c' 00-7F linux/comstats.h conflict! | 118 | 'c' 00-7F linux/comstats.h conflict! |
119 | 'c' 00-7F linux/coda.h conflict! | 119 | 'c' 00-7F linux/coda.h conflict! |
120 | 'c' 80-9F asm-s390/chsc.h | ||
120 | 'd' 00-FF linux/char/drm/drm/h conflict! | 121 | 'd' 00-FF linux/char/drm/drm/h conflict! |
121 | 'd' 00-DF linux/video_decoder.h conflict! | 122 | 'd' 00-DF linux/video_decoder.h conflict! |
122 | 'd' F0-FF linux/digi1.h | 123 | 'd' F0-FF linux/digi1.h |
diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt index c19efdeace2c..91a6ecbae0bb 100644 --- a/Documentation/ioctl/hdio.txt +++ b/Documentation/ioctl/hdio.txt | |||
@@ -508,12 +508,13 @@ HDIO_DRIVE_RESET execute a device reset | |||
508 | 508 | ||
509 | error returns: | 509 | error returns: |
510 | EACCES Access denied: requires CAP_SYS_ADMIN | 510 | EACCES Access denied: requires CAP_SYS_ADMIN |
511 | ENXIO No such device: phy dead or ctl_addr == 0 | ||
512 | EIO I/O error: reset timed out or hardware error | ||
511 | 513 | ||
512 | notes: | 514 | notes: |
513 | 515 | ||
514 | Abort any current command, prevent anything else from being | 516 | Execute a reset on the device as soon as the current IO |
515 | queued, execute a reset on the device, and issue BLKRRPART | 517 | operation has completed. |
516 | ioctl on the block device. | ||
517 | 518 | ||
518 | Executes an ATAPI soft reset if applicable, otherwise | 519 | Executes an ATAPI soft reset if applicable, otherwise |
519 | executes an ATA soft reset on the controller. | 520 | executes an ATA soft reset on the controller. |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 795c487af8e4..25e88cf5d84e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -147,10 +147,14 @@ and is between 256 and 4096 characters. It is defined in the file | |||
147 | default: 0 | 147 | default: 0 |
148 | 148 | ||
149 | acpi_sleep= [HW,ACPI] Sleep options | 149 | acpi_sleep= [HW,ACPI] Sleep options |
150 | Format: { s3_bios, s3_mode, s3_beep } | 150 | Format: { s3_bios, s3_mode, s3_beep, old_ordering } |
151 | See Documentation/power/video.txt for s3_bios and s3_mode. | 151 | See Documentation/power/video.txt for s3_bios and s3_mode. |
152 | s3_beep is for debugging; it makes the PC's speaker beep | 152 | s3_beep is for debugging; it makes the PC's speaker beep |
153 | as soon as the kernel's real-mode entry point is called. | 153 | as soon as the kernel's real-mode entry point is called. |
154 | old_ordering causes the ACPI 1.0 ordering of the _PTS | ||
155 | control method, wrt putting devices into low power | ||
156 | states, to be enforced (the ACPI 2.0 ordering of _PTS is | ||
157 | used by default). | ||
154 | 158 | ||
155 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode | 159 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode |
156 | Format: { level | edge | high | low } | 160 | Format: { level | edge | high | low } |
@@ -571,6 +575,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
571 | 575 | ||
572 | debug_objects [KNL] Enable object debugging | 576 | debug_objects [KNL] Enable object debugging |
573 | 577 | ||
578 | debugpat [X86] Enable PAT debugging | ||
579 | |||
574 | decnet.addr= [HW,NET] | 580 | decnet.addr= [HW,NET] |
575 | Format: <area>[,<node>] | 581 | Format: <area>[,<node>] |
576 | See also Documentation/networking/decnet.txt. | 582 | See also Documentation/networking/decnet.txt. |
@@ -756,9 +762,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
756 | hd= [EIDE] (E)IDE hard drive subsystem geometry | 762 | hd= [EIDE] (E)IDE hard drive subsystem geometry |
757 | Format: <cyl>,<head>,<sect> | 763 | Format: <cyl>,<head>,<sect> |
758 | 764 | ||
759 | hd?= [HW] (E)IDE subsystem | ||
760 | hd?lun= See Documentation/ide/ide.txt. | ||
761 | |||
762 | highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact | 765 | highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact |
763 | size of <nn>. This works even on boxes that have no | 766 | size of <nn>. This works even on boxes that have no |
764 | highmem otherwise. This also works to reduce highmem | 767 | highmem otherwise. This also works to reduce highmem |
@@ -819,7 +822,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
819 | See Documentation/ide/ide.txt. | 822 | See Documentation/ide/ide.txt. |
820 | 823 | ||
821 | idle= [X86] | 824 | idle= [X86] |
822 | Format: idle=poll or idle=mwait | 825 | Format: idle=poll, idle=mwait, idle=halt, idle=nomwait |
823 | Poll forces a polling idle loop that can slightly improve the performance | 826 | Poll forces a polling idle loop that can slightly improve the performance |
824 | of waking up an idle CPU, but will use a lot of power and make the system | 827 | of waking up an idle CPU, but will use a lot of power and make the system |
825 | run hot. Not recommended. | 828 | run hot. Not recommended. |
@@ -827,6 +830,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
827 | to not use it because it doesn't save as much power as a normal idle | 830 | to not use it because it doesn't save as much power as a normal idle |
828 | loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same | 831 | loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same |
829 | as idle=poll. | 832 | as idle=poll. |
833 | idle=halt. Halt is forced to be used for CPU idle. | ||
834 | In such case C2/C3 won't be used again. | ||
835 | idle=nomwait. Disable mwait for CPU C-states | ||
830 | 836 | ||
831 | ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem | 837 | ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem |
832 | Claim all unknown PCI IDE storage controllers. | 838 | Claim all unknown PCI IDE storage controllers. |
@@ -1200,7 +1206,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1200 | or | 1206 | or |
1201 | memmap=0x10000$0x18690000 | 1207 | memmap=0x10000$0x18690000 |
1202 | 1208 | ||
1203 | memtest= [KNL,X86_64] Enable memtest | 1209 | memtest= [KNL,X86] Enable memtest |
1204 | Format: <integer> | 1210 | Format: <integer> |
1205 | range: 0,4 : pattern number | 1211 | range: 0,4 : pattern number |
1206 | default : 0 <disable> | 1212 | default : 0 <disable> |
@@ -1242,6 +1248,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1242 | mtdparts= [MTD] | 1248 | mtdparts= [MTD] |
1243 | See drivers/mtd/cmdlinepart.c. | 1249 | See drivers/mtd/cmdlinepart.c. |
1244 | 1250 | ||
1251 | mtdset= [ARM] | ||
1252 | ARM/S3C2412 JIVE boot control | ||
1253 | |||
1254 | See arch/arm/mach-s3c2412/mach-jive.c | ||
1255 | |||
1245 | mtouchusb.raw_coordinates= | 1256 | mtouchusb.raw_coordinates= |
1246 | [HW] Make the MicroTouch USB driver use raw coordinates | 1257 | [HW] Make the MicroTouch USB driver use raw coordinates |
1247 | ('y', default) or cooked coordinates ('n') | 1258 | ('y', default) or cooked coordinates ('n') |
@@ -1530,6 +1541,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1530 | Use with caution as certain devices share | 1541 | Use with caution as certain devices share |
1531 | address decoders between ROMs and other | 1542 | address decoders between ROMs and other |
1532 | resources. | 1543 | resources. |
1544 | norom [X86-32,X86-64] Do not assign address space to | ||
1545 | expansion ROMs that do not already have | ||
1546 | BIOS assigned address ranges. | ||
1533 | irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be | 1547 | irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be |
1534 | assigned automatically to PCI devices. You can | 1548 | assigned automatically to PCI devices. You can |
1535 | make the kernel exclude IRQs of your ISA cards | 1549 | make the kernel exclude IRQs of your ISA cards |
@@ -1605,6 +1619,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1605 | Format: { parport<nr> | timid | 0 } | 1619 | Format: { parport<nr> | timid | 0 } |
1606 | See also Documentation/parport.txt. | 1620 | See also Documentation/parport.txt. |
1607 | 1621 | ||
1622 | pmtmr= [X86] Manual setup of pmtmr I/O Port. | ||
1623 | Override pmtimer IOPort with a hex value. | ||
1624 | e.g. pmtmr=0x508 | ||
1625 | |||
1608 | pnpacpi= [ACPI] | 1626 | pnpacpi= [ACPI] |
1609 | { off } | 1627 | { off } |
1610 | 1628 | ||
@@ -2140,6 +2158,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2140 | Note that genuine overcurrent events won't be | 2158 | Note that genuine overcurrent events won't be |
2141 | reported either. | 2159 | reported either. |
2142 | 2160 | ||
2161 | unknown_nmi_panic | ||
2162 | [X86-32,X86-64] | ||
2163 | Set unknown_nmi_panic=1 early on boot. | ||
2164 | |||
2143 | usbcore.autosuspend= | 2165 | usbcore.autosuspend= |
2144 | [USB] The autosuspend time delay (in seconds) used | 2166 | [USB] The autosuspend time delay (in seconds) used |
2145 | for newly-detected USB devices (default 2). This | 2167 | for newly-detected USB devices (default 2). This |
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 6877e7187113..a79633d702bf 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -172,6 +172,7 @@ architectures: | |||
172 | - ia64 (Does not support probes on instruction slot1.) | 172 | - ia64 (Does not support probes on instruction slot1.) |
173 | - sparc64 (Return probes not yet implemented.) | 173 | - sparc64 (Return probes not yet implemented.) |
174 | - arm | 174 | - arm |
175 | - ppc | ||
175 | 176 | ||
176 | 3. Configuring Kprobes | 177 | 3. Configuring Kprobes |
177 | 178 | ||
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt index 79b7dbd22141..69b5dd4e5a59 100644 --- a/Documentation/laptops/acer-wmi.txt +++ b/Documentation/laptops/acer-wmi.txt | |||
@@ -174,8 +174,6 @@ The LED is exposed through the LED subsystem, and can be found in: | |||
174 | The mail LED is autodetected, so if you don't have one, the LED device won't | 174 | The mail LED is autodetected, so if you don't have one, the LED device won't |
175 | be registered. | 175 | be registered. |
176 | 176 | ||
177 | If you have a mail LED that is not green, please report this to me. | ||
178 | |||
179 | Backlight | 177 | Backlight |
180 | ********* | 178 | ********* |
181 | 179 | ||
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index a0cda062bc33..7fa7fe71d7a8 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt | |||
@@ -289,35 +289,73 @@ downdelay | |||
289 | fail_over_mac | 289 | fail_over_mac |
290 | 290 | ||
291 | Specifies whether active-backup mode should set all slaves to | 291 | Specifies whether active-backup mode should set all slaves to |
292 | the same MAC address (the traditional behavior), or, when | 292 | the same MAC address at enslavement (the traditional |
293 | enabled, change the bond's MAC address when changing the | 293 | behavior), or, when enabled, perform special handling of the |
294 | active interface (i.e., fail over the MAC address itself). | 294 | bond's MAC address in accordance with the selected policy. |
295 | 295 | ||
296 | Fail over MAC is useful for devices that cannot ever alter | 296 | Possible values are: |
297 | their MAC address, or for devices that refuse incoming | 297 | |
298 | broadcasts with their own source MAC (which interferes with | 298 | none or 0 |
299 | the ARP monitor). | 299 | |
300 | 300 | This setting disables fail_over_mac, and causes | |
301 | The down side of fail over MAC is that every device on the | 301 | bonding to set all slaves of an active-backup bond to |
302 | network must be updated via gratuitous ARP, vs. just updating | 302 | the same MAC address at enslavement time. This is the |
303 | a switch or set of switches (which often takes place for any | 303 | default. |
304 | traffic, not just ARP traffic, if the switch snoops incoming | 304 | |
305 | traffic to update its tables) for the traditional method. If | 305 | active or 1 |
306 | the gratuitous ARP is lost, communication may be disrupted. | 306 | |
307 | 307 | The "active" fail_over_mac policy indicates that the | |
308 | When fail over MAC is used in conjuction with the mii monitor, | 308 | MAC address of the bond should always be the MAC |
309 | devices which assert link up prior to being able to actually | 309 | address of the currently active slave. The MAC |
310 | transmit and receive are particularly susecptible to loss of | 310 | address of the slaves is not changed; instead, the MAC |
311 | the gratuitous ARP, and an appropriate updelay setting may be | 311 | address of the bond changes during a failover. |
312 | required. | 312 | |
313 | 313 | This policy is useful for devices that cannot ever | |
314 | A value of 0 disables fail over MAC, and is the default. A | 314 | alter their MAC address, or for devices that refuse |
315 | value of 1 enables fail over MAC. This option is enabled | 315 | incoming broadcasts with their own source MAC (which |
316 | automatically if the first slave added cannot change its MAC | 316 | interferes with the ARP monitor). |
317 | address. This option may be modified via sysfs only when no | 317 | |
318 | slaves are present in the bond. | 318 | The down side of this policy is that every device on |
319 | 319 | the network must be updated via gratuitous ARP, | |
320 | This option was added in bonding version 3.2.0. | 320 | vs. just updating a switch or set of switches (which |
321 | often takes place for any traffic, not just ARP | ||
322 | traffic, if the switch snoops incoming traffic to | ||
323 | update its tables) for the traditional method. If the | ||
324 | gratuitous ARP is lost, communication may be | ||
325 | disrupted. | ||
326 | |||
327 | When this policy is used in conjunction with the mii | ||
328 | monitor, devices which assert link up prior to being | ||
329 | able to actually transmit and receive are particularly | ||
330 | susceptible to loss of the gratuitous ARP, and an | ||
331 | appropriate updelay setting may be required. | ||
332 | |||
333 | follow or 2 | ||
334 | |||
335 | The "follow" fail_over_mac policy causes the MAC | ||
336 | address of the bond to be selected normally (normally | ||
337 | the MAC address of the first slave added to the bond). | ||
338 | However, the second and subsequent slaves are not set | ||
339 | to this MAC address while they are in a backup role; a | ||
340 | slave is programmed with the bond's MAC address at | ||
341 | failover time (and the formerly active slave receives | ||
342 | the newly active slave's MAC address). | ||
343 | |||
344 | This policy is useful for multiport devices that | ||
345 | either become confused or incur a performance penalty | ||
346 | when multiple ports are programmed with the same MAC | ||
347 | address. | ||
348 | |||
349 | |||
350 | The default policy is none, unless the first slave cannot | ||
351 | change its MAC address, in which case the active policy is | ||
352 | selected by default. | ||
353 | |||
354 | This option may be modified via sysfs only when no slaves are | ||
355 | present in the bond. | ||
356 | |||
357 | This option was added in bonding version 3.2.0. The "follow" | ||
358 | policy was added in bonding version 3.3.0. | ||
321 | 359 | ||
322 | lacp_rate | 360 | lacp_rate |
323 | 361 | ||
@@ -338,7 +376,8 @@ max_bonds | |||
338 | Specifies the number of bonding devices to create for this | 376 | Specifies the number of bonding devices to create for this |
339 | instance of the bonding driver. E.g., if max_bonds is 3, and | 377 | instance of the bonding driver. E.g., if max_bonds is 3, and |
340 | the bonding driver is not already loaded, then bond0, bond1 | 378 | the bonding driver is not already loaded, then bond0, bond1 |
341 | and bond2 will be created. The default value is 1. | 379 | and bond2 will be created. The default value is 1. Specifying |
380 | a value of 0 will load bonding, but will not create any devices. | ||
342 | 381 | ||
343 | miimon | 382 | miimon |
344 | 383 | ||
@@ -501,6 +540,17 @@ mode | |||
501 | swapped with the new curr_active_slave that was | 540 | swapped with the new curr_active_slave that was |
502 | chosen. | 541 | chosen. |
503 | 542 | ||
543 | num_grat_arp | ||
544 | |||
545 | Specifies the number of gratuitous ARPs to be issued after a | ||
546 | failover event. One gratuitous ARP is issued immediately after | ||
547 | the failover, subsequent ARPs are sent at a rate of one per link | ||
548 | monitor interval (arp_interval or miimon, whichever is active). | ||
549 | |||
550 | The valid range is 0 - 255; the default value is 1. This option | ||
551 | affects only the active-backup mode. This option was added in | ||
552 | bonding version 3.3.0. | ||
553 | |||
504 | primary | 554 | primary |
505 | 555 | ||
506 | A string (eth0, eth2, etc) specifying which slave is the | 556 | A string (eth0, eth2, etc) specifying which slave is the |
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt new file mode 100644 index 000000000000..65df3dea5561 --- /dev/null +++ b/Documentation/networking/dm9000.txt | |||
@@ -0,0 +1,167 @@ | |||
1 | DM9000 Network driver | ||
2 | ===================== | ||
3 | |||
4 | Copyright 2008 Simtec Electronics, | ||
5 | Ben Dooks <ben@simtec.co.uk> <ben-linux@fluff.org> | ||
6 | |||
7 | |||
8 | Introduction | ||
9 | ------------ | ||
10 | |||
11 | This file describes how to use the DM9000 platform-device based network driver | ||
12 | that is contained in the files drivers/net/dm9000.c and drivers/net/dm9000.h. | ||
13 | |||
14 | The driver supports three DM9000 variants, the DM9000E which is the first chip | ||
15 | supported as well as the newer DM9000A and DM9000B devices. It is currently | ||
16 | maintained and tested by Ben Dooks, who should be CC'd on any patches for this | ||
17 | driver. | ||
18 | |||
19 | |||
20 | Defining the platform device | ||
21 | ---------------------------- | ||
22 | |||
23 | The minimum set of resources attached to the platform device are as follows: | ||
24 | |||
25 | 1) The physical address of the address register | ||
26 | 2) The physical address of the data register | ||
27 | 3) The IRQ line the device's interrupt pin is connected to. | ||
28 | |||
29 | These resources should be specified in that order, as the ordering of the | ||
30 | two address regions is important (the driver expects these to be address | ||
31 | and then data). | ||
32 | |||
33 | An example from arch/arm/mach-s3c2410/mach-bast.c is: | ||
34 | |||
35 | static struct resource bast_dm9k_resource[] = { | ||
36 | [0] = { | ||
37 | .start = S3C2410_CS5 + BAST_PA_DM9000, | ||
38 | .end = S3C2410_CS5 + BAST_PA_DM9000 + 3, | ||
39 | .flags = IORESOURCE_MEM, | ||
40 | }, | ||
41 | [1] = { | ||
42 | .start = S3C2410_CS5 + BAST_PA_DM9000 + 0x40, | ||
43 | .end = S3C2410_CS5 + BAST_PA_DM9000 + 0x40 + 0x3f, | ||
44 | .flags = IORESOURCE_MEM, | ||
45 | }, | ||
46 | [2] = { | ||
47 | .start = IRQ_DM9000, | ||
48 | .end = IRQ_DM9000, | ||
49 | .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, | ||
50 | } | ||
51 | }; | ||
52 | |||
53 | static struct platform_device bast_device_dm9k = { | ||
54 | .name = "dm9000", | ||
55 | .id = 0, | ||
56 | .num_resources = ARRAY_SIZE(bast_dm9k_resource), | ||
57 | .resource = bast_dm9k_resource, | ||
58 | }; | ||
59 | |||
60 | Note the setting of the IRQ trigger flag in bast_dm9k_resource[2].flags, | ||
61 | as this will generate a warning if it is not present. The trigger from | ||
62 | the flags field will be passed to request_irq() when registering the IRQ | ||
63 | handler to ensure that the IRQ is set up correctly. | ||
64 | |||
65 | This shows a typical platform device, without the optional configuration | ||
66 | platform data supplied. The next example uses the same resources, but adds | ||
67 | the optional platform data to pass extra configuration data: | ||
68 | |||
69 | static struct dm9000_plat_data bast_dm9k_platdata = { | ||
70 | .flags = DM9000_PLATF_16BITONLY, | ||
71 | }; | ||
72 | |||
73 | static struct platform_device bast_device_dm9k = { | ||
74 | .name = "dm9000", | ||
75 | .id = 0, | ||
76 | .num_resources = ARRAY_SIZE(bast_dm9k_resource), | ||
77 | .resource = bast_dm9k_resource, | ||
78 | .dev = { | ||
79 | .platform_data = &bast_dm9k_platdata, | ||
80 | } | ||
81 | }; | ||
82 | |||
83 | The platform data is defined in include/linux/dm9000.h and described below. | ||
84 | |||
85 | |||
86 | Platform data | ||
87 | ------------- | ||
88 | |||
89 | Extra platform data for the DM9000 can describe the IO bus width to the | ||
90 | device, whether or not an external PHY is attached to the device and | ||
91 | the availability of an external configuration EEPROM. | ||
92 | |||
93 | The flags for the platform data .flags field are as follows: | ||
94 | |||
95 | DM9000_PLATF_8BITONLY | ||
96 | |||
97 | The IO should be done with 8bit operations. | ||
98 | |||
99 | DM9000_PLATF_16BITONLY | ||
100 | |||
101 | The IO should be done with 16bit operations. | ||
102 | |||
103 | DM9000_PLATF_32BITONLY | ||
104 | |||
105 | The IO should be done with 32bit operations. | ||
106 | |||
107 | DM9000_PLATF_EXT_PHY | ||
108 | |||
109 | The chip is connected to an external PHY. | ||
110 | |||
111 | DM9000_PLATF_NO_EEPROM | ||
112 | |||
113 | This can be used to signify that the board does not have an | ||
114 | EEPROM, or that the EEPROM should be hidden from the user. | ||
115 | |||
116 | DM9000_PLATF_SIMPLE_PHY | ||
117 | |||
118 | Switch to using the simpler PHY polling method which does not | ||
119 | try to read the MII PHY state regularly. This is only available | ||
120 | when using the internal PHY. See the section on link state polling | ||
121 | for more information. | ||
122 | |||
123 | The config symbol DM9000_FORCE_SIMPLE_PHY_POLL, Kconfig entry | ||
124 | "Force simple NSR based PHY polling" allows this flag to be | ||
125 | forced on at build time. | ||
126 | |||
127 | |||
128 | PHY Link state polling | ||
129 | ---------------------- | ||
130 | |||
131 | The driver keeps track of the link state and informs the network core | ||
132 | about link (carrier) availability. This is managed by several methods | ||
133 | depending on the version of the chip and on which PHY is being used. | ||
134 | |||
135 | For the internal PHY, the original (and currently default) method is | ||
136 | to read the MII state, either when the status changes if we have the | ||
137 | necessary interrupt support in the chip or every two seconds via a | ||
138 | periodic timer. | ||
139 | |||
140 | To reduce the overhead for the internal PHY, there is now the option | ||
141 | of using the DM9000_FORCE_SIMPLE_PHY_POLL config, or DM9000_PLATF_SIMPLE_PHY | ||
142 | platform data option to read the summary information without the | ||
143 | expensive MII accesses. This method is faster, but does not print | ||
144 | as much information. | ||
145 | |||
146 | When using an external PHY, the driver currently has to poll the MII | ||
147 | link status as there is no method for getting an interrupt on link change. | ||
148 | |||
149 | |||
150 | DM9000A / DM9000B | ||
151 | ----------------- | ||
152 | |||
153 | These chips are functionally similar to the DM9000E and are supported easily | ||
154 | by the same driver. The features are: | ||
155 | |||
156 | 1) Interrupt on internal PHY state change. This means that the periodic | ||
157 | polling of the PHY status may be disabled on these devices when using | ||
158 | the internal PHY. | ||
159 | |||
160 | 2) TCP/UDP checksum offloading, which the driver does not currently support. | ||
161 | |||
162 | |||
163 | ethtool | ||
164 | ------- | ||
165 | |||
166 | The driver supports the ethtool interface for access to the driver | ||
167 | state information, the PHY state and the EEPROM. | ||
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17f1f91af35c..d84932650fd3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -148,9 +148,9 @@ tcp_available_congestion_control - STRING | |||
148 | but not loaded. | 148 | but not loaded. |
149 | 149 | ||
150 | tcp_base_mss - INTEGER | 150 | tcp_base_mss - INTEGER |
151 | The initial value of search_low to be used by Packetization Layer | 151 | The initial value of search_low to be used by the packetization layer |
152 | Path MTU Discovery (MTU probing). If MTU probing is enabled, | 152 | Path MTU discovery (MTU probing). If MTU probing is enabled, |
153 | this is the inital MSS used by the connection. | 153 | this is the initial MSS used by the connection. |
154 | 154 | ||
155 | tcp_congestion_control - STRING | 155 | tcp_congestion_control - STRING |
156 | Set the congestion control algorithm to be used for new | 156 | Set the congestion control algorithm to be used for new |
@@ -185,10 +185,9 @@ tcp_frto - INTEGER | |||
185 | timeouts. It is particularly beneficial in wireless environments | 185 | timeouts. It is particularly beneficial in wireless environments |
186 | where packet loss is typically due to random radio interference | 186 | where packet loss is typically due to random radio interference |
187 | rather than intermediate router congestion. F-RTO is sender-side | 187 | rather than intermediate router congestion. F-RTO is sender-side |
188 | only modification. Therefore it does not require any support from | 188 | only modification. Therefore it does not require any support from |
189 | the peer, but in a typical case, however, where wireless link is | 189 | the peer. |
190 | the local access link and most of the data flows downlink, the | 190 | |
191 | faraway servers should have F-RTO enabled to take advantage of it. | ||
192 | If set to 1, basic version is enabled. 2 enables SACK enhanced | 191 | If set to 1, basic version is enabled. 2 enables SACK enhanced |
193 | F-RTO if flow uses SACK. The basic version can be used also when | 192 | F-RTO if flow uses SACK. The basic version can be used also when |
194 | SACK is in use though scenario(s) with it exists where F-RTO | 193 | SACK is in use though scenario(s) with it exists where F-RTO |
@@ -276,7 +275,7 @@ tcp_mem - vector of 3 INTEGERs: min, pressure, max | |||
276 | memory. | 275 | memory. |
277 | 276 | ||
278 | tcp_moderate_rcvbuf - BOOLEAN | 277 | tcp_moderate_rcvbuf - BOOLEAN |
279 | If set, TCP performs receive buffer autotuning, attempting to | 278 | If set, TCP performs receive buffer auto-tuning, attempting to |
280 | automatically size the buffer (no greater than tcp_rmem[2]) to | 279 | automatically size the buffer (no greater than tcp_rmem[2]) to |
281 | match the size required by the path for full throughput. Enabled by | 280 | match the size required by the path for full throughput. Enabled by |
282 | default. | 281 | default. |
@@ -336,7 +335,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
336 | pressure. | 335 | pressure. |
337 | Default: 8K | 336 | Default: 8K |
338 | 337 | ||
339 | default: default size of receive buffer used by TCP sockets. | 338 | default: initial size of receive buffer used by TCP sockets. |
340 | This value overrides net.core.rmem_default used by other protocols. | 339 | This value overrides net.core.rmem_default used by other protocols. |
341 | Default: 87380 bytes. This value results in window of 65535 with | 340 | Default: 87380 bytes. This value results in window of 65535 with |
342 | default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit | 341 | default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit |
@@ -344,8 +343,10 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
344 | 343 | ||
345 | max: maximal size of receive buffer allowed for automatically | 344 | max: maximal size of receive buffer allowed for automatically |
346 | selected receiver buffers for TCP socket. This value does not override | 345 | selected receiver buffers for TCP socket. This value does not override |
347 | net.core.rmem_max, "static" selection via SO_RCVBUF does not use this. | 346 | net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables |
348 | Default: 87380*2 bytes. | 347 | automatic tuning of that socket's receive buffer size, in which |
348 | case this value is ignored. | ||
349 | Default: between 87380B and 4MB, depending on RAM size. | ||
349 | 350 | ||
350 | tcp_sack - BOOLEAN | 351 | tcp_sack - BOOLEAN |
351 | Enable select acknowledgments (SACKS). | 352 | Enable select acknowledgments (SACKS). |
@@ -358,7 +359,7 @@ tcp_slow_start_after_idle - BOOLEAN | |||
358 | Default: 1 | 359 | Default: 1 |
359 | 360 | ||
360 | tcp_stdurg - BOOLEAN | 361 | tcp_stdurg - BOOLEAN |
361 | Use the Host requirements interpretation of the TCP urg pointer field. | 362 | Use the Host requirements interpretation of the TCP urgent pointer field. |
362 | Most hosts use the older BSD interpretation, so if you turn this on | 363 | Most hosts use the older BSD interpretation, so if you turn this on |
363 | Linux might not communicate correctly with them. | 364 | Linux might not communicate correctly with them. |
364 | Default: FALSE | 365 | Default: FALSE |
@@ -371,12 +372,12 @@ tcp_synack_retries - INTEGER | |||
371 | tcp_syncookies - BOOLEAN | 372 | tcp_syncookies - BOOLEAN |
372 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES | 373 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES |
373 | Send out syncookies when the syn backlog queue of a socket | 374 | Send out syncookies when the syn backlog queue of a socket |
374 | overflows. This is to prevent against the common 'syn flood attack' | 375 | overflows. This is to prevent against the common 'SYN flood attack' |
375 | Default: FALSE | 376 | Default: FALSE |
376 | 377 | ||
377 | Note that syncookies is a fallback facility. | 378 | Note that syncookies is a fallback facility. |
378 | It MUST NOT be used to help highly loaded servers to stand | 379 | It MUST NOT be used to help highly loaded servers to stand |
379 | against legal connection rate. If you see synflood warnings | 380 | against legal connection rate. If you see SYN flood warnings |
380 | in your logs, but investigation shows that they occur | 381 | in your logs, but investigation shows that they occur |
381 | because of overload with legal connections, you should tune | 382 | because of overload with legal connections, you should tune |
382 | other parameters until this warning disappears. | 383 | other parameters until this warning disappears. |
@@ -386,7 +387,7 @@ tcp_syncookies - BOOLEAN | |||
386 | to use TCP extensions, can result in serious degradation | 387 | to use TCP extensions, can result in serious degradation |
387 | of some services (f.e. SMTP relaying), visible not by you, | 388 | of some services (f.e. SMTP relaying), visible not by you, |
388 | but your clients and relays, contacting you. While you see | 389 | but your clients and relays, contacting you. While you see |
389 | synflood warnings in logs not being really flooded, your server | 390 | SYN flood warnings in logs not being really flooded, your server |
390 | is seriously misconfigured. | 391 | is seriously misconfigured. |
391 | 392 | ||
392 | tcp_syn_retries - INTEGER | 393 | tcp_syn_retries - INTEGER |
@@ -419,19 +420,21 @@ tcp_window_scaling - BOOLEAN | |||
419 | Enable window scaling as defined in RFC1323. | 420 | Enable window scaling as defined in RFC1323. |
420 | 421 | ||
421 | tcp_wmem - vector of 3 INTEGERs: min, default, max | 422 | tcp_wmem - vector of 3 INTEGERs: min, default, max |
422 | min: Amount of memory reserved for send buffers for TCP socket. | 423 | min: Amount of memory reserved for send buffers for TCP sockets. |
423 | Each TCP socket has rights to use it due to fact of its birth. | 424 | Each TCP socket has rights to use it due to fact of its birth. |
424 | Default: 4K | 425 | Default: 4K |
425 | 426 | ||
426 | default: Amount of memory allowed for send buffers for TCP socket | 427 | default: initial size of send buffer used by TCP sockets. This |
427 | by default. This value overrides net.core.wmem_default used | 428 | value overrides net.core.wmem_default used by other protocols. |
428 | by other protocols, it is usually lower than net.core.wmem_default. | 429 | It is usually lower than net.core.wmem_default. |
429 | Default: 16K | 430 | Default: 16K |
430 | 431 | ||
431 | max: Maximal amount of memory allowed for automatically selected | 432 | max: Maximal amount of memory allowed for automatically tuned |
432 | send buffers for TCP socket. This value does not override | 433 | send buffers for TCP sockets. This value does not override |
433 | net.core.wmem_max, "static" selection via SO_SNDBUF does not use this. | 434 | net.core.wmem_max. Calling setsockopt() with SO_SNDBUF disables |
434 | Default: 128K | 435 | automatic tuning of that socket's send buffer size, in which case |
436 | this value is ignored. | ||
437 | Default: between 64K and 4MB, depending on RAM size. | ||
435 | 438 | ||
436 | tcp_workaround_signed_windows - BOOLEAN | 439 | tcp_workaround_signed_windows - BOOLEAN |
437 | If set, assume no receipt of a window scaling option means the | 440 | If set, assume no receipt of a window scaling option means the |
@@ -548,8 +551,9 @@ icmp_echo_ignore_broadcasts - BOOLEAN | |||
548 | icmp_ratelimit - INTEGER | 551 | icmp_ratelimit - INTEGER |
549 | Limit the maximal rates for sending ICMP packets whose type matches | 552 | Limit the maximal rates for sending ICMP packets whose type matches |
550 | icmp_ratemask (see below) to specific targets. | 553 | icmp_ratemask (see below) to specific targets. |
551 | 0 to disable any limiting, otherwise the maximal rate in jiffies(1) | 554 | 0 to disable any limiting, |
552 | Default: 100 | 555 | otherwise the minimal space between responses in milliseconds. |
556 | Default: 1000 | ||
553 | 557 | ||
554 | icmp_ratemask - INTEGER | 558 | icmp_ratemask - INTEGER |
555 | Mask made of ICMP types for which rates are being limited. | 559 | Mask made of ICMP types for which rates are being limited. |
@@ -1020,11 +1024,23 @@ max_addresses - INTEGER | |||
1020 | autoconfigured addresses. | 1024 | autoconfigured addresses. |
1021 | Default: 16 | 1025 | Default: 16 |
1022 | 1026 | ||
1027 | disable_ipv6 - BOOLEAN | ||
1028 | Disable IPv6 operation. | ||
1029 | Default: FALSE (enable IPv6 operation) | ||
1030 | |||
1031 | accept_dad - INTEGER | ||
1032 | Whether to accept DAD (Duplicate Address Detection). | ||
1033 | 0: Disable DAD | ||
1034 | 1: Enable DAD (default) | ||
1035 | 2: Enable DAD, and disable IPv6 operation if MAC-based duplicate | ||
1036 | link-local address has been found. | ||
1037 | |||
1023 | icmp/*: | 1038 | icmp/*: |
1024 | ratelimit - INTEGER | 1039 | ratelimit - INTEGER |
1025 | Limit the maximal rates for sending ICMPv6 packets. | 1040 | Limit the maximal rates for sending ICMPv6 packets. |
1026 | 0 to disable any limiting, otherwise the maximal rate in jiffies(1) | 1041 | 0 to disable any limiting, |
1027 | Default: 100 | 1042 | otherwise the minimal space between responses in milliseconds. |
1043 | Default: 1000 | ||
1028 | 1044 | ||
1029 | 1045 | ||
1030 | IPv6 Update by: | 1046 | IPv6 Update by: |
@@ -1060,24 +1076,193 @@ bridge-nf-filter-pppoe-tagged - BOOLEAN | |||
1060 | Default: 1 | 1076 | Default: 1 |
1061 | 1077 | ||
1062 | 1078 | ||
1063 | UNDOCUMENTED: | 1079 | proc/sys/net/sctp/* Variables: |
1080 | |||
1081 | addip_enable - BOOLEAN | ||
1082 | Enable or disable extension of Dynamic Address Reconfiguration | ||
1083 | (ADD-IP) functionality specified in RFC5061. This extension provides | ||
1084 | the ability to dynamically add and remove new addresses for the SCTP | ||
1085 | associations. | ||
1086 | |||
1087 | 1: Enable extension. | ||
1088 | |||
1089 | 0: Disable extension. | ||
1090 | |||
1091 | Default: 0 | ||
1092 | |||
1093 | addip_noauth_enable - BOOLEAN | ||
1094 | Dynamic Address Reconfiguration (ADD-IP) requires the use of | ||
1095 | authentication to protect the operations of adding or removing new | ||
1096 | addresses. This requirement is mandated so that unauthorized hosts | ||
1097 | would not be able to hijack associations. However, older | ||
1098 | implementations may not have implemented this requirement while | ||
1099 | allowing the ADD-IP extension. For reasons of interoperability, | ||
1100 | we provide this variable to control the enforcement of the | ||
1101 | authentication requirement. | ||
1102 | |||
1103 | 1: Allow ADD-IP extension to be used without authentication. This | ||
1104 | should only be set in a closed environment for interoperability | ||
1105 | with older implementations. | ||
1106 | |||
1107 | 0: Enforce the authentication requirement | ||
1108 | |||
1109 | Default: 0 | ||
1110 | |||
1111 | auth_enable - BOOLEAN | ||
1112 | Enable or disable Authenticated Chunks extension. This extension | ||
1113 | provides the ability to send and receive authenticated chunks and is | ||
1114 | required for secure operation of Dynamic Address Reconfiguration | ||
1115 | (ADD-IP) extension. | ||
1116 | |||
1117 | 1: Enable this extension. | ||
1118 | 0: Disable this extension. | ||
1119 | |||
1120 | Default: 0 | ||
1121 | |||
1122 | prsctp_enable - BOOLEAN | ||
1123 | Enable or disable the Partial Reliability extension (RFC3758) which | ||
1124 | is used to notify peers that a given DATA should no longer be expected. | ||
1125 | |||
1126 | 1: Enable extension | ||
1127 | 0: Disable | ||
1128 | |||
1129 | Default: 1 | ||
1130 | |||
1131 | max_burst - INTEGER | ||
1132 | The limit of the number of new packets that can be initially sent. It | ||
1133 | controls how bursty the generated traffic can be. | ||
1134 | |||
1135 | Default: 4 | ||
1136 | |||
1137 | association_max_retrans - INTEGER | ||
1138 | Set the maximum number for retransmissions that an association can | ||
1139 | attempt before deciding that the remote end is unreachable. If this value | ||
1140 | is exceeded, the association is terminated. | ||
1141 | |||
1142 | Default: 10 | ||
1143 | |||
1144 | max_init_retransmits - INTEGER | ||
1145 | The maximum number of retransmissions of INIT and COOKIE-ECHO chunks | ||
1146 | that an association will attempt before declaring the destination | ||
1147 | unreachable and terminating. | ||
1148 | |||
1149 | Default: 8 | ||
1150 | |||
1151 | path_max_retrans - INTEGER | ||
1152 | The maximum number of retransmissions that will be attempted on a given | ||
1153 | path. Once this threshold is exceeded, the path is considered | ||
1154 | unreachable, and new traffic will use a different path when the | ||
1155 | association is multihomed. | ||
1156 | |||
1157 | Default: 5 | ||
1158 | |||
1159 | rto_initial - INTEGER | ||
1160 | The initial round trip timeout value in milliseconds that will be used | ||
1161 | in calculating round trip times. This is the initial time interval | ||
1162 | for retransmissions. | ||
1064 | 1163 | ||
1065 | dev_weight FIXME | 1164 | Default: 3000 |
1066 | discovery_slots FIXME | 1165 | |
1067 | discovery_timeout FIXME | 1166 | rto_max - INTEGER |
1068 | fast_poll_increase FIXME | 1167 | The maximum value (in milliseconds) of the round trip timeout. This |
1069 | ip6_queue_maxlen FIXME | 1168 | is the largest time interval that can elapse between retransmissions. |
1070 | lap_keepalive_time FIXME | 1169 | |
1071 | lo_cong FIXME | 1170 | Default: 60000 |
1072 | max_baud_rate FIXME | 1171 | |
1073 | max_dgram_qlen FIXME | 1172 | rto_min - INTEGER |
1074 | max_noreply_time FIXME | 1173 | The minimum value (in milliseconds) of the round trip timeout. This |
1075 | max_tx_data_size FIXME | 1174 | is the smallest time interval that can elapse between retransmissions. |
1076 | max_tx_window FIXME | 1175 | |
1077 | min_tx_turn_time FIXME | 1176 | Default: 1000 |
1078 | mod_cong FIXME | 1177 | |
1079 | no_cong FIXME | 1178 | hb_interval - INTEGER |
1080 | no_cong_thresh FIXME | 1179 | The interval (in milliseconds) between HEARTBEAT chunks. These chunks |
1081 | slot_timeout FIXME | 1180 | are sent at the specified interval on idle paths to probe the state of |
1082 | warn_noreply_time FIXME | 1181 | a given path between 2 associations. |
1182 | |||
1183 | Default: 30000 | ||
1184 | |||
1185 | sack_timeout - INTEGER | ||
1186 | The amount of time (in milliseconds) that the implementation will wait | ||
1187 | to send a SACK. | ||
1188 | |||
1189 | Default: 200 | ||
1190 | |||
1191 | valid_cookie_life - INTEGER | ||
1192 | The default lifetime of the SCTP cookie (in milliseconds). The cookie | ||
1193 | is used during association establishment. | ||
1194 | |||
1195 | Default: 60000 | ||
1196 | |||
1197 | cookie_preserve_enable - BOOLEAN | ||
1198 | Enable or disable the ability to extend the lifetime of the SCTP cookie | ||
1199 | that is used during the establishment phase of an SCTP association. | ||
1200 | |||
1201 | 1: Enable cookie lifetime extension. | ||
1202 | 0: Disable | ||
1203 | |||
1204 | Default: 1 | ||
1205 | |||
1206 | rcvbuf_policy - INTEGER | ||
1207 | Determines if the receive buffer is attributed to the socket or to | ||
1208 | association. SCTP supports the capability to create multiple | ||
1209 | associations on a single socket. When using this capability, it is | ||
1210 | possible that a single stalled association that's buffering a lot | ||
1211 | of data may block other associations from delivering their data by | ||
1212 | consuming all of the receive buffer space. To work around this, | ||
1213 | the rcvbuf_policy could be set to attribute the receiver buffer space | ||
1214 | to each association instead of the socket. This prevents the described | ||
1215 | blocking. | ||
1216 | |||
1217 | 1: rcvbuf space is per association | ||
1218 | 0: rcvbuf space is per socket | ||
1219 | |||
1220 | Default: 0 | ||
1221 | |||
1222 | sndbuf_policy - INTEGER | ||
1223 | Similar to rcvbuf_policy above, this applies to send buffer space. | ||
1224 | |||
1225 | 1: Send buffer is tracked per association | ||
1226 | 0: Send buffer is tracked per socket. | ||
1227 | |||
1228 | Default: 0 | ||
1229 | |||
1230 | sctp_mem - vector of 3 INTEGERs: min, pressure, max | ||
1231 | Number of pages allowed for queueing by all SCTP sockets. | ||
1232 | |||
1233 | min: Below this number of pages SCTP is not bothered about its | ||
1234 | memory appetite. When amount of memory allocated by SCTP exceeds | ||
1235 | this number, SCTP starts to moderate memory usage. | ||
1236 | |||
1237 | pressure: This value was introduced to follow format of tcp_mem. | ||
1238 | |||
1239 | max: Number of pages allowed for queueing by all SCTP sockets. | ||
1240 | |||
1241 | Default is calculated at boot time from amount of available memory. | ||
1242 | |||
1243 | sctp_rmem - vector of 3 INTEGERs: min, default, max | ||
1244 | See tcp_rmem for a description. | ||
1245 | |||
1246 | sctp_wmem - vector of 3 INTEGERs: min, default, max | ||
1247 | See tcp_wmem for a description. | ||
1248 | |||
1249 | UNDOCUMENTED: | ||
1083 | 1250 | ||
1251 | /proc/sys/net/core/* | ||
1252 | dev_weight FIXME | ||
1253 | |||
1254 | /proc/sys/net/unix/* | ||
1255 | max_dgram_qlen FIXME | ||
1256 | |||
1257 | /proc/sys/net/irda/* | ||
1258 | fast_poll_increase FIXME | ||
1259 | warn_noreply_time FIXME | ||
1260 | discovery_slots FIXME | ||
1261 | slot_timeout FIXME | ||
1262 | max_baud_rate FIXME | ||
1263 | discovery_timeout FIXME | ||
1264 | lap_keepalive_time FIXME | ||
1265 | max_noreply_time FIXME | ||
1266 | max_tx_data_size FIXME | ||
1267 | max_tx_window FIXME | ||
1268 | min_tx_turn_time FIXME | ||
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt index 7c98277777eb..a0d0ffb5e584 100644 --- a/Documentation/networking/ixgb.txt +++ b/Documentation/networking/ixgb.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | Linux* Base Driver for the Intel(R) PRO/10GbE Family of Adapters | 1 | Linux Base Driver for 10 Gigabit Intel(R) Network Connection |
2 | ================================================================ | 2 | ============================================================= |
3 | 3 | ||
4 | November 17, 2004 | 4 | October 9, 2007 |
5 | 5 | ||
6 | 6 | ||
7 | Contents | 7 | Contents |
@@ -9,94 +9,151 @@ Contents | |||
9 | 9 | ||
10 | - In This Release | 10 | - In This Release |
11 | - Identifying Your Adapter | 11 | - Identifying Your Adapter |
12 | - Building and Installation | ||
12 | - Command Line Parameters | 13 | - Command Line Parameters |
13 | - Improving Performance | 14 | - Improving Performance |
15 | - Additional Configurations | ||
16 | - Known Issues/Troubleshooting | ||
14 | - Support | 17 | - Support |
15 | 18 | ||
16 | 19 | ||
20 | |||
17 | In This Release | 21 | In This Release |
18 | =============== | 22 | =============== |
19 | 23 | ||
20 | This file describes the Linux* Base Driver for the Intel(R) PRO/10GbE Family | 24 | This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R) |
21 | of Adapters, version 1.0.x. | 25 | Network Connection. This driver includes support for Itanium(R)2-based |
26 | systems. | ||
27 | |||
28 | For questions related to hardware requirements, refer to the documentation | ||
29 | supplied with your 10 Gigabit adapter. All hardware requirements listed apply | ||
30 | to use with Linux. | ||
31 | |||
32 | The following features are available in this kernel: | ||
33 | - Native VLANs | ||
34 | - Channel Bonding (teaming) | ||
35 | - SNMP | ||
36 | |||
37 | Channel Bonding documentation can be found in the Linux kernel source: | ||
38 | /Documentation/networking/bonding.txt | ||
39 | |||
40 | The driver information previously displayed in the /proc filesystem is not | ||
41 | supported in this release. Alternatively, you can use ethtool (version 1.6 | ||
42 | or later), lspci, and ifconfig to obtain the same information. | ||
43 | |||
44 | Instructions on updating ethtool can be found in the section "Additional | ||
45 | Configurations" later in this document. | ||
22 | 46 | ||
23 | For questions related to hardware requirements, refer to the documentation | ||
24 | supplied with your Intel PRO/10GbE adapter. All hardware requirements listed | ||
25 | apply to use with Linux. | ||
26 | 47 | ||
27 | Identifying Your Adapter | 48 | Identifying Your Adapter |
28 | ======================== | 49 | ======================== |
29 | 50 | ||
30 | To verify your Intel adapter is supported, find the board ID number on the | 51 | The following Intel network adapters are compatible with the drivers in this |
31 | adapter. Look for a label that has a barcode and a number in the format | 52 | release: |
32 | A12345-001. | 53 | |
54 | Controller Adapter Name Physical Layer | ||
55 | ---------- ------------ -------------- | ||
56 | 82597EX Intel(R) PRO/10GbE LR/SR/CX4 10G Base-LR (1310 nm optical fiber) | ||
57 | Server Adapters 10G Base-SR (850 nm optical fiber) | ||
58 | 10G Base-CX4(twin-axial copper cabling) | ||
59 | |||
60 | For more information on how to identify your adapter, go to the Adapter & | ||
61 | Driver ID Guide at: | ||
62 | |||
63 | http://support.intel.com/support/network/sb/CS-012904.htm | ||
64 | |||
65 | |||
66 | Building and Installation | ||
67 | ========================= | ||
68 | |||
69 | select m for "Intel(R) PRO/10GbE support" located at: | ||
70 | Location: | ||
71 | -> Device Drivers | ||
72 | -> Network device support (NETDEVICES [=y]) | ||
73 | -> Ethernet (10000 Mbit) (NETDEV_10000 [=y]) | ||
74 | 1. make modules && make modules_install | ||
75 | |||
76 | 2. Load the module: | ||
77 | |||
78 | modprobe ixgb <parameter>=<value> | ||
79 | |||
80 | The insmod command can be used if the full | ||
81 | path to the driver module is specified. For example: | ||
82 | |||
83 | insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgb/ixgb.ko | ||
84 | |||
85 | With 2.6 based kernels also make sure that older ixgb drivers are | ||
86 | removed from the kernel, before loading the new module: | ||
33 | 87 | ||
34 | Use the above information and the Adapter & Driver ID Guide at: | 88 | rmmod ixgb; modprobe ixgb |
35 | 89 | ||
36 | http://support.intel.com/support/network/adapter/pro100/21397.htm | 90 | 3. Assign an IP address to the interface by entering the following, where |
91 | x is the interface number: | ||
37 | 92 | ||
38 | For the latest Intel network drivers for Linux, go to: | 93 | ifconfig ethx <IP_address> |
94 | |||
95 | 4. Verify that the interface works. Enter the following, where <IP_address> | ||
96 | is the IP address for another machine on the same subnet as the interface | ||
97 | that is being tested: | ||
98 | |||
99 | ping <IP_address> | ||
39 | 100 | ||
40 | http://downloadfinder.intel.com/scripts-df/support_intel.asp | ||
41 | 101 | ||
42 | Command Line Parameters | 102 | Command Line Parameters |
43 | ======================= | 103 | ======================= |
44 | 104 | ||
45 | If the driver is built as a module, the following optional parameters are | 105 | If the driver is built as a module, the following optional parameters are |
46 | used by entering them on the command line with the modprobe or insmod command | 106 | used by entering them on the command line with the modprobe command using |
47 | using this syntax: | 107 | this syntax: |
48 | 108 | ||
49 | modprobe ixgb [<option>=<VAL1>,<VAL2>,...] | 109 | modprobe ixgb [<option>=<VAL1>,<VAL2>,...] |
50 | 110 | ||
51 | insmod ixgb [<option>=<VAL1>,<VAL2>,...] | 111 | For example, with two 10GbE PCI adapters, entering: |
52 | 112 | ||
53 | For example, with two PRO/10GbE PCI adapters, entering: | 113 | modprobe ixgb TxDescriptors=80,128 |
54 | 114 | ||
55 | insmod ixgb TxDescriptors=80,128 | 115 | loads the ixgb driver with 80 TX resources for the first adapter and 128 TX |
56 | |||
57 | loads the ixgb driver with 80 TX resources for the first adapter and 128 TX | ||
58 | resources for the second adapter. | 116 | resources for the second adapter. |
59 | 117 | ||
60 | The default value for each parameter is generally the recommended setting, | 118 | The default value for each parameter is generally the recommended setting, |
61 | unless otherwise noted. Also, if the driver is statically built into the | 119 | unless otherwise noted. |
62 | kernel, the driver is loaded with the default values for all the parameters. | ||
63 | Ethtool can be used to change some of the parameters at runtime. | ||
64 | 120 | ||
65 | FlowControl | 121 | FlowControl |
66 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) | 122 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) |
67 | Default: Read from the EEPROM | 123 | Default: Read from the EEPROM |
68 | If EEPROM is not detected, default is 3 | 124 | If EEPROM is not detected, default is 1 |
69 | This parameter controls the automatic generation(Tx) and response(Rx) to | 125 | This parameter controls the automatic generation(Tx) and response(Rx) to |
70 | Ethernet PAUSE frames. | 126 | Ethernet PAUSE frames. There are hardware bugs associated with enabling |
127 | Tx flow control so beware. | ||
71 | 128 | ||
72 | RxDescriptors | 129 | RxDescriptors |
73 | Valid Range: 64-512 | 130 | Valid Range: 64-512 |
74 | Default Value: 512 | 131 | Default Value: 512 |
75 | This value is the number of receive descriptors allocated by the driver. | 132 | This value is the number of receive descriptors allocated by the driver. |
76 | Increasing this value allows the driver to buffer more incoming packets. | 133 | Increasing this value allows the driver to buffer more incoming packets. |
77 | Each descriptor is 16 bytes. A receive buffer is also allocated for | 134 | Each descriptor is 16 bytes. A receive buffer is also allocated for |
78 | each descriptor and can be either 2048, 4056, 8192, or 16384 bytes, | 135 | each descriptor and can be either 2048, 4056, 8192, or 16384 bytes, |
79 | depending on the MTU setting. When the MTU size is 1500 or less, the | 136 | depending on the MTU setting. When the MTU size is 1500 or less, the |
80 | receive buffer size is 2048 bytes. When the MTU is greater than 1500 the | 137 | receive buffer size is 2048 bytes. When the MTU is greater than 1500 the |
81 | receive buffer size will be either 4056, 8192, or 16384 bytes. The | 138 | receive buffer size will be either 4056, 8192, or 16384 bytes. The |
82 | maximum MTU size is 16114. | 139 | maximum MTU size is 16114. |
83 | 140 | ||
84 | RxIntDelay | 141 | RxIntDelay |
85 | Valid Range: 0-65535 (0=off) | 142 | Valid Range: 0-65535 (0=off) |
86 | Default Value: 6 | 143 | Default Value: 72 |
87 | This value delays the generation of receive interrupts in units of | 144 | This value delays the generation of receive interrupts in units of |
88 | 0.8192 microseconds. Receive interrupt reduction can improve CPU | 145 | 0.8192 microseconds. Receive interrupt reduction can improve CPU |
89 | efficiency if properly tuned for specific network traffic. Increasing | 146 | efficiency if properly tuned for specific network traffic. Increasing |
90 | this value adds extra latency to frame reception and can end up | 147 | this value adds extra latency to frame reception and can end up |
91 | decreasing the throughput of TCP traffic. If the system is reporting | 148 | decreasing the throughput of TCP traffic. If the system is reporting |
92 | dropped receives, this value may be set too high, causing the driver to | 149 | dropped receives, this value may be set too high, causing the driver to |
93 | run out of available receive descriptors. | 150 | run out of available receive descriptors. |
94 | 151 | ||
95 | TxDescriptors | 152 | TxDescriptors |
96 | Valid Range: 64-4096 | 153 | Valid Range: 64-4096 |
97 | Default Value: 256 | 154 | Default Value: 256 |
98 | This value is the number of transmit descriptors allocated by the driver. | 155 | This value is the number of transmit descriptors allocated by the driver. |
99 | Increasing this value allows the driver to queue more transmits. Each | 156 | Increasing this value allows the driver to queue more transmits. Each |
100 | descriptor is 16 bytes. | 157 | descriptor is 16 bytes. |
101 | 158 | ||
102 | XsumRX | 159 | XsumRX |
@@ -105,51 +162,49 @@ Default Value: 1 | |||
105 | A value of '1' indicates that the driver should enable IP checksum | 162 | A value of '1' indicates that the driver should enable IP checksum |
106 | offload for received packets (both UDP and TCP) to the adapter hardware. | 163 | offload for received packets (both UDP and TCP) to the adapter hardware. |
107 | 164 | ||
108 | XsumTX | ||
109 | Valid Range: 0-1 | ||
110 | Default Value: 1 | ||
111 | A value of '1' indicates that the driver should enable IP checksum | ||
112 | offload for transmitted packets (both UDP and TCP) to the adapter | ||
113 | hardware. | ||
114 | 165 | ||
115 | Improving Performance | 166 | Improving Performance |
116 | ===================== | 167 | ===================== |
117 | 168 | ||
118 | With the Intel PRO/10 GbE adapter, the default Linux configuration will very | 169 | With the 10 Gigabit server adapters, the default Linux configuration will |
119 | likely limit the total available throughput artificially. There is a set of | 170 | very likely limit the total available throughput artificially. There is a set |
120 | things that when applied together increase the ability of Linux to transmit | 171 | of configuration changes that, when applied together, will increase the ability |
121 | and receive data. The following enhancements were originally acquired from | 172 | of Linux to transmit and receive data. The following enhancements were |
122 | settings published at http://www.spec.org/web99 for various submitted results | 173 | originally acquired from settings published at http://www.spec.org/web99/ for |
123 | using Linux. | 174 | various submitted results using Linux. |
124 | 175 | ||
125 | NOTE: These changes are only suggestions, and serve as a starting point for | 176 | NOTE: These changes are only suggestions, and serve as a starting point for |
126 | tuning your network performance. | 177 | tuning your network performance. |
127 | 178 | ||
128 | The changes are made in three major ways, listed in order of greatest effect: | 179 | The changes are made in three major ways, listed in order of greatest effect: |
129 | - Use ifconfig to modify the mtu (maximum transmission unit) and the txqueuelen | 180 | - Use ifconfig to modify the mtu (maximum transmission unit) and the txqueuelen |
130 | parameter. | 181 | parameter. |
131 | - Use sysctl to modify /proc parameters (essentially kernel tuning) | 182 | - Use sysctl to modify /proc parameters (essentially kernel tuning) |
132 | - Use setpci to modify the MMRBC field in PCI-X configuration space to increase | 183 | - Use setpci to modify the MMRBC field in PCI-X configuration space to increase |
133 | transmit burst lengths on the bus. | 184 | transmit burst lengths on the bus. |
134 | 185 | ||
135 | NOTE: setpci modifies the adapter's configuration registers to allow it to read | 186 | NOTE: setpci modifies the adapter's configuration registers to allow it to read |
136 | up to 4k bytes at a time (for transmits). However, for some systems the | 187 | up to 4k bytes at a time (for transmits). However, for some systems the |
137 | behavior after modifying this register may be undefined (possibly errors of some | 188 | behavior after modifying this register may be undefined (possibly errors of |
138 | kind). A power-cycle, hard reset or explicitly setting the e6 register back to | 189 | some kind). A power-cycle, hard reset or explicitly setting the e6 register |
139 | 22 (setpci -d 8086:1048 e6.b=22) may be required to get back to a stable | 190 | back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a |
140 | configuration. | 191 | stable configuration. |
141 | 192 | ||
142 | - COPY these lines and paste them into ixgb_perf.sh: | 193 | - COPY these lines and paste them into ixgb_perf.sh: |
143 | #!/bin/bash | 194 | #!/bin/bash |
144 | echo "configuring network performance , edit this file to change the interface" | 195 | echo "configuring network performance , edit this file to change the interface |
196 | or device ID of 10GbE card" | ||
145 | # set mmrbc to 4k reads, modify only Intel 10GbE device IDs | 197 | # set mmrbc to 4k reads, modify only Intel 10GbE device IDs |
146 | setpci -d 8086:1048 e6.b=2e | 198 | # replace 1a48 with appropriate 10GbE device's ID installed on the system, |
147 | # set the MTU (max transmission unit) - it requires your switch and clients to change too! | 199 | # if needed. |
200 | setpci -d 8086:1a48 e6.b=2e | ||
201 | # set the MTU (max transmission unit) - it requires your switch and clients | ||
202 | # to change as well. | ||
148 | # set the txqueuelen | 203 | # set the txqueuelen |
149 | # your ixgb adapter should be loaded as eth1 for this to work, change if needed | 204 | # your ixgb adapter should be loaded as eth1 for this to work, change if needed |
150 | ifconfig eth1 mtu 9000 txqueuelen 1000 up | 205 | ifconfig eth1 mtu 9000 txqueuelen 1000 up |
151 | # call the sysctl utility to modify /proc/sys entries | 206 | # call the sysctl utility to modify /proc/sys entries |
152 | sysctl -p ./sysctl_ixgb.conf | 207 | sysctl -p ./sysctl_ixgb.conf |
153 | - END ixgb_perf.sh | 208 | - END ixgb_perf.sh |
154 | 209 | ||
155 | - COPY these lines and paste them into sysctl_ixgb.conf: | 210 | - COPY these lines and paste them into sysctl_ixgb.conf: |
@@ -159,54 +214,220 @@ sysctl -p ./sysctl_ixgb.conf | |||
159 | # several network benchmark tests, your mileage may vary | 214 | # several network benchmark tests, your mileage may vary |
160 | 215 | ||
161 | ### IPV4 specific settings | 216 | ### IPV4 specific settings |
162 | net.ipv4.tcp_timestamps = 0 # turns TCP timestamp support off, default 1, reduces CPU use | 217 | # turn TCP timestamp support off, default 1, reduces CPU use |
163 | net.ipv4.tcp_sack = 0 # turn SACK support off, default on | 218 | net.ipv4.tcp_timestamps = 0 |
164 | # on systems with a VERY fast bus -> memory interface this is the big gainer | 219 | # turn SACK support off, default on |
165 | net.ipv4.tcp_rmem = 10000000 10000000 10000000 # sets min/default/max TCP read buffer, default 4096 87380 174760 | 220 | # on systems with a VERY fast bus -> memory interface this is the big gainer |
166 | net.ipv4.tcp_wmem = 10000000 10000000 10000000 # sets min/pressure/max TCP write buffer, default 4096 16384 131072 | 221 | net.ipv4.tcp_sack = 0 |
167 | net.ipv4.tcp_mem = 10000000 10000000 10000000 # sets min/pressure/max TCP buffer space, default 31744 32256 32768 | 222 | # set min/default/max TCP read buffer, default 4096 87380 174760 |
223 | net.ipv4.tcp_rmem = 10000000 10000000 10000000 | ||
224 | # set min/pressure/max TCP write buffer, default 4096 16384 131072 | ||
225 | net.ipv4.tcp_wmem = 10000000 10000000 10000000 | ||
226 | # set min/pressure/max TCP buffer space, default 31744 32256 32768 | ||
227 | net.ipv4.tcp_mem = 10000000 10000000 10000000 | ||
168 | 228 | ||
169 | ### CORE settings (mostly for socket and UDP effect) | 229 | ### CORE settings (mostly for socket and UDP effect) |
170 | net.core.rmem_max = 524287 # maximum receive socket buffer size, default 131071 | 230 | # set maximum receive socket buffer size, default 131071 |
171 | net.core.wmem_max = 524287 # maximum send socket buffer size, default 131071 | 231 | net.core.rmem_max = 524287 |
172 | net.core.rmem_default = 524287 # default receive socket buffer size, default 65535 | 232 | # set maximum send socket buffer size, default 131071 |
173 | net.core.wmem_default = 524287 # default send socket buffer size, default 65535 | 233 | net.core.wmem_max = 524287 |
174 | net.core.optmem_max = 524287 # maximum amount of option memory buffers, default 10240 | 234 | # set default receive socket buffer size, default 65535 |
175 | net.core.netdev_max_backlog = 300000 # number of unprocessed input packets before kernel starts dropping them, default 300 | 235 | net.core.rmem_default = 524287 |
236 | # set default send socket buffer size, default 65535 | ||
237 | net.core.wmem_default = 524287 | ||
238 | # set maximum amount of option memory buffers, default 10240 | ||
239 | net.core.optmem_max = 524287 | ||
240 | # set number of unprocessed input packets before kernel starts dropping them; default 300 | ||
241 | net.core.netdev_max_backlog = 300000 | ||
176 | - END sysctl_ixgb.conf | 242 | - END sysctl_ixgb.conf |
177 | 243 | ||
178 | Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface | 244 | Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface |
179 | your ixgb driver is using. | 245 | your ixgb driver is using and/or replace '1a48' with the device ID of the |
246 | 10GbE adapter installed on the system. | ||
180 | 247 | ||
181 | NOTE: Unless these scripts are added to the boot process, these changes will | 248 | NOTE: Unless these scripts are added to the boot process, these changes will |
182 | last only until the next system reboot. | 249 | last only until the next system reboot. |
183 | 250 | ||
184 | 251 | ||
185 | Resolving Slow UDP Traffic | 252 | Resolving Slow UDP Traffic |
186 | -------------------------- | 253 | -------------------------- |
254 | If your server does not seem to be able to receive UDP traffic as fast as it | ||
255 | can receive TCP traffic, it could be because Linux, by default, does not set | ||
256 | the network stack buffers as large as they need to be to support high UDP | ||
257 | transfer rates. One way to alleviate this problem is to allow more memory to | ||
258 | be used by the IP stack to store incoming data. | ||
187 | 259 | ||
188 | If your server does not seem to be able to receive UDP traffic as fast as it | 260 | For instance, use the commands: |
189 | can receive TCP traffic, it could be because Linux, by default, does not set | ||
190 | the network stack buffers as large as they need to be to support high UDP | ||
191 | transfer rates. One way to alleviate this problem is to allow more memory to | ||
192 | be used by the IP stack to store incoming data. | ||
193 | |||
194 | For instance, use the commands: | ||
195 | sysctl -w net.core.rmem_max=262143 | 261 | sysctl -w net.core.rmem_max=262143 |
196 | and | 262 | and |
197 | sysctl -w net.core.rmem_default=262143 | 263 | sysctl -w net.core.rmem_default=262143 |
198 | to increase the read buffer memory max and default to 262143 (256k - 1) from | 264 | to increase the read buffer memory max and default to 262143 (256k - 1) from |
199 | defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables | 265 | defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables |
200 | will increase the amount of memory used by the network stack for receives, and | 266 | will increase the amount of memory used by the network stack for receives, and |
201 | can be increased significantly more if necessary for your application. | 267 | can be increased significantly more if necessary for your application. |
202 | 268 | ||
269 | |||
270 | Additional Configurations | ||
271 | ========================= | ||
272 | |||
273 | Configuring the Driver on Different Distributions | ||
274 | ------------------------------------------------- | ||
275 | Configuring a network driver to load properly when the system is started is | ||
276 | distribution dependent. Typically, the configuration process involves adding | ||
277 | an alias line to /etc/modprobe.conf as well as editing other system startup | ||
278 | scripts and/or configuration files. Many popular Linux distributions ship | ||
279 | with tools to make these changes for you. To learn the proper way to | ||
280 | configure a network device for your system, refer to your distribution | ||
281 | documentation. If during this process you are asked for the driver or module | ||
282 | name, the name for the Linux Base Driver for the Intel 10GbE Family of | ||
283 | Adapters is ixgb. | ||
284 | |||
285 | Viewing Link Messages | ||
286 | --------------------- | ||
287 | Link messages will not be displayed to the console if the distribution is | ||
288 | restricting system messages. In order to see network driver link messages on | ||
289 | your console, set dmesg to eight by entering the following: | ||
290 | |||
291 | dmesg -n 8 | ||
292 | |||
293 | NOTE: This setting is not saved across reboots. | ||
294 | |||
295 | |||
296 | Jumbo Frames | ||
297 | ------------ | ||
298 | The driver supports Jumbo Frames for all adapters. Jumbo Frames support is | ||
299 | enabled by changing the MTU to a value larger than the default of 1500. | ||
300 | The maximum value for the MTU is 16114. Use the ifconfig command to | ||
301 | increase the MTU size. For example: | ||
302 | |||
303 | ifconfig ethx mtu 9000 up | ||
304 | |||
305 | The maximum MTU setting for Jumbo Frames is 16114. This value coincides | ||
306 | with the maximum Jumbo Frames size of 16128. | ||
307 | |||
308 | |||
309 | Ethtool | ||
310 | ------- | ||
311 | The driver utilizes the ethtool interface for driver configuration and | ||
312 | diagnostics, as well as displaying statistical information. Ethtool | ||
313 | version 1.6 or later is required for this functionality. | ||
314 | |||
315 | The latest release of ethtool can be found from | ||
316 | http://sourceforge.net/projects/gkernel | ||
317 | |||
318 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support | ||
319 | for a more complete ethtool feature set can be enabled by upgrading | ||
320 | to the latest version. | ||
321 | |||
322 | |||
323 | NAPI | ||
324 | ---- | ||
325 | |||
326 | NAPI (Rx polling mode) is supported in the ixgb driver. NAPI is enabled | ||
327 | or disabled based on the configuration of the kernel. See CONFIG_IXGB_NAPI. | ||
328 | |||
329 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | ||
330 | |||
331 | |||
332 | Known Issues/Troubleshooting | ||
333 | ============================ | ||
334 | |||
335 | NOTE: After installing the driver, if your Intel Network Connection is not | ||
336 | working, verify in the "In This Release" section of the readme that you have | ||
337 | installed the correct driver. | ||
338 | |||
339 | Intel(R) PRO/10GbE CX4 Server Adapter Cable Interoperability Issue with | ||
340 | Fujitsu XENPAK Module in SmartBits Chassis | ||
341 | --------------------------------------------------------------------- | ||
342 | Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 | ||
343 | Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits | ||
344 | chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni. | ||
345 | The CRC errors may be received either by the Intel(R) PRO/10GbE CX4 | ||
346 | Server adapter or the SmartBits. If this situation occurs, using a different | ||
347 | cable assembly may resolve the issue. | ||
348 | |||
349 | CX4 Server Adapter Cable Interoperability Issues with HP Procurve 3400cl | ||
350 | Switch Port | ||
351 | ------------------------------------------------------------------------ | ||
352 | Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server | ||
353 | adapter is connected to an HP Procurve 3400cl switch port using short cables | ||
354 | (1 m or shorter). If this situation occurs, using a longer cable may resolve | ||
355 | the issue. | ||
356 | |||
357 | Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that | ||
358 | are 10 m or longer or when using a Leoni 15 m/24AWG cable assembly. The CRC | ||
359 | errors may be received either by the CX4 Server adapter or at the switch. If | ||
360 | this situation occurs, using a different cable assembly may resolve the issue. | ||
361 | |||
362 | |||
363 | Jumbo Frames System Requirement | ||
364 | ------------------------------- | ||
365 | Memory allocation failures have been observed on Linux systems with 64 MB | ||
366 | of RAM or less that are running Jumbo Frames. If you are using Jumbo | ||
367 | Frames, your system may require more than the advertised minimum | ||
368 | requirement of 64 MB of system memory. | ||
369 | |||
370 | |||
371 | Performance Degradation with Jumbo Frames | ||
372 | ----------------------------------------- | ||
373 | Degradation in throughput performance may be observed in some Jumbo frames | ||
374 | environments. If this is observed, increasing the application's socket buffer | ||
375 | size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. | ||
376 | See the specific application manual and /usr/src/linux*/Documentation/ | ||
377 | networking/ip-sysctl.txt for more details. | ||
378 | |||
379 | |||
380 | Allocating Rx Buffers when Using Jumbo Frames | ||
381 | --------------------------------------------- | ||
382 | Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if | ||
383 | the available memory is heavily fragmented. This issue may be seen with PCI-X | ||
384 | adapters or with packet split disabled. This can be reduced or eliminated | ||
385 | by changing the amount of available memory for receive buffer allocation, by | ||
386 | increasing /proc/sys/vm/min_free_kbytes. | ||
387 | |||
388 | |||
389 | Multiple Interfaces on Same Ethernet Broadcast Network | ||
390 | ------------------------------------------------------ | ||
391 | Due to the default ARP behavior on Linux, it is not possible to have | ||
392 | one system on two IP networks in the same Ethernet broadcast domain | ||
393 | (non-partitioned switch) behave as expected. All Ethernet interfaces | ||
394 | will respond to IP traffic for any IP address assigned to the system. | ||
395 | This results in unbalanced receive traffic. | ||
396 | |||
397 | If you have multiple interfaces in a server, do either of the following: | ||
398 | |||
399 | - Turn on ARP filtering by entering: | ||
400 | echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter | ||
401 | |||
402 | - Install the interfaces in separate broadcast domains - either in | ||
403 | different switches or in a switch partitioned to VLANs. | ||
404 | |||
405 | |||
406 | UDP Stress Test Dropped Packet Issue | ||
407 | -------------------------------------- | ||
408 | Under a small-packet UDP stress test with the 10GbE driver, the Linux system | ||
409 | may drop UDP packets due to the fullness of socket buffers. You may want | ||
410 | to change the driver's Flow Control variables to the minimum value for | ||
411 | controlling packet reception. | ||
412 | |||
413 | |||
414 | Tx Hangs Possible Under Stress | ||
415 | ------------------------------ | ||
416 | Under stress conditions, if TX hangs occur, turning off TSO | ||
417 | "ethtool -K eth0 tso off" may resolve the problem. | ||
418 | |||
419 | |||
203 | Support | 420 | Support |
204 | ======= | 421 | ======= |
205 | 422 | ||
206 | For general information and support, go to the Intel support website at: | 423 | For general information, go to the Intel support website at: |
207 | 424 | ||
208 | http://support.intel.com | 425 | http://support.intel.com |
209 | 426 | ||
427 | or the Intel Wired Networking project hosted by Sourceforge at: | ||
428 | |||
429 | http://sourceforge.net/projects/e1000 | ||
430 | |||
210 | If an issue is identified with the released source code on the supported | 431 | If an issue is identified with the released source code on the supported |
211 | kernel with a supported adapter, email the specific information related to | 432 | kernel with a supported adapter, email the specific information related |
212 | the issue to linux.nics@intel.com. | 433 | to the issue to e1000-devel@lists.sf.net |
diff --git a/Documentation/networking/mac80211_hwsim/README b/Documentation/networking/mac80211_hwsim/README new file mode 100644 index 000000000000..2ff8ccb8dc37 --- /dev/null +++ b/Documentation/networking/mac80211_hwsim/README | |||
@@ -0,0 +1,67 @@ | |||
1 | mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211 | ||
2 | Copyright (c) 2008, Jouni Malinen <j@w1.fi> | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License version 2 as | ||
6 | published by the Free Software Foundation. | ||
7 | |||
8 | |||
9 | Introduction | ||
10 | |||
11 | mac80211_hwsim is a Linux kernel module that can be used to simulate | ||
12 | an arbitrary number of IEEE 802.11 radios for mac80211. It can be used to | ||
13 | test most of the mac80211 functionality and user space tools (e.g., | ||
14 | hostapd and wpa_supplicant) in a way that matches very closely with | ||
15 | the normal case of using real WLAN hardware. From the mac80211 view | ||
16 | point, mac80211_hwsim is yet another hardware driver, i.e., no changes | ||
17 | to mac80211 are needed to use this testing tool. | ||
18 | |||
19 | The main goal for mac80211_hwsim is to make it easier for developers | ||
20 | to test their code and work with new features to mac80211, hostapd, | ||
21 | and wpa_supplicant. The simulated radios do not have the limitations | ||
22 | of real hardware, so it is easy to generate an arbitrary test setup | ||
23 | and always reproduce the same setup for future tests. In addition, | ||
24 | since all radio operation is simulated, any channel can be used in | ||
25 | tests regardless of regulatory rules. | ||
26 | |||
27 | mac80211_hwsim kernel module has a parameter 'radios' that can be used | ||
28 | to select how many radios are simulated (default 2). This allows | ||
29 | configuration of both very simple setups (e.g., just a single access | ||
30 | point and a station) and large scale tests (multiple access points with | ||
31 | hundreds of stations). | ||
32 | |||
33 | mac80211_hwsim works by tracking the current channel of each virtual | ||
34 | radio and copying all transmitted frames to all other radios that are | ||
35 | currently enabled and on the same channel as the transmitting | ||
36 | radio. Software encryption in mac80211 is used so that the frames are | ||
37 | actually encrypted over the virtual air interface to allow more | ||
38 | complete testing of encryption. | ||
39 | |||
40 | A global monitoring netdev, hwsim#, is created independent of | ||
41 | mac80211. This interface can be used to monitor all transmitted frames | ||
42 | regardless of channel. | ||
43 | |||
44 | |||
45 | Simple example | ||
46 | |||
47 | This example shows how to use mac80211_hwsim to simulate two radios: | ||
48 | one to act as an access point and the other as a station that | ||
49 | associates with the AP. hostapd and wpa_supplicant are used to take | ||
50 | care of WPA2-PSK authentication. In addition, hostapd is also | ||
51 | processing access point side of association. | ||
52 | |||
53 | Please note that the current Linux kernel does not enable AP mode, so a | ||
54 | simple patch is needed to enable AP mode selection: | ||
55 | http://johannes.sipsolutions.net/patches/kernel/all/LATEST/006-allow-ap-vlan-modes.patch | ||
56 | |||
57 | |||
58 | # Build mac80211_hwsim as part of kernel configuration | ||
59 | |||
60 | # Load the module | ||
61 | modprobe mac80211_hwsim | ||
62 | |||
63 | # Run hostapd (AP) for wlan0 | ||
64 | hostapd hostapd.conf | ||
65 | |||
66 | # Run wpa_supplicant (station) for wlan1 | ||
67 | wpa_supplicant -Dwext -iwlan1 -c wpa_supplicant.conf | ||
diff --git a/Documentation/networking/mac80211_hwsim/hostapd.conf b/Documentation/networking/mac80211_hwsim/hostapd.conf new file mode 100644 index 000000000000..08cde7e35f2e --- /dev/null +++ b/Documentation/networking/mac80211_hwsim/hostapd.conf | |||
@@ -0,0 +1,11 @@ | |||
1 | interface=wlan0 | ||
2 | driver=nl80211 | ||
3 | |||
4 | hw_mode=g | ||
5 | channel=1 | ||
6 | ssid=mac80211 test | ||
7 | |||
8 | wpa=2 | ||
9 | wpa_key_mgmt=WPA-PSK | ||
10 | wpa_pairwise=CCMP | ||
11 | wpa_passphrase=12345678 | ||
diff --git a/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf b/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf new file mode 100644 index 000000000000..299128cff035 --- /dev/null +++ b/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf | |||
@@ -0,0 +1,10 @@ | |||
1 | ctrl_interface=/var/run/wpa_supplicant | ||
2 | |||
3 | network={ | ||
4 | ssid="mac80211 test" | ||
5 | psk="12345678" | ||
6 | key_mgmt=WPA-PSK | ||
7 | proto=WPA2 | ||
8 | pairwise=CCMP | ||
9 | group=CCMP | ||
10 | } | ||
diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt index ea5a42e8f79f..d391ea631141 100644 --- a/Documentation/networking/multiqueue.txt +++ b/Documentation/networking/multiqueue.txt | |||
@@ -3,19 +3,11 @@ | |||
3 | =========================================== | 3 | =========================================== |
4 | 4 | ||
5 | Section 1: Base driver requirements for implementing multiqueue support | 5 | Section 1: Base driver requirements for implementing multiqueue support |
6 | Section 2: Qdisc support for multiqueue devices | ||
7 | Section 3: Brief howto using PRIO or RR for multiqueue devices | ||
8 | |||
9 | 6 | ||
10 | Intro: Kernel support for multiqueue devices | 7 | Intro: Kernel support for multiqueue devices |
11 | --------------------------------------------------------- | 8 | --------------------------------------------------------- |
12 | 9 | ||
13 | Kernel support for multiqueue devices is only an API that is presented to the | 10 | Kernel support for multiqueue devices is always present. |
14 | netdevice layer for base drivers to implement. This feature is part of the | ||
15 | core networking stack, and all network devices will be running on the | ||
16 | multiqueue-aware stack. If a base driver only has one queue, then these | ||
17 | changes are transparent to that driver. | ||
18 | |||
19 | 11 | ||
20 | Section 1: Base driver requirements for implementing multiqueue support | 12 | Section 1: Base driver requirements for implementing multiqueue support |
21 | ----------------------------------------------------------------------- | 13 | ----------------------------------------------------------------------- |
@@ -32,84 +24,4 @@ netif_{start|stop|wake}_subqueue() functions to manage each queue while the | |||
32 | device is still operational. netdev->queue_lock is still used when the device | 24 | device is still operational. netdev->queue_lock is still used when the device |
33 | comes online or when it's completely shut down (unregister_netdev(), etc.). | 25 | comes online or when it's completely shut down (unregister_netdev(), etc.). |
34 | 26 | ||
35 | Finally, the base driver should indicate that it is a multiqueue device. The | ||
36 | feature flag NETIF_F_MULTI_QUEUE should be added to the netdev->features | ||
37 | bitmap on device initialization. Below is an example from e1000: | ||
38 | |||
39 | #ifdef CONFIG_E1000_MQ | ||
40 | if ( (adapter->hw.mac.type == e1000_82571) || | ||
41 | (adapter->hw.mac.type == e1000_82572) || | ||
42 | (adapter->hw.mac.type == e1000_80003es2lan)) | ||
43 | netdev->features |= NETIF_F_MULTI_QUEUE; | ||
44 | #endif | ||
45 | |||
46 | |||
47 | Section 2: Qdisc support for multiqueue devices | ||
48 | ----------------------------------------------- | ||
49 | |||
50 | Currently two qdiscs support multiqueue devices. A new round-robin qdisc, | ||
51 | sch_rr, and sch_prio. The qdisc is responsible for classifying the skb's to | ||
52 | bands and queues, and will store the queue mapping into skb->queue_mapping. | ||
53 | Use this field in the base driver to determine which queue to send the skb | ||
54 | to. | ||
55 | |||
56 | sch_rr has been added for hardware that doesn't want scheduling policies from | ||
57 | software, so it's a straight round-robin qdisc. It uses the same syntax and | ||
58 | classification priomap that sch_prio uses, so it should be intuitive to | ||
59 | configure for people who've used sch_prio. | ||
60 | |||
61 | In order to utilize the multiqueue features of the qdiscs, the network | ||
62 | device layer needs to enable multiple queue support. This can be done by | ||
63 | selecting NETDEVICES_MULTIQUEUE under Drivers. | ||
64 | |||
65 | The PRIO qdisc naturally plugs into a multiqueue device. If | ||
66 | NETDEVICES_MULTIQUEUE is selected, then on qdisc load, the number of | ||
67 | bands requested is compared to the number of queues on the hardware. If they | ||
68 | are equal, it sets a one-to-one mapping up between the queues and bands. If | ||
69 | they're not equal, it will not load the qdisc. This is the same behavior | ||
70 | for RR. Once the association is made, any skb that is classified will have | ||
71 | skb->queue_mapping set, which will allow the driver to properly queue skb's | ||
72 | to multiple queues. | ||
73 | |||
74 | |||
75 | Section 3: Brief howto using PRIO and RR for multiqueue devices | ||
76 | --------------------------------------------------------------- | ||
77 | |||
78 | The userspace command 'tc,' part of the iproute2 package, is used to configure | ||
79 | qdiscs. To add the PRIO qdisc to your network device, assuming the device is | ||
80 | called eth0, run the following command: | ||
81 | |||
82 | # tc qdisc add dev eth0 root handle 1: prio bands 4 multiqueue | ||
83 | |||
84 | This will create 4 bands, 0 being highest priority, and associate those bands | ||
85 | to the queues on your NIC. Assuming eth0 has 4 Tx queues, the band mapping | ||
86 | would look like: | ||
87 | |||
88 | band 0 => queue 0 | ||
89 | band 1 => queue 1 | ||
90 | band 2 => queue 2 | ||
91 | band 3 => queue 3 | ||
92 | |||
93 | Traffic will begin flowing through each queue if your TOS values are assigning | ||
94 | traffic across the various bands. For example, ssh traffic will always try to | ||
95 | go out band 0 based on TOS -> Linux priority conversion (realtime traffic), | ||
96 | so it will be sent out queue 0. ICMP traffic (pings) fall into the "normal" | ||
97 | traffic classification, which is band 1. Therefore pings will be sent out | ||
98 | queue 1 on the NIC. | ||
99 | |||
100 | Note the use of the multiqueue keyword. This is only in versions of iproute2 | ||
101 | that support multiqueue networking devices; if this is omitted when loading | ||
102 | a qdisc onto a multiqueue device, the qdisc will load and operate the same | ||
103 | if it were loaded onto a single-queue device (i.e. - sends all traffic to | ||
104 | queue 0). | ||
105 | |||
106 | Another alternative to multiqueue band allocation can be done by using the | ||
107 | multiqueue option and specify 0 bands. If this is the case, the qdisc will | ||
108 | allocate the number of bands to equal the number of queues that the device | ||
109 | reports, and bring the qdisc online. | ||
110 | |||
111 | The behavior of tc filters remains the same, where it will override TOS priority | ||
112 | classification. | ||
113 | |||
114 | |||
115 | Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com> | 27 | Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com> |
diff --git a/Documentation/networking/s2io.txt b/Documentation/networking/s2io.txt index 1e28e2ddb90a..c3d6b4d5d014 100644 --- a/Documentation/networking/s2io.txt +++ b/Documentation/networking/s2io.txt | |||
@@ -52,13 +52,10 @@ d. MSI/MSI-X. Can be enabled on platforms which support this feature | |||
52 | (IA64, Xeon) resulting in noticeable performance improvement (up to 7% | 52 | (IA64, Xeon) resulting in noticeable performance improvement (up to 7% |
53 | on certain platforms). | 53 | on certain platforms). |
54 | 54 | ||
55 | e. NAPI. Compile-time option(CONFIG_S2IO_NAPI) for better Rx interrupt | 55 | e. Statistics. Comprehensive MAC-level and software statistics displayed |
56 | moderation. | ||
57 | |||
58 | f. Statistics. Comprehensive MAC-level and software statistics displayed | ||
59 | using "ethtool -S" option. | 56 | using "ethtool -S" option. |
60 | 57 | ||
61 | g. Multi-FIFO/Ring. Supports up to 8 transmit queues and receive rings, | 58 | f. Multi-FIFO/Ring. Supports up to 8 transmit queues and receive rings, |
62 | with multiple steering options. | 59 | with multiple steering options. |
63 | 60 | ||
64 | 4. Command line parameters | 61 | 4. Command line parameters |
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 1d2a772506cf..aee243a846a2 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt | |||
@@ -58,6 +58,7 @@ Table of Contents | |||
58 | o) Xilinx IP cores | 58 | o) Xilinx IP cores |
59 | p) Freescale Synchronous Serial Interface | 59 | p) Freescale Synchronous Serial Interface |
60 | q) USB EHCI controllers | 60 | q) USB EHCI controllers |
61 | r) MDIO on GPIOs | ||
61 | 62 | ||
62 | VII - Marvell Discovery mv64[345]6x System Controller chips | 63 | VII - Marvell Discovery mv64[345]6x System Controller chips |
63 | 1) The /system-controller node | 64 | 1) The /system-controller node |
@@ -1246,80 +1247,7 @@ descriptions for the SOC devices for which new nodes have been | |||
1246 | defined; this list will expand as more and more SOC-containing | 1247 | defined; this list will expand as more and more SOC-containing |
1247 | platforms are moved over to use the flattened-device-tree model. | 1248 | platforms are moved over to use the flattened-device-tree model. |
1248 | 1249 | ||
1249 | a) MDIO IO device | 1250 | a) PHY nodes |
1250 | |||
1251 | The MDIO is a bus to which the PHY devices are connected. For each | ||
1252 | device that exists on this bus, a child node should be created. See | ||
1253 | the definition of the PHY node below for an example of how to define | ||
1254 | a PHY. | ||
1255 | |||
1256 | Required properties: | ||
1257 | - reg : Offset and length of the register set for the device | ||
1258 | - compatible : Should define the compatible device type for the | ||
1259 | mdio. Currently, this is most likely to be "fsl,gianfar-mdio" | ||
1260 | |||
1261 | Example: | ||
1262 | |||
1263 | mdio@24520 { | ||
1264 | reg = <24520 20>; | ||
1265 | compatible = "fsl,gianfar-mdio"; | ||
1266 | |||
1267 | ethernet-phy@0 { | ||
1268 | ...... | ||
1269 | }; | ||
1270 | }; | ||
1271 | |||
1272 | |||
1273 | b) Gianfar-compatible ethernet nodes | ||
1274 | |||
1275 | Required properties: | ||
1276 | |||
1277 | - device_type : Should be "network" | ||
1278 | - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC" | ||
1279 | - compatible : Should be "gianfar" | ||
1280 | - reg : Offset and length of the register set for the device | ||
1281 | - mac-address : List of bytes representing the ethernet address of | ||
1282 | this controller | ||
1283 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1284 | field that represents an encoding of the sense and level | ||
1285 | information for the interrupt. This should be encoded based on | ||
1286 | the information in section 2) depending on the type of interrupt | ||
1287 | controller you have. | ||
1288 | - interrupt-parent : the phandle for the interrupt controller that | ||
1289 | services interrupts for this device. | ||
1290 | - phy-handle : The phandle for the PHY connected to this ethernet | ||
1291 | controller. | ||
1292 | - fixed-link : <a b c d e> where a is emulated phy id - choose any, | ||
1293 | but unique to the all specified fixed-links, b is duplex - 0 half, | ||
1294 | 1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no | ||
1295 | pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause. | ||
1296 | |||
1297 | Recommended properties: | ||
1298 | |||
1299 | - phy-connection-type : a string naming the controller/PHY interface type, | ||
1300 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", | ||
1301 | "tbi", or "rtbi". This property is only really needed if the connection | ||
1302 | is of type "rgmii-id", as all other connection types are detected by | ||
1303 | hardware. | ||
1304 | |||
1305 | |||
1306 | Example: | ||
1307 | |||
1308 | ethernet@24000 { | ||
1309 | #size-cells = <0>; | ||
1310 | device_type = "network"; | ||
1311 | model = "TSEC"; | ||
1312 | compatible = "gianfar"; | ||
1313 | reg = <24000 1000>; | ||
1314 | mac-address = [ 00 E0 0C 00 73 00 ]; | ||
1315 | interrupts = <d 3 e 3 12 3>; | ||
1316 | interrupt-parent = <40000>; | ||
1317 | phy-handle = <2452000> | ||
1318 | }; | ||
1319 | |||
1320 | |||
1321 | |||
1322 | c) PHY nodes | ||
1323 | 1251 | ||
1324 | Required properties: | 1252 | Required properties: |
1325 | 1253 | ||
@@ -1347,7 +1275,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1347 | }; | 1275 | }; |
1348 | 1276 | ||
1349 | 1277 | ||
1350 | d) Interrupt controllers | 1278 | b) Interrupt controllers |
1351 | 1279 | ||
1352 | Some SOC devices contain interrupt controllers that are different | 1280 | Some SOC devices contain interrupt controllers that are different |
1353 | from the standard Open PIC specification. The SOC device nodes for | 1281 | from the standard Open PIC specification. The SOC device nodes for |
@@ -1360,491 +1288,14 @@ platforms are moved over to use the flattened-device-tree model. | |||
1360 | 1288 | ||
1361 | pic@40000 { | 1289 | pic@40000 { |
1362 | linux,phandle = <40000>; | 1290 | linux,phandle = <40000>; |
1363 | clock-frequency = <0>; | ||
1364 | interrupt-controller; | 1291 | interrupt-controller; |
1365 | #address-cells = <0>; | 1292 | #address-cells = <0>; |
1366 | reg = <40000 40000>; | 1293 | reg = <40000 40000>; |
1367 | built-in; | ||
1368 | compatible = "chrp,open-pic"; | 1294 | compatible = "chrp,open-pic"; |
1369 | device_type = "open-pic"; | 1295 | device_type = "open-pic"; |
1370 | big-endian; | ||
1371 | }; | ||
1372 | |||
1373 | |||
1374 | e) I2C | ||
1375 | |||
1376 | Required properties : | ||
1377 | |||
1378 | - device_type : Should be "i2c" | ||
1379 | - reg : Offset and length of the register set for the device | ||
1380 | |||
1381 | Recommended properties : | ||
1382 | |||
1383 | - compatible : Should be "fsl-i2c" for parts compatible with | ||
1384 | Freescale I2C specifications. | ||
1385 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1386 | field that represents an encoding of the sense and level | ||
1387 | information for the interrupt. This should be encoded based on | ||
1388 | the information in section 2) depending on the type of interrupt | ||
1389 | controller you have. | ||
1390 | - interrupt-parent : the phandle for the interrupt controller that | ||
1391 | services interrupts for this device. | ||
1392 | - dfsrr : boolean; if defined, indicates that this I2C device has | ||
1393 | a digital filter sampling rate register | ||
1394 | - fsl5200-clocking : boolean; if defined, indicates that this device | ||
1395 | uses the FSL 5200 clocking mechanism. | ||
1396 | |||
1397 | Example : | ||
1398 | |||
1399 | i2c@3000 { | ||
1400 | interrupt-parent = <40000>; | ||
1401 | interrupts = <1b 3>; | ||
1402 | reg = <3000 18>; | ||
1403 | device_type = "i2c"; | ||
1404 | compatible = "fsl-i2c"; | ||
1405 | dfsrr; | ||
1406 | }; | ||
1407 | |||
1408 | |||
1409 | f) Freescale SOC USB controllers | ||
1410 | |||
1411 | The device node for a USB controller that is part of a Freescale | ||
1412 | SOC is as described in the document "Open Firmware Recommended | ||
1413 | Practice : Universal Serial Bus" with the following modifications | ||
1414 | and additions : | ||
1415 | |||
1416 | Required properties : | ||
1417 | - compatible : Should be "fsl-usb2-mph" for multi port host USB | ||
1418 | controllers, or "fsl-usb2-dr" for dual role USB controllers | ||
1419 | - phy_type : For multi port host USB controllers, should be one of | ||
1420 | "ulpi", or "serial". For dual role USB controllers, should be | ||
1421 | one of "ulpi", "utmi", "utmi_wide", or "serial". | ||
1422 | - reg : Offset and length of the register set for the device | ||
1423 | - port0 : boolean; if defined, indicates port0 is connected for | ||
1424 | fsl-usb2-mph compatible controllers. Either this property or | ||
1425 | "port1" (or both) must be defined for "fsl-usb2-mph" compatible | ||
1426 | controllers. | ||
1427 | - port1 : boolean; if defined, indicates port1 is connected for | ||
1428 | fsl-usb2-mph compatible controllers. Either this property or | ||
1429 | "port0" (or both) must be defined for "fsl-usb2-mph" compatible | ||
1430 | controllers. | ||
1431 | - dr_mode : indicates the working mode for "fsl-usb2-dr" compatible | ||
1432 | controllers. Can be "host", "peripheral", or "otg". Default to | ||
1433 | "host" if not defined for backward compatibility. | ||
1434 | |||
1435 | Recommended properties : | ||
1436 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1437 | field that represents an encoding of the sense and level | ||
1438 | information for the interrupt. This should be encoded based on | ||
1439 | the information in section 2) depending on the type of interrupt | ||
1440 | controller you have. | ||
1441 | - interrupt-parent : the phandle for the interrupt controller that | ||
1442 | services interrupts for this device. | ||
1443 | |||
1444 | Example multi port host USB controller device node : | ||
1445 | usb@22000 { | ||
1446 | compatible = "fsl-usb2-mph"; | ||
1447 | reg = <22000 1000>; | ||
1448 | #address-cells = <1>; | ||
1449 | #size-cells = <0>; | ||
1450 | interrupt-parent = <700>; | ||
1451 | interrupts = <27 1>; | ||
1452 | phy_type = "ulpi"; | ||
1453 | port0; | ||
1454 | port1; | ||
1455 | }; | ||
1456 | |||
1457 | Example dual role USB controller device node : | ||
1458 | usb@23000 { | ||
1459 | compatible = "fsl-usb2-dr"; | ||
1460 | reg = <23000 1000>; | ||
1461 | #address-cells = <1>; | ||
1462 | #size-cells = <0>; | ||
1463 | interrupt-parent = <700>; | ||
1464 | interrupts = <26 1>; | ||
1465 | dr_mode = "otg"; | ||
1466 | phy_type = "ulpi"; | ||
1467 | }; | ||
1468 | |||
1469 | |||
1470 | g) Freescale SOC SEC Security Engines | ||
1471 | |||
1472 | Required properties: | ||
1473 | |||
1474 | - device_type : Should be "crypto" | ||
1475 | - model : Model of the device. Should be "SEC1" or "SEC2" | ||
1476 | - compatible : Should be "talitos" | ||
1477 | - reg : Offset and length of the register set for the device | ||
1478 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1479 | field that represents an encoding of the sense and level | ||
1480 | information for the interrupt. This should be encoded based on | ||
1481 | the information in section 2) depending on the type of interrupt | ||
1482 | controller you have. | ||
1483 | - interrupt-parent : the phandle for the interrupt controller that | ||
1484 | services interrupts for this device. | ||
1485 | - num-channels : An integer representing the number of channels | ||
1486 | available. | ||
1487 | - channel-fifo-len : An integer representing the number of | ||
1488 | descriptor pointers each channel fetch fifo can hold. | ||
1489 | - exec-units-mask : The bitmask representing what execution units | ||
1490 | (EUs) are available. It's a single 32-bit cell. EU information | ||
1491 | should be encoded following the SEC's Descriptor Header Dword | ||
1492 | EU_SEL0 field documentation, i.e. as follows: | ||
1493 | |||
1494 | bit 0 = reserved - should be 0 | ||
1495 | bit 1 = set if SEC has the ARC4 EU (AFEU) | ||
1496 | bit 2 = set if SEC has the DES/3DES EU (DEU) | ||
1497 | bit 3 = set if SEC has the message digest EU (MDEU) | ||
1498 | bit 4 = set if SEC has the random number generator EU (RNG) | ||
1499 | bit 5 = set if SEC has the public key EU (PKEU) | ||
1500 | bit 6 = set if SEC has the AES EU (AESU) | ||
1501 | bit 7 = set if SEC has the Kasumi EU (KEU) | ||
1502 | |||
1503 | bits 8 through 31 are reserved for future SEC EUs. | ||
1504 | |||
1505 | - descriptor-types-mask : The bitmask representing what descriptors | ||
1506 | are available. It's a single 32-bit cell. Descriptor type | ||
1507 | information should be encoded following the SEC's Descriptor | ||
1508 | Header Dword DESC_TYPE field documentation, i.e. as follows: | ||
1509 | |||
1510 | bit 0 = set if SEC supports the aesu_ctr_nonsnoop desc. type | ||
1511 | bit 1 = set if SEC supports the ipsec_esp descriptor type | ||
1512 | bit 2 = set if SEC supports the common_nonsnoop desc. type | ||
1513 | bit 3 = set if SEC supports the 802.11i AES ccmp desc. type | ||
1514 | bit 4 = set if SEC supports the hmac_snoop_no_afeu desc. type | ||
1515 | bit 5 = set if SEC supports the srtp descriptor type | ||
1516 | bit 6 = set if SEC supports the non_hmac_snoop_no_afeu desc.type | ||
1517 | bit 7 = set if SEC supports the pkeu_assemble descriptor type | ||
1518 | bit 8 = set if SEC supports the aesu_key_expand_output desc.type | ||
1519 | bit 9 = set if SEC supports the pkeu_ptmul descriptor type | ||
1520 | bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type | ||
1521 | bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type | ||
1522 | |||
1523 | ..and so on and so forth. | ||
1524 | |||
1525 | Example: | ||
1526 | |||
1527 | /* MPC8548E */ | ||
1528 | crypto@30000 { | ||
1529 | device_type = "crypto"; | ||
1530 | model = "SEC2"; | ||
1531 | compatible = "talitos"; | ||
1532 | reg = <30000 10000>; | ||
1533 | interrupts = <1d 3>; | ||
1534 | interrupt-parent = <40000>; | ||
1535 | num-channels = <4>; | ||
1536 | channel-fifo-len = <18>; | ||
1537 | exec-units-mask = <000000fe>; | ||
1538 | descriptor-types-mask = <012b0ebf>; | ||
1539 | }; | ||
1540 | |||
1541 | h) Board Control and Status (BCSR) | ||
1542 | |||
1543 | Required properties: | ||
1544 | |||
1545 | - device_type : Should be "board-control" | ||
1546 | - reg : Offset and length of the register set for the device | ||
1547 | |||
1548 | Example: | ||
1549 | |||
1550 | bcsr@f8000000 { | ||
1551 | device_type = "board-control"; | ||
1552 | reg = <f8000000 8000>; | ||
1553 | }; | ||
1554 | |||
1555 | i) Freescale QUICC Engine module (QE) | ||
1556 | This represents qe module that is installed on PowerQUICC II Pro. | ||
1557 | |||
1558 | NOTE: This is an interim binding; it should be updated to fit | ||
1559 | in with the CPM binding later in this document. | ||
1560 | |||
1561 | Basically, it is a bus of devices, that could act more or less | ||
1562 | as a complete entity (UCC, USB etc ). All of them should be siblings on | ||
1563 | the "root" qe node, using the common properties from there. | ||
1564 | The description below applies to the qe of MPC8360 and | ||
1565 | more nodes and properties would be extended in the future. | ||
1566 | |||
1567 | i) Root QE device | ||
1568 | |||
1569 | Required properties: | ||
1570 | - compatible : should be "fsl,qe"; | ||
1571 | - model : precise model of the QE, Can be "QE", "CPM", or "CPM2" | ||
1572 | - reg : offset and length of the device registers. | ||
1573 | - bus-frequency : the clock frequency for QUICC Engine. | ||
1574 | |||
1575 | Recommended properties | ||
1576 | - brg-frequency : the internal clock source frequency for baud-rate | ||
1577 | generators in Hz. | ||
1578 | |||
1579 | Example: | ||
1580 | qe@e0100000 { | ||
1581 | #address-cells = <1>; | ||
1582 | #size-cells = <1>; | ||
1583 | #interrupt-cells = <2>; | ||
1584 | compatible = "fsl,qe"; | ||
1585 | ranges = <0 e0100000 00100000>; | ||
1586 | reg = <e0100000 480>; | ||
1587 | brg-frequency = <0>; | ||
1588 | bus-frequency = <179A7B00>; | ||
1589 | } | ||
1590 | |||
1591 | |||
1592 | ii) SPI (Serial Peripheral Interface) | ||
1593 | |||
1594 | Required properties: | ||
1595 | - cell-index : SPI controller index. | ||
1596 | - compatible : should be "fsl,spi". | ||
1597 | - mode : the SPI operation mode, it can be "cpu" or "cpu-qe". | ||
1598 | - reg : Offset and length of the register set for the device | ||
1599 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1600 | field that represents an encoding of the sense and level | ||
1601 | information for the interrupt. This should be encoded based on | ||
1602 | the information in section 2) depending on the type of interrupt | ||
1603 | controller you have. | ||
1604 | - interrupt-parent : the phandle for the interrupt controller that | ||
1605 | services interrupts for this device. | ||
1606 | |||
1607 | Example: | ||
1608 | spi@4c0 { | ||
1609 | cell-index = <0>; | ||
1610 | compatible = "fsl,spi"; | ||
1611 | reg = <4c0 40>; | ||
1612 | interrupts = <82 0>; | ||
1613 | interrupt-parent = <700>; | ||
1614 | mode = "cpu"; | ||
1615 | }; | ||
1616 | |||
1617 | |||
1618 | iii) USB (Universal Serial Bus Controller) | ||
1619 | |||
1620 | Required properties: | ||
1621 | - compatible : could be "qe_udc" or "fhci-hcd". | ||
1622 | - mode : could be "host" or "slave". | ||
1623 | - reg : Offset and length of the register set for the device | ||
1624 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1625 | field that represents an encoding of the sense and level | ||
1626 | information for the interrupt. This should be encoded based on | ||
1627 | the information in section 2) depending on the type of interrupt | ||
1628 | controller you have. | ||
1629 | - interrupt-parent : the phandle for the interrupt controller that | ||
1630 | services interrupts for this device. | ||
1631 | |||
1632 | Example(slave): | ||
1633 | usb@6c0 { | ||
1634 | compatible = "qe_udc"; | ||
1635 | reg = <6c0 40>; | ||
1636 | interrupts = <8b 0>; | ||
1637 | interrupt-parent = <700>; | ||
1638 | mode = "slave"; | ||
1639 | }; | ||
1640 | |||
1641 | |||
1642 | iv) UCC (Unified Communications Controllers) | ||
1643 | |||
1644 | Required properties: | ||
1645 | - device_type : should be "network", "hldc", "uart", "transparent" | ||
1646 | "bisync", "atm", or "serial". | ||
1647 | - compatible : could be "ucc_geth" or "fsl_atm" and so on. | ||
1648 | - cell-index : the ucc number(1-8), corresponding to UCCx in UM. | ||
1649 | - reg : Offset and length of the register set for the device | ||
1650 | - interrupts : <a b> where a is the interrupt number and b is a | ||
1651 | field that represents an encoding of the sense and level | ||
1652 | information for the interrupt. This should be encoded based on | ||
1653 | the information in section 2) depending on the type of interrupt | ||
1654 | controller you have. | ||
1655 | - interrupt-parent : the phandle for the interrupt controller that | ||
1656 | services interrupts for this device. | ||
1657 | - pio-handle : The phandle for the Parallel I/O port configuration. | ||
1658 | - port-number : for UART drivers, the port number to use, between 0 and 3. | ||
1659 | This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0. | ||
1660 | The port number is added to the minor number of the device. Unlike the | ||
1661 | CPM UART driver, the port-number is required for the QE UART driver. | ||
1662 | - soft-uart : for UART drivers, if specified this means the QE UART device | ||
1663 | driver should use "Soft-UART" mode, which is needed on some SOCs that have | ||
1664 | broken UART hardware. Soft-UART is provided via a microcode upload. | ||
1665 | - rx-clock-name: the UCC receive clock source | ||
1666 | "none": clock source is disabled | ||
1667 | "brg1" through "brg16": clock source is BRG1-BRG16, respectively | ||
1668 | "clk1" through "clk24": clock source is CLK1-CLK24, respectively | ||
1669 | - tx-clock-name: the UCC transmit clock source | ||
1670 | "none": clock source is disabled | ||
1671 | "brg1" through "brg16": clock source is BRG1-BRG16, respectively | ||
1672 | "clk1" through "clk24": clock source is CLK1-CLK24, respectively | ||
1673 | The following two properties are deprecated. rx-clock has been replaced | ||
1674 | with rx-clock-name, and tx-clock has been replaced with tx-clock-name. | ||
1675 | Drivers that currently use the deprecated properties should continue to | ||
1676 | do so, in order to support older device trees, but they should be updated | ||
1677 | to check for the new properties first. | ||
1678 | - rx-clock : represents the UCC receive clock source. | ||
1679 | 0x00 : clock source is disabled; | ||
1680 | 0x1~0x10 : clock source is BRG1~BRG16 respectively; | ||
1681 | 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. | ||
1682 | - tx-clock: represents the UCC transmit clock source; | ||
1683 | 0x00 : clock source is disabled; | ||
1684 | 0x1~0x10 : clock source is BRG1~BRG16 respectively; | ||
1685 | 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. | ||
1686 | |||
1687 | Required properties for network device_type: | ||
1688 | - mac-address : list of bytes representing the ethernet address. | ||
1689 | - phy-handle : The phandle for the PHY connected to this controller. | ||
1690 | |||
1691 | Recommended properties: | ||
1692 | - phy-connection-type : a string naming the controller/PHY interface type, | ||
1693 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal | ||
1694 | Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), | ||
1695 | "tbi", or "rtbi". | ||
1696 | |||
1697 | Example: | ||
1698 | ucc@2000 { | ||
1699 | device_type = "network"; | ||
1700 | compatible = "ucc_geth"; | ||
1701 | cell-index = <1>; | ||
1702 | reg = <2000 200>; | ||
1703 | interrupts = <a0 0>; | ||
1704 | interrupt-parent = <700>; | ||
1705 | mac-address = [ 00 04 9f 00 23 23 ]; | ||
1706 | rx-clock-name = "none"; | ||
1707 | tx-clock-name = "clk9"; | ||
1708 | phy-handle = <212000>; | ||
1709 | phy-connection-type = "gmii"; | ||
1710 | pio-handle = <140001>; | ||
1711 | }; | 1296 | }; |
1712 | 1297 | ||
1713 | 1298 | c) CFI or JEDEC memory-mapped NOR flash | |
1714 | v) Parallel I/O Ports | ||
1715 | |||
1716 | This node configures Parallel I/O ports for CPUs with QE support. | ||
1717 | The node should reside in the "soc" node of the tree. For each | ||
1718 | device that uses parallel I/O ports, a child node should be created. | ||
1719 | See the definition of the Pin configuration nodes below for more | ||
1720 | information. | ||
1721 | |||
1722 | Required properties: | ||
1723 | - device_type : should be "par_io". | ||
1724 | - reg : offset to the register set and its length. | ||
1725 | - num-ports : number of Parallel I/O ports | ||
1726 | |||
1727 | Example: | ||
1728 | par_io@1400 { | ||
1729 | reg = <1400 100>; | ||
1730 | #address-cells = <1>; | ||
1731 | #size-cells = <0>; | ||
1732 | device_type = "par_io"; | ||
1733 | num-ports = <7>; | ||
1734 | ucc_pin@01 { | ||
1735 | ...... | ||
1736 | }; | ||
1737 | |||
1738 | |||
1739 | vi) Pin configuration nodes | ||
1740 | |||
1741 | Required properties: | ||
1742 | - linux,phandle : phandle of this node; likely referenced by a QE | ||
1743 | device. | ||
1744 | - pio-map : array of pin configurations. Each pin is defined by 6 | ||
1745 | integers. The six numbers are respectively: port, pin, dir, | ||
1746 | open_drain, assignment, has_irq. | ||
1747 | - port : port number of the pin; 0-6 represent port A-G in UM. | ||
1748 | - pin : pin number in the port. | ||
1749 | - dir : direction of the pin, should encode as follows: | ||
1750 | |||
1751 | 0 = The pin is disabled | ||
1752 | 1 = The pin is an output | ||
1753 | 2 = The pin is an input | ||
1754 | 3 = The pin is I/O | ||
1755 | |||
1756 | - open_drain : indicates the pin is normal or wired-OR: | ||
1757 | |||
1758 | 0 = The pin is actively driven as an output | ||
1759 | 1 = The pin is an open-drain driver. As an output, the pin is | ||
1760 | driven active-low, otherwise it is three-stated. | ||
1761 | |||
1762 | - assignment : function number of the pin according to the Pin Assignment | ||
1763 | tables in User Manual. Each pin can have up to 4 possible functions in | ||
1764 | QE and two options for CPM. | ||
1765 | - has_irq : indicates if the pin is used as source of external | ||
1766 | interrupts. | ||
1767 | |||
1768 | Example: | ||
1769 | ucc_pin@01 { | ||
1770 | linux,phandle = <140001>; | ||
1771 | pio-map = < | ||
1772 | /* port pin dir open_drain assignment has_irq */ | ||
1773 | 0 3 1 0 1 0 /* TxD0 */ | ||
1774 | 0 4 1 0 1 0 /* TxD1 */ | ||
1775 | 0 5 1 0 1 0 /* TxD2 */ | ||
1776 | 0 6 1 0 1 0 /* TxD3 */ | ||
1777 | 1 6 1 0 3 0 /* TxD4 */ | ||
1778 | 1 7 1 0 1 0 /* TxD5 */ | ||
1779 | 1 9 1 0 2 0 /* TxD6 */ | ||
1780 | 1 a 1 0 2 0 /* TxD7 */ | ||
1781 | 0 9 2 0 1 0 /* RxD0 */ | ||
1782 | 0 a 2 0 1 0 /* RxD1 */ | ||
1783 | 0 b 2 0 1 0 /* RxD2 */ | ||
1784 | 0 c 2 0 1 0 /* RxD3 */ | ||
1785 | 0 d 2 0 1 0 /* RxD4 */ | ||
1786 | 1 1 2 0 2 0 /* RxD5 */ | ||
1787 | 1 0 2 0 2 0 /* RxD6 */ | ||
1788 | 1 4 2 0 2 0 /* RxD7 */ | ||
1789 | 0 7 1 0 1 0 /* TX_EN */ | ||
1790 | 0 8 1 0 1 0 /* TX_ER */ | ||
1791 | 0 f 2 0 1 0 /* RX_DV */ | ||
1792 | 0 10 2 0 1 0 /* RX_ER */ | ||
1793 | 0 0 2 0 1 0 /* RX_CLK */ | ||
1794 | 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ | ||
1795 | 2 8 2 0 1 0>; /* GTX125 - CLK9 */ | ||
1796 | }; | ||
1797 | |||
1798 | vii) Multi-User RAM (MURAM) | ||
1799 | |||
1800 | Required properties: | ||
1801 | - compatible : should be "fsl,qe-muram", "fsl,cpm-muram". | ||
1802 | - mode : could be "host" or "slave". | ||
1803 | - ranges : Should be defined as specified in 1) to describe the | ||
1804 | translation of MURAM addresses. | ||
1805 | - data-only : sub-node which defines the address area under MURAM | ||
1806 | bus that can be allocated as data/parameter | ||
1807 | |||
1808 | Example: | ||
1809 | |||
1810 | muram@10000 { | ||
1811 | compatible = "fsl,qe-muram", "fsl,cpm-muram"; | ||
1812 | ranges = <0 00010000 0000c000>; | ||
1813 | |||
1814 | data-only@0{ | ||
1815 | compatible = "fsl,qe-muram-data", | ||
1816 | "fsl,cpm-muram-data"; | ||
1817 | reg = <0 c000>; | ||
1818 | }; | ||
1819 | }; | ||
1820 | |||
1821 | viii) Uploaded QE firmware | ||
1822 | |||
1823 | If a new firmware has been uploaded to the QE (usually by the | ||
1824 | boot loader), then a 'firmware' child node should be added to the QE | ||
1825 | node. This node provides information on the uploaded firmware that | ||
1826 | device drivers may need. | ||
1827 | |||
1828 | Required properties: | ||
1829 | - id: The string name of the firmware. This is taken from the 'id' | ||
1830 | member of the qe_firmware structure of the uploaded firmware. | ||
1831 | Device drivers can search this string to determine if the | ||
1832 | firmware they want is already present. | ||
1833 | - extended-modes: The Extended Modes bitfield, taken from the | ||
1834 | firmware binary. It is a 64-bit number represented | ||
1835 | as an array of two 32-bit numbers. | ||
1836 | - virtual-traps: The virtual traps, taken from the firmware binary. | ||
1837 | It is an array of 8 32-bit numbers. | ||
1838 | |||
1839 | Example: | ||
1840 | |||
1841 | firmware { | ||
1842 | id = "Soft-UART"; | ||
1843 | extended-modes = <0 0>; | ||
1844 | virtual-traps = <0 0 0 0 0 0 0 0>; | ||
1845 | } | ||
1846 | |||
1847 | j) CFI or JEDEC memory-mapped NOR flash | ||
1848 | 1299 | ||
1849 | Flash chips (Memory Technology Devices) are often used for solid state | 1300 | Flash chips (Memory Technology Devices) are often used for solid state |
1850 | file systems on embedded devices. | 1301 | file systems on embedded devices. |
@@ -1908,268 +1359,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1908 | }; | 1359 | }; |
1909 | }; | 1360 | }; |
1910 | 1361 | ||
1911 | k) Global Utilities Block | 1362 | d) 4xx/Axon EMAC ethernet nodes |
1912 | |||
1913 | The global utilities block controls power management, I/O device | ||
1914 | enabling, power-on-reset configuration monitoring, general-purpose | ||
1915 | I/O signal configuration, alternate function selection for multiplexed | ||
1916 | signals, and clock control. | ||
1917 | |||
1918 | Required properties: | ||
1919 | |||
1920 | - compatible : Should define the compatible device type for | ||
1921 | global-utilities. | ||
1922 | - reg : Offset and length of the register set for the device. | ||
1923 | |||
1924 | Recommended properties: | ||
1925 | |||
1926 | - fsl,has-rstcr : Indicates that the global utilities register set | ||
1927 | contains a functioning "reset control register" (i.e. the board | ||
1928 | is wired to reset upon setting the HRESET_REQ bit in this register). | ||
1929 | |||
1930 | Example: | ||
1931 | |||
1932 | global-utilities@e0000 { /* global utilities block */ | ||
1933 | compatible = "fsl,mpc8548-guts"; | ||
1934 | reg = <e0000 1000>; | ||
1935 | fsl,has-rstcr; | ||
1936 | }; | ||
1937 | |||
1938 | l) Freescale Communications Processor Module | ||
1939 | |||
1940 | NOTE: This is an interim binding, and will likely change slightly, | ||
1941 | as more devices are supported. The QE bindings especially are | ||
1942 | incomplete. | ||
1943 | |||
1944 | i) Root CPM node | ||
1945 | |||
1946 | Properties: | ||
1947 | - compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe". | ||
1948 | - reg : A 48-byte region beginning with CPCR. | ||
1949 | |||
1950 | Example: | ||
1951 | cpm@119c0 { | ||
1952 | #address-cells = <1>; | ||
1953 | #size-cells = <1>; | ||
1954 | #interrupt-cells = <2>; | ||
1955 | compatible = "fsl,mpc8272-cpm", "fsl,cpm2"; | ||
1956 | reg = <119c0 30>; | ||
1957 | } | ||
1958 | |||
1959 | ii) Properties common to multiple CPM/QE devices | ||
1960 | |||
1961 | - fsl,cpm-command : This value is ORed with the opcode and command flag | ||
1962 | to specify the device on which a CPM command operates. | ||
1963 | |||
1964 | - fsl,cpm-brg : Indicates which baud rate generator the device | ||
1965 | is associated with. If absent, an unused BRG | ||
1966 | should be dynamically allocated. If zero, the | ||
1967 | device uses an external clock rather than a BRG. | ||
1968 | |||
1969 | - reg : Unless otherwise specified, the first resource represents the | ||
1970 | scc/fcc/ucc registers, and the second represents the device's | ||
1971 | parameter RAM region (if it has one). | ||
1972 | |||
1973 | iii) Serial | ||
1974 | |||
1975 | Currently defined compatibles: | ||
1976 | - fsl,cpm1-smc-uart | ||
1977 | - fsl,cpm2-smc-uart | ||
1978 | - fsl,cpm1-scc-uart | ||
1979 | - fsl,cpm2-scc-uart | ||
1980 | - fsl,qe-uart | ||
1981 | |||
1982 | Example: | ||
1983 | |||
1984 | serial@11a00 { | ||
1985 | device_type = "serial"; | ||
1986 | compatible = "fsl,mpc8272-scc-uart", | ||
1987 | "fsl,cpm2-scc-uart"; | ||
1988 | reg = <11a00 20 8000 100>; | ||
1989 | interrupts = <28 8>; | ||
1990 | interrupt-parent = <&PIC>; | ||
1991 | fsl,cpm-brg = <1>; | ||
1992 | fsl,cpm-command = <00800000>; | ||
1993 | }; | ||
1994 | |||
1995 | iii) Network | ||
1996 | |||
1997 | Currently defined compatibles: | ||
1998 | - fsl,cpm1-scc-enet | ||
1999 | - fsl,cpm2-scc-enet | ||
2000 | - fsl,cpm1-fec-enet | ||
2001 | - fsl,cpm2-fcc-enet (third resource is GFEMR) | ||
2002 | - fsl,qe-enet | ||
2003 | |||
2004 | Example: | ||
2005 | |||
2006 | ethernet@11300 { | ||
2007 | device_type = "network"; | ||
2008 | compatible = "fsl,mpc8272-fcc-enet", | ||
2009 | "fsl,cpm2-fcc-enet"; | ||
2010 | reg = <11300 20 8400 100 11390 1>; | ||
2011 | local-mac-address = [ 00 00 00 00 00 00 ]; | ||
2012 | interrupts = <20 8>; | ||
2013 | interrupt-parent = <&PIC>; | ||
2014 | phy-handle = <&PHY0>; | ||
2015 | fsl,cpm-command = <12000300>; | ||
2016 | }; | ||
2017 | |||
2018 | iv) MDIO | ||
2019 | |||
2020 | Currently defined compatibles: | ||
2021 | fsl,pq1-fec-mdio (reg is same as first resource of FEC device) | ||
2022 | fsl,cpm2-mdio-bitbang (reg is port C registers) | ||
2023 | |||
2024 | Properties for fsl,cpm2-mdio-bitbang: | ||
2025 | fsl,mdio-pin : pin of port C controlling mdio data | ||
2026 | fsl,mdc-pin : pin of port C controlling mdio clock | ||
2027 | |||
2028 | Example: | ||
2029 | |||
2030 | mdio@10d40 { | ||
2031 | device_type = "mdio"; | ||
2032 | compatible = "fsl,mpc8272ads-mdio-bitbang", | ||
2033 | "fsl,mpc8272-mdio-bitbang", | ||
2034 | "fsl,cpm2-mdio-bitbang"; | ||
2035 | reg = <10d40 14>; | ||
2036 | #address-cells = <1>; | ||
2037 | #size-cells = <0>; | ||
2038 | fsl,mdio-pin = <12>; | ||
2039 | fsl,mdc-pin = <13>; | ||
2040 | }; | ||
2041 | |||
2042 | v) Baud Rate Generators | ||
2043 | |||
2044 | Currently defined compatibles: | ||
2045 | fsl,cpm-brg | ||
2046 | fsl,cpm1-brg | ||
2047 | fsl,cpm2-brg | ||
2048 | |||
2049 | Properties: | ||
2050 | - reg : There may be an arbitrary number of reg resources; BRG | ||
2051 | numbers are assigned to these in order. | ||
2052 | - clock-frequency : Specifies the base frequency driving | ||
2053 | the BRG. | ||
2054 | |||
2055 | Example: | ||
2056 | |||
2057 | brg@119f0 { | ||
2058 | compatible = "fsl,mpc8272-brg", | ||
2059 | "fsl,cpm2-brg", | ||
2060 | "fsl,cpm-brg"; | ||
2061 | reg = <119f0 10 115f0 10>; | ||
2062 | clock-frequency = <d#25000000>; | ||
2063 | }; | ||
2064 | |||
2065 | vi) Interrupt Controllers | ||
2066 | |||
2067 | Currently defined compatibles: | ||
2068 | - fsl,cpm1-pic | ||
2069 | - only one interrupt cell | ||
2070 | - fsl,pq1-pic | ||
2071 | - fsl,cpm2-pic | ||
2072 | - second interrupt cell is level/sense: | ||
2073 | - 2 is falling edge | ||
2074 | - 8 is active low | ||
2075 | |||
2076 | Example: | ||
2077 | |||
2078 | interrupt-controller@10c00 { | ||
2079 | #interrupt-cells = <2>; | ||
2080 | interrupt-controller; | ||
2081 | reg = <10c00 80>; | ||
2082 | compatible = "mpc8272-pic", "fsl,cpm2-pic"; | ||
2083 | }; | ||
2084 | |||
2085 | vii) USB (Universal Serial Bus Controller) | ||
2086 | |||
2087 | Properties: | ||
2088 | - compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb" | ||
2089 | |||
2090 | Example: | ||
2091 | usb@11bc0 { | ||
2092 | #address-cells = <1>; | ||
2093 | #size-cells = <0>; | ||
2094 | compatible = "fsl,cpm2-usb"; | ||
2095 | reg = <11b60 18 8b00 100>; | ||
2096 | interrupts = <b 8>; | ||
2097 | interrupt-parent = <&PIC>; | ||
2098 | fsl,cpm-command = <2e600000>; | ||
2099 | }; | ||
2100 | |||
2101 | viii) Multi-User RAM (MURAM) | ||
2102 | |||
2103 | The multi-user/dual-ported RAM is expressed as a bus under the CPM node. | ||
2104 | |||
2105 | Ranges must be set up subject to the following restrictions: | ||
2106 | |||
2107 | - Children's reg nodes must be offsets from the start of all muram, even | ||
2108 | if the user-data area does not begin at zero. | ||
2109 | - If multiple range entries are used, the difference between the parent | ||
2110 | address and the child address must be the same in all, so that a single | ||
2111 | mapping can cover them all while maintaining the ability to determine | ||
2112 | CPM-side offsets with pointer subtraction. It is recommended that | ||
2113 | multiple range entries not be used. | ||
2114 | - A child address of zero must be translatable, even if no reg resources | ||
2115 | contain it. | ||
2116 | |||
2117 | A child "data" node must exist, compatible with "fsl,cpm-muram-data", to | ||
2118 | indicate the portion of muram that is usable by the OS for arbitrary | ||
2119 | purposes. The data node may have an arbitrary number of reg resources, | ||
2120 | all of which contribute to the allocatable muram pool. | ||
2121 | |||
2122 | Example, based on mpc8272: | ||
2123 | |||
2124 | muram@0 { | ||
2125 | #address-cells = <1>; | ||
2126 | #size-cells = <1>; | ||
2127 | ranges = <0 0 10000>; | ||
2128 | |||
2129 | data@0 { | ||
2130 | compatible = "fsl,cpm-muram-data"; | ||
2131 | reg = <0 2000 9800 800>; | ||
2132 | }; | ||
2133 | }; | ||
2134 | |||
2135 | m) Chipselect/Local Bus | ||
2136 | |||
2137 | Properties: | ||
2138 | - name : Should be localbus | ||
2139 | - #address-cells : Should be either two or three. The first cell is the | ||
2140 | chipselect number, and the remaining cells are the | ||
2141 | offset into the chipselect. | ||
2142 | - #size-cells : Either one or two, depending on how large each chipselect | ||
2143 | can be. | ||
2144 | - ranges : Each range corresponds to a single chipselect, and cover | ||
2145 | the entire access window as configured. | ||
2146 | |||
2147 | Example: | ||
2148 | localbus@f0010100 { | ||
2149 | compatible = "fsl,mpc8272-localbus", | ||
2150 | "fsl,pq2-localbus"; | ||
2151 | #address-cells = <2>; | ||
2152 | #size-cells = <1>; | ||
2153 | reg = <f0010100 40>; | ||
2154 | |||
2155 | ranges = <0 0 fe000000 02000000 | ||
2156 | 1 0 f4500000 00008000>; | ||
2157 | |||
2158 | flash@0,0 { | ||
2159 | compatible = "jedec-flash"; | ||
2160 | reg = <0 0 2000000>; | ||
2161 | bank-width = <4>; | ||
2162 | device-width = <1>; | ||
2163 | }; | ||
2164 | |||
2165 | board-control@1,0 { | ||
2166 | reg = <1 0 20>; | ||
2167 | compatible = "fsl,mpc8272ads-bcsr"; | ||
2168 | }; | ||
2169 | }; | ||
2170 | |||
2171 | |||
2172 | n) 4xx/Axon EMAC ethernet nodes | ||
2173 | 1363 | ||
2174 | The EMAC ethernet controller in IBM and AMCC 4xx chips, and also | 1364 | The EMAC ethernet controller in IBM and AMCC 4xx chips, and also |
2175 | the Axon bridge. To operate this needs to interact with a ths | 1365 | the Axon bridge. To operate this needs to interact with a ths |
@@ -2317,7 +1507,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
2317 | available. | 1507 | available. |
2318 | For Axon: 0x0000012a | 1508 | For Axon: 0x0000012a |
2319 | 1509 | ||
2320 | o) Xilinx IP cores | 1510 | e) Xilinx IP cores |
2321 | 1511 | ||
2322 | The Xilinx EDK toolchain ships with a set of IP cores (devices) for use | 1512 | The Xilinx EDK toolchain ships with a set of IP cores (devices) for use |
2323 | in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range | 1513 | in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range |
@@ -2611,206 +1801,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
2611 | - reg-offset : A value of 3 is required | 1801 | - reg-offset : A value of 3 is required |
2612 | - reg-shift : A value of 2 is required | 1802 | - reg-shift : A value of 2 is required |
2613 | 1803 | ||
2614 | 1804 | f) USB EHCI controllers | |
2615 | p) Freescale Synchronous Serial Interface | ||
2616 | |||
2617 | The SSI is a serial device that communicates with audio codecs. It can | ||
2618 | be programmed in AC97, I2S, left-justified, or right-justified modes. | ||
2619 | |||
2620 | Required properties: | ||
2621 | - compatible : compatible list, containing "fsl,ssi" | ||
2622 | - cell-index : the SSI, <0> = SSI1, <1> = SSI2, and so on | ||
2623 | - reg : offset and length of the register set for the device | ||
2624 | - interrupts : <a b> where a is the interrupt number and b is a | ||
2625 | field that represents an encoding of the sense and | ||
2626 | level information for the interrupt. This should be | ||
2627 | encoded based on the information in section 2) | ||
2628 | depending on the type of interrupt controller you | ||
2629 | have. | ||
2630 | - interrupt-parent : the phandle for the interrupt controller that | ||
2631 | services interrupts for this device. | ||
2632 | - fsl,mode : the operating mode for the SSI interface | ||
2633 | "i2s-slave" - I2S mode, SSI is clock slave | ||
2634 | "i2s-master" - I2S mode, SSI is clock master | ||
2635 | "lj-slave" - left-justified mode, SSI is clock slave | ||
2636 | "lj-master" - l.j. mode, SSI is clock master | ||
2637 | "rj-slave" - right-justified mode, SSI is clock slave | ||
2638 | "rj-master" - r.j., SSI is clock master | ||
2639 | "ac97-slave" - AC97 mode, SSI is clock slave | ||
2640 | "ac97-master" - AC97 mode, SSI is clock master | ||
2641 | |||
2642 | Optional properties: | ||
2643 | - codec-handle : phandle to a 'codec' node that defines an audio | ||
2644 | codec connected to this SSI. This node is typically | ||
2645 | a child of an I2C or other control node. | ||
2646 | |||
2647 | Child 'codec' node required properties: | ||
2648 | - compatible : compatible list, contains the name of the codec | ||
2649 | |||
2650 | Child 'codec' node optional properties: | ||
2651 | - clock-frequency : The frequency of the input clock, which typically | ||
2652 | comes from an on-board dedicated oscillator. | ||
2653 | |||
2654 | * Freescale 83xx DMA Controller | ||
2655 | |||
2656 | Freescale PowerPC 83xx have on chip general purpose DMA controllers. | ||
2657 | |||
2658 | Required properties: | ||
2659 | |||
2660 | - compatible : compatible list, contains 2 entries, first is | ||
2661 | "fsl,CHIP-dma", where CHIP is the processor | ||
2662 | (mpc8349, mpc8360, etc.) and the second is | ||
2663 | "fsl,elo-dma" | ||
2664 | - reg : <registers mapping for DMA general status reg> | ||
2665 | - ranges : Should be defined as specified in 1) to describe the | ||
2666 | DMA controller channels. | ||
2667 | - cell-index : controller index. 0 for controller @ 0x8100 | ||
2668 | - interrupts : <interrupt mapping for DMA IRQ> | ||
2669 | - interrupt-parent : optional, if needed for interrupt mapping | ||
2670 | |||
2671 | |||
2672 | - DMA channel nodes: | ||
2673 | - compatible : compatible list, contains 2 entries, first is | ||
2674 | "fsl,CHIP-dma-channel", where CHIP is the processor | ||
2675 | (mpc8349, mpc8350, etc.) and the second is | ||
2676 | "fsl,elo-dma-channel" | ||
2677 | - reg : <registers mapping for channel> | ||
2678 | - cell-index : dma channel index starts at 0. | ||
2679 | |||
2680 | Optional properties: | ||
2681 | - interrupts : <interrupt mapping for DMA channel IRQ> | ||
2682 | (on 83xx this is expected to be identical to | ||
2683 | the interrupts property of the parent node) | ||
2684 | - interrupt-parent : optional, if needed for interrupt mapping | ||
2685 | |||
2686 | Example: | ||
2687 | dma@82a8 { | ||
2688 | #address-cells = <1>; | ||
2689 | #size-cells = <1>; | ||
2690 | compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; | ||
2691 | reg = <82a8 4>; | ||
2692 | ranges = <0 8100 1a4>; | ||
2693 | interrupt-parent = <&ipic>; | ||
2694 | interrupts = <47 8>; | ||
2695 | cell-index = <0>; | ||
2696 | dma-channel@0 { | ||
2697 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
2698 | cell-index = <0>; | ||
2699 | reg = <0 80>; | ||
2700 | }; | ||
2701 | dma-channel@80 { | ||
2702 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
2703 | cell-index = <1>; | ||
2704 | reg = <80 80>; | ||
2705 | }; | ||
2706 | dma-channel@100 { | ||
2707 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
2708 | cell-index = <2>; | ||
2709 | reg = <100 80>; | ||
2710 | }; | ||
2711 | dma-channel@180 { | ||
2712 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
2713 | cell-index = <3>; | ||
2714 | reg = <180 80>; | ||
2715 | }; | ||
2716 | }; | ||
2717 | |||
2718 | * Freescale 85xx/86xx DMA Controller | ||
2719 | |||
2720 | Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers. | ||
2721 | |||
2722 | Required properties: | ||
2723 | |||
2724 | - compatible : compatible list, contains 2 entries, first is | ||
2725 | "fsl,CHIP-dma", where CHIP is the processor | ||
2726 | (mpc8540, mpc8540, etc.) and the second is | ||
2727 | "fsl,eloplus-dma" | ||
2728 | - reg : <registers mapping for DMA general status reg> | ||
2729 | - cell-index : controller index. 0 for controller @ 0x21000, | ||
2730 | 1 for controller @ 0xc000 | ||
2731 | - ranges : Should be defined as specified in 1) to describe the | ||
2732 | DMA controller channels. | ||
2733 | |||
2734 | - DMA channel nodes: | ||
2735 | - compatible : compatible list, contains 2 entries, first is | ||
2736 | "fsl,CHIP-dma-channel", where CHIP is the processor | ||
2737 | (mpc8540, mpc8560, etc.) and the second is | ||
2738 | "fsl,eloplus-dma-channel" | ||
2739 | - cell-index : dma channel index starts at 0. | ||
2740 | - reg : <registers mapping for channel> | ||
2741 | - interrupts : <interrupt mapping for DMA channel IRQ> | ||
2742 | - interrupt-parent : optional, if needed for interrupt mapping | ||
2743 | |||
2744 | Example: | ||
2745 | dma@21300 { | ||
2746 | #address-cells = <1>; | ||
2747 | #size-cells = <1>; | ||
2748 | compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; | ||
2749 | reg = <21300 4>; | ||
2750 | ranges = <0 21100 200>; | ||
2751 | cell-index = <0>; | ||
2752 | dma-channel@0 { | ||
2753 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
2754 | reg = <0 80>; | ||
2755 | cell-index = <0>; | ||
2756 | interrupt-parent = <&mpic>; | ||
2757 | interrupts = <14 2>; | ||
2758 | }; | ||
2759 | dma-channel@80 { | ||
2760 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
2761 | reg = <80 80>; | ||
2762 | cell-index = <1>; | ||
2763 | interrupt-parent = <&mpic>; | ||
2764 | interrupts = <15 2>; | ||
2765 | }; | ||
2766 | dma-channel@100 { | ||
2767 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
2768 | reg = <100 80>; | ||
2769 | cell-index = <2>; | ||
2770 | interrupt-parent = <&mpic>; | ||
2771 | interrupts = <16 2>; | ||
2772 | }; | ||
2773 | dma-channel@180 { | ||
2774 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
2775 | reg = <180 80>; | ||
2776 | cell-index = <3>; | ||
2777 | interrupt-parent = <&mpic>; | ||
2778 | interrupts = <17 2>; | ||
2779 | }; | ||
2780 | }; | ||
2781 | |||
2782 | * Freescale 8xxx/3.0 Gb/s SATA nodes | ||
2783 | |||
2784 | SATA nodes are defined to describe on-chip Serial ATA controllers. | ||
2785 | Each SATA port should have its own node. | ||
2786 | |||
2787 | Required properties: | ||
2788 | - compatible : compatible list, contains 2 entries, first is | ||
2789 | "fsl,CHIP-sata", where CHIP is the processor | ||
2790 | (mpc8315, mpc8379, etc.) and the second is | ||
2791 | "fsl,pq-sata" | ||
2792 | - interrupts : <interrupt mapping for SATA IRQ> | ||
2793 | - cell-index : controller index. | ||
2794 | 1 for controller @ 0x18000 | ||
2795 | 2 for controller @ 0x19000 | ||
2796 | 3 for controller @ 0x1a000 | ||
2797 | 4 for controller @ 0x1b000 | ||
2798 | |||
2799 | Optional properties: | ||
2800 | - interrupt-parent : optional, if needed for interrupt mapping | ||
2801 | - reg : <registers mapping> | ||
2802 | |||
2803 | Example: | ||
2804 | |||
2805 | sata@18000 { | ||
2806 | compatible = "fsl,mpc8379-sata", "fsl,pq-sata"; | ||
2807 | reg = <0x18000 0x1000>; | ||
2808 | cell-index = <1>; | ||
2809 | interrupts = <2c 8>; | ||
2810 | interrupt-parent = < &ipic >; | ||
2811 | }; | ||
2812 | |||
2813 | q) USB EHCI controllers | ||
2814 | 1805 | ||
2815 | Required properties: | 1806 | Required properties: |
2816 | - compatible : should be "usb-ehci". | 1807 | - compatible : should be "usb-ehci". |
@@ -2870,6 +1861,26 @@ platforms are moved over to use the flattened-device-tree model. | |||
2870 | reg = <0xe8000000 32>; | 1861 | reg = <0xe8000000 32>; |
2871 | }; | 1862 | }; |
2872 | 1863 | ||
 1864 | g) MDIO on GPIOs | ||
1865 | |||
1866 | Currently defined compatibles: | ||
 1867 | - virtual,mdio-gpio | ||
1868 | |||
1869 | MDC and MDIO lines connected to GPIO controllers are listed in the | ||
1870 | gpios property as described in section VIII.1 in the following order: | ||
1871 | |||
1872 | MDC, MDIO. | ||
1873 | |||
1874 | Example: | ||
1875 | |||
1876 | mdio { | ||
1877 | compatible = "virtual,mdio-gpio"; | ||
1878 | #address-cells = <1>; | ||
1879 | #size-cells = <0>; | ||
1880 | gpios = <&qe_pio_a 11 | ||
1881 | &qe_pio_c 6>; | ||
1882 | }; | ||
1883 | |||
2873 | VII - Marvell Discovery mv64[345]6x System Controller chips | 1884 | VII - Marvell Discovery mv64[345]6x System Controller chips |
2874 | =========================================================== | 1885 | =========================================================== |
2875 | 1886 | ||
@@ -3622,14 +2633,11 @@ not necessary as they are usually the same as the root node. | |||
3622 | 2633 | ||
3623 | pic@40000 { | 2634 | pic@40000 { |
3624 | linux,phandle = <40000>; | 2635 | linux,phandle = <40000>; |
3625 | clock-frequency = <0>; | ||
3626 | interrupt-controller; | 2636 | interrupt-controller; |
3627 | #address-cells = <0>; | 2637 | #address-cells = <0>; |
3628 | reg = <40000 40000>; | 2638 | reg = <40000 40000>; |
3629 | built-in; | ||
3630 | compatible = "chrp,open-pic"; | 2639 | compatible = "chrp,open-pic"; |
3631 | device_type = "open-pic"; | 2640 | device_type = "open-pic"; |
3632 | big-endian; | ||
3633 | }; | 2641 | }; |
3634 | 2642 | ||
3635 | i2c@3000 { | 2643 | i2c@3000 { |
diff --git a/Documentation/powerpc/bootwrapper.txt b/Documentation/powerpc/bootwrapper.txt new file mode 100644 index 000000000000..d60fced5e1cc --- /dev/null +++ b/Documentation/powerpc/bootwrapper.txt | |||
@@ -0,0 +1,141 @@ | |||
1 | The PowerPC boot wrapper | ||
2 | ------------------------ | ||
3 | Copyright (C) Secret Lab Technologies Ltd. | ||
4 | |||
 5 | PowerPC image targets compress and wrap the kernel image (vmlinux) with | ||
6 | a boot wrapper to make it usable by the system firmware. There is no | ||
7 | standard PowerPC firmware interface, so the boot wrapper is designed to | ||
8 | be adaptable for each kind of image that needs to be built. | ||
9 | |||
10 | The boot wrapper can be found in the arch/powerpc/boot/ directory. The | ||
11 | Makefile in that directory has targets for all the available image types. | ||
12 | The different image types are used to support all of the various firmware | ||
13 | interfaces found on PowerPC platforms. OpenFirmware is the most commonly | ||
14 | used firmware type on general purpose PowerPC systems from Apple, IBM and | ||
15 | others. U-Boot is typically found on embedded PowerPC hardware, but there | ||
16 | are a handful of other firmware implementations which are also popular. Each | ||
17 | firmware interface requires a different image format. | ||
18 | |||
19 | The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and | ||
20 | it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target | ||
 21 | image. The details of the build system are discussed in the next section. | ||
22 | Currently, the following image format targets exist: | ||
23 | |||
 24 | cuImage.%: Backwards compatible uImage for older versions of | ||
25 | U-Boot (for versions that don't understand the device | ||
26 | tree). This image embeds a device tree blob inside | ||
27 | the image. The boot wrapper, kernel and device tree | ||
28 | are all embedded inside the U-Boot uImage file format | ||
29 | with boot wrapper code that extracts data from the old | ||
30 | bd_info structure and loads the data into the device | ||
31 | tree before jumping into the kernel. | ||
32 | Because of the series of #ifdefs found in the | ||
33 | bd_info structure used in the old U-Boot interfaces, | ||
34 | cuImages are platform specific. Each specific | ||
35 | U-Boot platform has a different platform init file | ||
36 | which populates the embedded device tree with data | ||
37 | from the platform specific bd_info file. The platform | ||
38 | specific cuImage platform init code can be found in | ||
39 | arch/powerpc/boot/cuboot.*.c. Selection of the correct | ||
40 | cuImage init code for a specific board can be found in | ||
41 | the wrapper structure. | ||
42 | dtbImage.%: Similar to zImage, except device tree blob is embedded | ||
43 | inside the image instead of provided by firmware. The | ||
44 | output image file can be either an elf file or a flat | ||
45 | binary depending on the platform. | ||
46 | dtbImages are used on systems which do not have an | ||
47 | interface for passing a device tree directly. | ||
48 | dtbImages are similar to simpleImages except that | ||
49 | dtbImages have platform specific code for extracting | ||
50 | data from the board firmware, but simpleImages do not | ||
51 | talk to the firmware at all. | ||
52 | PlayStation 3 support uses dtbImage. So do Embedded | ||
53 | Planet boards using the PlanetCore firmware. Board | ||
54 | specific initialization code is typically found in a | ||
55 | file named arch/powerpc/boot/<platform>.c; but this | ||
56 | can be overridden by the wrapper script. | ||
57 | simpleImage.%: Firmware independent compressed image that does not | ||
58 | depend on any particular firmware interface and embeds | ||
59 | a device tree blob. This image is a flat binary that | ||
60 | can be loaded to any location in RAM and jumped to. | ||
61 | Firmware cannot pass any configuration data to the | ||
62 | kernel with this image type and it depends entirely on | ||
63 | the embedded device tree for all information. | ||
64 | The simpleImage is useful for booting systems with | ||
65 | an unknown firmware interface or for booting from | ||
66 | a debugger when no firmware is present (such as on | ||
67 | the Xilinx Virtex platform). The only assumption that | ||
68 | simpleImage makes is that RAM is correctly initialized | ||
69 | and that the MMU is either off or has RAM mapped to | ||
70 | base address 0. | ||
71 | simpleImage also supports inserting special platform | ||
72 | specific initialization code to the start of the bootup | ||
73 | sequence. The virtex405 platform uses this feature to | ||
74 | ensure that the cache is invalidated before caching | ||
75 | is enabled. Platform specific initialization code is | ||
76 | added as part of the wrapper script and is keyed on | ||
77 | the image target name. For example, all | ||
78 | simpleImage.virtex405-* targets will add the | ||
79 | virtex405-head.S initialization code (This also means | ||
80 | that the dts file for virtex405 targets should be | ||
 81 | named virtex405-<board>.dts). Search the wrapper | ||
82 | script for 'virtex405' and see the file | ||
83 | arch/powerpc/boot/virtex405-head.S for details. | ||
 84 | treeImage.%: Image format for use with OpenBIOS firmware found | ||
85 | on some ppc4xx hardware. This image embeds a device | ||
86 | tree blob inside the image. | ||
87 | uImage: Native image format used by U-Boot. The uImage target | ||
88 | does not add any boot code. It just wraps a compressed | ||
89 | vmlinux in the uImage data structure. This image | ||
90 | requires a version of U-Boot that is able to pass | ||
91 | a device tree to the kernel at boot. If using an older | ||
92 | version of U-Boot, then you need to use a cuImage | ||
93 | instead. | ||
94 | zImage.%: Image format which does not embed a device tree. | ||
95 | Used by OpenFirmware and other firmware interfaces | ||
96 | which are able to supply a device tree. This image | ||
97 | expects firmware to provide the device tree at boot. | ||
98 | Typically, if you have general purpose PowerPC | ||
99 | hardware then you want this image format. | ||
100 | |||
101 | Image types which embed a device tree blob (simpleImage, dtbImage, treeImage, | ||
102 | and cuImage) all generate the device tree blob from a file in the | ||
103 | arch/powerpc/boot/dts/ directory. The Makefile selects the correct device | ||
104 | tree source based on the name of the target. Therefore, if the kernel is | ||
105 | built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the | ||
106 | build system will use arch/powerpc/boot/dts/walnut.dts to build | ||
107 | treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build | ||
108 | the simpleImage.virtex405-ml403. | ||
109 | |||
110 | Two special targets called 'zImage' and 'zImage.initrd' also exist. These | ||
111 | targets build all the default images as selected by the kernel configuration. | ||
112 | Default images are selected by the boot wrapper Makefile | ||
113 | (arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look | ||
114 | at the Makefile to see which default image targets are available. | ||
115 | |||
116 | How it is built | ||
117 | --------------- | ||
118 | arch/powerpc is designed to support multiplatform kernels, which means | ||
119 | that a single vmlinux image can be booted on many different target boards. | ||
120 | It also means that the boot wrapper must be able to wrap for many kinds of | ||
121 | images on a single build. The design decision was made to not use any | ||
122 | conditional compilation code (#ifdef, etc) in the boot wrapper source code. | ||
123 | All of the boot wrapper pieces are buildable at any time regardless of the | ||
124 | kernel configuration. Building all the wrapper bits on every kernel build | ||
125 | also ensures that obscure parts of the wrapper are at the very least compile | ||
126 | tested in a large variety of environments. | ||
127 | |||
128 | The wrapper is adapted for different image types at link time by linking in | ||
129 | just the wrapper bits that are appropriate for the image type. The 'wrapper | ||
130 | script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and | ||
131 | is responsible for selecting the correct wrapper bits for the image type. | ||
132 | The arguments are well documented in the script's comment block, so they | ||
133 | are not repeated here. However, it is worth mentioning that the script | ||
134 | uses the -p (platform) argument as the main method of deciding which wrapper | ||
135 | bits to compile in. Look for the large 'case "$platform" in' block in the | ||
136 | middle of the script. This is also the place where platform specific fixups | ||
137 | can be selected by changing the link order. | ||
138 | |||
139 | In particular, care should be taken when working with cuImages. cuImage | ||
140 | wrapper bits are very board specific and care should be taken to make sure | ||
141 | the target you are trying to build is supported by the wrapper bits. | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt new file mode 100644 index 000000000000..74ae6f1cd2d6 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/board.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | * Board Control and Status (BCSR) | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - device_type : Should be "board-control" | ||
6 | - reg : Offset and length of the register set for the device | ||
7 | |||
8 | Example: | ||
9 | |||
10 | bcsr@f8000000 { | ||
11 | device_type = "board-control"; | ||
12 | reg = <f8000000 8000>; | ||
13 | }; | ||
14 | |||
15 | * Freescale on board FPGA | ||
16 | |||
 17 | These are the memory-mapped registers for the on-board FPGA. | ||
18 | |||
 19 | Required properties: | ||
20 | - compatible : should be "fsl,fpga-pixis". | ||
 21 | - reg : should contain the address and the length of the FPGA register | ||
22 | set. | ||
23 | |||
24 | Example (MPC8610HPCD): | ||
25 | |||
26 | board-control@e8000000 { | ||
27 | compatible = "fsl,fpga-pixis"; | ||
28 | reg = <0xe8000000 32>; | ||
29 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt new file mode 100644 index 000000000000..088fc471e03a --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt | |||
@@ -0,0 +1,67 @@ | |||
1 | * Freescale Communications Processor Module | ||
2 | |||
3 | NOTE: This is an interim binding, and will likely change slightly, | ||
4 | as more devices are supported. The QE bindings especially are | ||
5 | incomplete. | ||
6 | |||
7 | * Root CPM node | ||
8 | |||
9 | Properties: | ||
10 | - compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe". | ||
11 | - reg : A 48-byte region beginning with CPCR. | ||
12 | |||
13 | Example: | ||
14 | cpm@119c0 { | ||
15 | #address-cells = <1>; | ||
16 | #size-cells = <1>; | ||
17 | #interrupt-cells = <2>; | ||
18 | compatible = "fsl,mpc8272-cpm", "fsl,cpm2"; | ||
19 | reg = <119c0 30>; | ||
 20 | }; | ||
21 | |||
 22 | * Properties common to multiple CPM/QE devices | ||
23 | |||
24 | - fsl,cpm-command : This value is ORed with the opcode and command flag | ||
25 | to specify the device on which a CPM command operates. | ||
26 | |||
27 | - fsl,cpm-brg : Indicates which baud rate generator the device | ||
28 | is associated with. If absent, an unused BRG | ||
29 | should be dynamically allocated. If zero, the | ||
30 | device uses an external clock rather than a BRG. | ||
31 | |||
32 | - reg : Unless otherwise specified, the first resource represents the | ||
33 | scc/fcc/ucc registers, and the second represents the device's | ||
34 | parameter RAM region (if it has one). | ||
35 | |||
36 | * Multi-User RAM (MURAM) | ||
37 | |||
38 | The multi-user/dual-ported RAM is expressed as a bus under the CPM node. | ||
39 | |||
40 | Ranges must be set up subject to the following restrictions: | ||
41 | |||
42 | - Children's reg nodes must be offsets from the start of all muram, even | ||
43 | if the user-data area does not begin at zero. | ||
44 | - If multiple range entries are used, the difference between the parent | ||
45 | address and the child address must be the same in all, so that a single | ||
46 | mapping can cover them all while maintaining the ability to determine | ||
47 | CPM-side offsets with pointer subtraction. It is recommended that | ||
48 | multiple range entries not be used. | ||
49 | - A child address of zero must be translatable, even if no reg resources | ||
50 | contain it. | ||
51 | |||
52 | A child "data" node must exist, compatible with "fsl,cpm-muram-data", to | ||
53 | indicate the portion of muram that is usable by the OS for arbitrary | ||
54 | purposes. The data node may have an arbitrary number of reg resources, | ||
55 | all of which contribute to the allocatable muram pool. | ||
56 | |||
57 | Example, based on mpc8272: | ||
58 | muram@0 { | ||
59 | #address-cells = <1>; | ||
60 | #size-cells = <1>; | ||
61 | ranges = <0 0 10000>; | ||
62 | |||
63 | data@0 { | ||
64 | compatible = "fsl,cpm-muram-data"; | ||
65 | reg = <0 2000 9800 800>; | ||
66 | }; | ||
67 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/brg.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/brg.txt new file mode 100644 index 000000000000..4c7d45eaf025 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/brg.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | * Baud Rate Generators | ||
2 | |||
3 | Currently defined compatibles: | ||
4 | fsl,cpm-brg | ||
5 | fsl,cpm1-brg | ||
6 | fsl,cpm2-brg | ||
7 | |||
8 | Properties: | ||
9 | - reg : There may be an arbitrary number of reg resources; BRG | ||
10 | numbers are assigned to these in order. | ||
11 | - clock-frequency : Specifies the base frequency driving | ||
12 | the BRG. | ||
13 | |||
14 | Example: | ||
15 | brg@119f0 { | ||
16 | compatible = "fsl,mpc8272-brg", | ||
17 | "fsl,cpm2-brg", | ||
18 | "fsl,cpm-brg"; | ||
19 | reg = <119f0 10 115f0 10>; | ||
20 | clock-frequency = <d#25000000>; | ||
21 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/i2c.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/i2c.txt new file mode 100644 index 000000000000..87bc6048667e --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/i2c.txt | |||
@@ -0,0 +1,41 @@ | |||
1 | * I2C | ||
2 | |||
3 | The I2C controller is expressed as a bus under the CPM node. | ||
4 | |||
5 | Properties: | ||
6 | - compatible : "fsl,cpm1-i2c", "fsl,cpm2-i2c" | ||
7 | - reg : On CPM2 devices, the second resource doesn't specify the I2C | ||
8 | Parameter RAM itself, but the I2C_BASE field of the CPM2 Parameter RAM | ||
9 | (typically 0x8afc 0x2). | ||
10 | - #address-cells : Should be one. The cell is the i2c device address with | ||
11 | the r/w bit set to zero. | ||
12 | - #size-cells : Should be zero. | ||
13 | - clock-frequency : Can be used to set the i2c clock frequency. If | ||
14 | unspecified, a default frequency of 60kHz is being used. | ||
15 | The following two properties are deprecated. They are only used by legacy | ||
16 | i2c drivers to find the bus to probe: | ||
17 | - linux,i2c-index : Can be used to hard code an i2c bus number. By default, | ||
18 | the bus number is dynamically assigned by the i2c core. | ||
19 | - linux,i2c-class : Can be used to override the i2c class. The class is used | ||
20 | by legacy i2c device drivers to find a bus in a specific context like | ||
21 | system management, video or sound. By default, I2C_CLASS_HWMON (1) is | ||
22 | being used. The definition of the classes can be found in | ||
 23 | include/linux/i2c.h | ||
24 | |||
25 | Example, based on mpc823: | ||
26 | |||
27 | i2c@860 { | ||
28 | compatible = "fsl,mpc823-i2c", | ||
29 | "fsl,cpm1-i2c"; | ||
30 | reg = <0x860 0x20 0x3c80 0x30>; | ||
31 | interrupts = <16>; | ||
32 | interrupt-parent = <&CPM_PIC>; | ||
33 | fsl,cpm-command = <0x10>; | ||
34 | #address-cells = <1>; | ||
35 | #size-cells = <0>; | ||
36 | |||
37 | rtc@68 { | ||
38 | compatible = "dallas,ds1307"; | ||
39 | reg = <0x68>; | ||
40 | }; | ||
41 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/pic.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/pic.txt new file mode 100644 index 000000000000..8e3ee1681618 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/pic.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | * Interrupt Controllers | ||
2 | |||
3 | Currently defined compatibles: | ||
4 | - fsl,cpm1-pic | ||
5 | - only one interrupt cell | ||
6 | - fsl,pq1-pic | ||
7 | - fsl,cpm2-pic | ||
8 | - second interrupt cell is level/sense: | ||
9 | - 2 is falling edge | ||
10 | - 8 is active low | ||
11 | |||
12 | Example: | ||
13 | interrupt-controller@10c00 { | ||
14 | #interrupt-cells = <2>; | ||
15 | interrupt-controller; | ||
16 | reg = <10c00 80>; | ||
 17 | compatible = "fsl,mpc8272-pic", "fsl,cpm2-pic"; | ||
18 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/usb.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/usb.txt new file mode 100644 index 000000000000..74bfda4bb824 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm/usb.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | * USB (Universal Serial Bus Controller) | ||
2 | |||
3 | Properties: | ||
4 | - compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb" | ||
5 | |||
6 | Example: | ||
 7 | usb@11b60 { | ||
8 | #address-cells = <1>; | ||
9 | #size-cells = <0>; | ||
10 | compatible = "fsl,cpm2-usb"; | ||
11 | reg = <11b60 18 8b00 100>; | ||
12 | interrupts = <b 8>; | ||
13 | interrupt-parent = <&PIC>; | ||
14 | fsl,cpm-command = <2e600000>; | ||
15 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/network.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/network.txt new file mode 100644 index 000000000000..0e4269446580 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/network.txt | |||
@@ -0,0 +1,45 @@ | |||
1 | * Network | ||
2 | |||
3 | Currently defined compatibles: | ||
4 | - fsl,cpm1-scc-enet | ||
5 | - fsl,cpm2-scc-enet | ||
6 | - fsl,cpm1-fec-enet | ||
7 | - fsl,cpm2-fcc-enet (third resource is GFEMR) | ||
8 | - fsl,qe-enet | ||
9 | |||
10 | Example: | ||
11 | |||
12 | ethernet@11300 { | ||
13 | device_type = "network"; | ||
14 | compatible = "fsl,mpc8272-fcc-enet", | ||
15 | "fsl,cpm2-fcc-enet"; | ||
16 | reg = <11300 20 8400 100 11390 1>; | ||
17 | local-mac-address = [ 00 00 00 00 00 00 ]; | ||
18 | interrupts = <20 8>; | ||
19 | interrupt-parent = <&PIC>; | ||
20 | phy-handle = <&PHY0>; | ||
21 | fsl,cpm-command = <12000300>; | ||
22 | }; | ||
23 | |||
24 | * MDIO | ||
25 | |||
26 | Currently defined compatibles: | ||
27 | fsl,pq1-fec-mdio (reg is same as first resource of FEC device) | ||
28 | fsl,cpm2-mdio-bitbang (reg is port C registers) | ||
29 | |||
30 | Properties for fsl,cpm2-mdio-bitbang: | ||
31 | fsl,mdio-pin : pin of port C controlling mdio data | ||
32 | fsl,mdc-pin : pin of port C controlling mdio clock | ||
33 | |||
34 | Example: | ||
35 | mdio@10d40 { | ||
36 | device_type = "mdio"; | ||
37 | compatible = "fsl,mpc8272ads-mdio-bitbang", | ||
38 | "fsl,mpc8272-mdio-bitbang", | ||
39 | "fsl,cpm2-mdio-bitbang"; | ||
40 | reg = <10d40 14>; | ||
41 | #address-cells = <1>; | ||
42 | #size-cells = <0>; | ||
43 | fsl,mdio-pin = <12>; | ||
44 | fsl,mdc-pin = <13>; | ||
45 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt new file mode 100644 index 000000000000..78790d58dc2c --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt | |||
@@ -0,0 +1,58 @@ | |||
1 | * Freescale QUICC Engine module (QE) | ||
2 | This represents qe module that is installed on PowerQUICC II Pro. | ||
3 | |||
4 | NOTE: This is an interim binding; it should be updated to fit | ||
5 | in with the CPM binding later in this document. | ||
6 | |||
7 | Basically, it is a bus of devices, that could act more or less | ||
8 | as a complete entity (UCC, USB etc ). All of them should be siblings on | ||
9 | the "root" qe node, using the common properties from there. | ||
10 | The description below applies to the qe of MPC8360 and | ||
11 | more nodes and properties would be extended in the future. | ||
12 | |||
13 | i) Root QE device | ||
14 | |||
15 | Required properties: | ||
16 | - compatible : should be "fsl,qe"; | ||
17 | - model : precise model of the QE; can be "QE", "CPM", or "CPM2" | ||
18 | - reg : offset and length of the device registers. | ||
19 | - bus-frequency : the clock frequency for QUICC Engine. | ||
20 | |||
21 | Recommended properties | ||
22 | - brg-frequency : the internal clock source frequency for baud-rate | ||
23 | generators in Hz. | ||
24 | |||
25 | Example: | ||
26 | qe@e0100000 { | ||
27 | #address-cells = <1>; | ||
28 | #size-cells = <1>; | ||
29 | #interrupt-cells = <2>; | ||
30 | compatible = "fsl,qe"; | ||
31 | ranges = <0 e0100000 00100000>; | ||
32 | reg = <e0100000 480>; | ||
33 | brg-frequency = <0>; | ||
34 | bus-frequency = <179A7B00>; | ||
35 | }; | ||
36 | |||
37 | * Multi-User RAM (MURAM) | ||
38 | |||
39 | Required properties: | ||
40 | - compatible : should be "fsl,qe-muram", "fsl,cpm-muram". | ||
41 | - mode : could be "host" or "slave". | ||
42 | - ranges : Should be defined as specified in 1) to describe the | ||
43 | translation of MURAM addresses. | ||
44 | - data-only : sub-node which defines the address area under MURAM | ||
45 | bus that can be allocated as data/parameter | ||
46 | |||
47 | Example: | ||
48 | |||
49 | muram@10000 { | ||
50 | compatible = "fsl,qe-muram", "fsl,cpm-muram"; | ||
51 | ranges = <0 00010000 0000c000>; | ||
52 | |||
53 | data-only@0{ | ||
54 | compatible = "fsl,qe-muram-data", | ||
55 | "fsl,cpm-muram-data"; | ||
56 | reg = <0 c000>; | ||
57 | }; | ||
58 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt new file mode 100644 index 000000000000..6c238f59b2a9 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | * Uploaded QE firmware | ||
2 | |||
3 | If a new firmware has been uploaded to the QE (usually by the | ||
4 | boot loader), then a 'firmware' child node should be added to the QE | ||
5 | node. This node provides information on the uploaded firmware that | ||
6 | device drivers may need. | ||
7 | |||
8 | Required properties: | ||
9 | - id: The string name of the firmware. This is taken from the 'id' | ||
10 | member of the qe_firmware structure of the uploaded firmware. | ||
11 | Device drivers can search this string to determine if the | ||
12 | firmware they want is already present. | ||
13 | - extended-modes: The Extended Modes bitfield, taken from the | ||
14 | firmware binary. It is a 64-bit number represented | ||
15 | as an array of two 32-bit numbers. | ||
16 | - virtual-traps: The virtual traps, taken from the firmware binary. | ||
17 | It is an array of 8 32-bit numbers. | ||
18 | |||
19 | Example: | ||
20 | firmware { | ||
21 | id = "Soft-UART"; | ||
22 | extended-modes = <0 0>; | ||
23 | virtual-traps = <0 0 0 0 0 0 0 0>; | ||
24 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/par_io.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/par_io.txt new file mode 100644 index 000000000000..60984260207b --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/par_io.txt | |||
@@ -0,0 +1,51 @@ | |||
1 | * Parallel I/O Ports | ||
2 | |||
3 | This node configures Parallel I/O ports for CPUs with QE support. | ||
4 | The node should reside in the "soc" node of the tree. For each | ||
5 | device that uses parallel I/O ports, a child node should be created. | ||
6 | See the definition of the Pin configuration nodes below for more | ||
7 | information. | ||
8 | |||
9 | Required properties: | ||
10 | - device_type : should be "par_io". | ||
11 | - reg : offset to the register set and its length. | ||
12 | - num-ports : number of Parallel I/O ports | ||
13 | |||
14 | Example: | ||
15 | par_io@1400 { | ||
16 | reg = <1400 100>; | ||
17 | #address-cells = <1>; | ||
18 | #size-cells = <0>; | ||
19 | device_type = "par_io"; | ||
20 | num-ports = <7>; | ||
21 | ucc_pin@01 { | ||
22 | ...... | ||
23 | }; | ||
24 | |||
25 | Note that "par_io" nodes are obsolete, and should not be used for | ||
26 | the new device trees. Instead, each Par I/O bank should be represented | ||
27 | via its own gpio-controller node: | ||
28 | |||
29 | Required properties: | ||
30 | - #gpio-cells : should be "2". | ||
31 | - compatible : should be "fsl,<chip>-qe-pario-bank", | ||
32 | "fsl,mpc8323-qe-pario-bank". | ||
33 | - reg : offset to the register set and its length. | ||
34 | - gpio-controller : node to identify gpio controllers. | ||
35 | |||
36 | Example: | ||
37 | qe_pio_a: gpio-controller@1400 { | ||
38 | #gpio-cells = <2>; | ||
39 | compatible = "fsl,mpc8360-qe-pario-bank", | ||
40 | "fsl,mpc8323-qe-pario-bank"; | ||
41 | reg = <0x1400 0x18>; | ||
42 | gpio-controller; | ||
43 | }; | ||
44 | |||
45 | qe_pio_e: gpio-controller@1460 { | ||
46 | #gpio-cells = <2>; | ||
47 | compatible = "fsl,mpc8360-qe-pario-bank", | ||
48 | "fsl,mpc8323-qe-pario-bank"; | ||
49 | reg = <0x1460 0x18>; | ||
50 | gpio-controller; | ||
51 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/pincfg.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/pincfg.txt new file mode 100644 index 000000000000..c5b43061db3a --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/pincfg.txt | |||
@@ -0,0 +1,60 @@ | |||
1 | * Pin configuration nodes | ||
2 | |||
3 | Required properties: | ||
4 | - linux,phandle : phandle of this node; likely referenced by a QE | ||
5 | device. | ||
6 | - pio-map : array of pin configurations. Each pin is defined by 6 | ||
7 | integers. The six numbers are respectively: port, pin, dir, | ||
8 | open_drain, assignment, has_irq. | ||
9 | - port : port number of the pin; 0-6 represent port A-G in UM. | ||
10 | - pin : pin number in the port. | ||
11 | - dir : direction of the pin, should encode as follows: | ||
12 | |||
13 | 0 = The pin is disabled | ||
14 | 1 = The pin is an output | ||
15 | 2 = The pin is an input | ||
16 | 3 = The pin is I/O | ||
17 | |||
18 | - open_drain : indicates the pin is normal or wired-OR: | ||
19 | |||
20 | 0 = The pin is actively driven as an output | ||
21 | 1 = The pin is an open-drain driver. As an output, the pin is | ||
22 | driven active-low, otherwise it is three-stated. | ||
23 | |||
24 | - assignment : function number of the pin according to the Pin Assignment | ||
25 | tables in User Manual. Each pin can have up to 4 possible functions in | ||
26 | QE and two options for CPM. | ||
27 | - has_irq : indicates if the pin is used as source of external | ||
28 | interrupts. | ||
29 | |||
30 | Example: | ||
31 | ucc_pin@01 { | ||
32 | linux,phandle = <140001>; | ||
33 | pio-map = < | ||
34 | /* port pin dir open_drain assignment has_irq */ | ||
35 | 0 3 1 0 1 0 /* TxD0 */ | ||
36 | 0 4 1 0 1 0 /* TxD1 */ | ||
37 | 0 5 1 0 1 0 /* TxD2 */ | ||
38 | 0 6 1 0 1 0 /* TxD3 */ | ||
39 | 1 6 1 0 3 0 /* TxD4 */ | ||
40 | 1 7 1 0 1 0 /* TxD5 */ | ||
41 | 1 9 1 0 2 0 /* TxD6 */ | ||
42 | 1 a 1 0 2 0 /* TxD7 */ | ||
43 | 0 9 2 0 1 0 /* RxD0 */ | ||
44 | 0 a 2 0 1 0 /* RxD1 */ | ||
45 | 0 b 2 0 1 0 /* RxD2 */ | ||
46 | 0 c 2 0 1 0 /* RxD3 */ | ||
47 | 0 d 2 0 1 0 /* RxD4 */ | ||
48 | 1 1 2 0 2 0 /* RxD5 */ | ||
49 | 1 0 2 0 2 0 /* RxD6 */ | ||
50 | 1 4 2 0 2 0 /* RxD7 */ | ||
51 | 0 7 1 0 1 0 /* TX_EN */ | ||
52 | 0 8 1 0 1 0 /* TX_ER */ | ||
53 | 0 f 2 0 1 0 /* RX_DV */ | ||
54 | 0 10 2 0 1 0 /* RX_ER */ | ||
55 | 0 0 2 0 1 0 /* RX_CLK */ | ||
56 | 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ | ||
57 | 2 8 2 0 1 0>; /* GTX125 - CLK9 */ | ||
58 | }; | ||
59 | |||
60 | |||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/ucc.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/ucc.txt new file mode 100644 index 000000000000..e47734bee3f0 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/ucc.txt | |||
@@ -0,0 +1,70 @@ | |||
1 | * UCC (Unified Communications Controllers) | ||
2 | |||
3 | Required properties: | ||
4 | - device_type : should be "network", "hdlc", "uart", "transparent", | ||
5 | "bisync", "atm", or "serial". | ||
6 | - compatible : could be "ucc_geth" or "fsl_atm" and so on. | ||
7 | - cell-index : the ucc number(1-8), corresponding to UCCx in UM. | ||
8 | - reg : Offset and length of the register set for the device | ||
9 | - interrupts : <a b> where a is the interrupt number and b is a | ||
10 | field that represents an encoding of the sense and level | ||
11 | information for the interrupt. This should be encoded based on | ||
12 | the information in section 2) depending on the type of interrupt | ||
13 | controller you have. | ||
14 | - interrupt-parent : the phandle for the interrupt controller that | ||
15 | services interrupts for this device. | ||
16 | - pio-handle : The phandle for the Parallel I/O port configuration. | ||
17 | - port-number : for UART drivers, the port number to use, between 0 and 3. | ||
18 | This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0. | ||
19 | The port number is added to the minor number of the device. Unlike the | ||
20 | CPM UART driver, the port-number is required for the QE UART driver. | ||
21 | - soft-uart : for UART drivers, if specified this means the QE UART device | ||
22 | driver should use "Soft-UART" mode, which is needed on some SOCs that have | ||
23 | broken UART hardware. Soft-UART is provided via a microcode upload. | ||
24 | - rx-clock-name: the UCC receive clock source | ||
25 | "none": clock source is disabled | ||
26 | "brg1" through "brg16": clock source is BRG1-BRG16, respectively | ||
27 | "clk1" through "clk24": clock source is CLK1-CLK24, respectively | ||
28 | - tx-clock-name: the UCC transmit clock source | ||
29 | "none": clock source is disabled | ||
30 | "brg1" through "brg16": clock source is BRG1-BRG16, respectively | ||
31 | "clk1" through "clk24": clock source is CLK1-CLK24, respectively | ||
32 | The following two properties are deprecated. rx-clock has been replaced | ||
33 | with rx-clock-name, and tx-clock has been replaced with tx-clock-name. | ||
34 | Drivers that currently use the deprecated properties should continue to | ||
35 | do so, in order to support older device trees, but they should be updated | ||
36 | to check for the new properties first. | ||
37 | - rx-clock : represents the UCC receive clock source. | ||
38 | 0x00 : clock source is disabled; | ||
39 | 0x1~0x10 : clock source is BRG1~BRG16 respectively; | ||
40 | 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. | ||
41 | - tx-clock: represents the UCC transmit clock source; | ||
42 | 0x00 : clock source is disabled; | ||
43 | 0x1~0x10 : clock source is BRG1~BRG16 respectively; | ||
44 | 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. | ||
45 | |||
46 | Required properties for network device_type: | ||
47 | - mac-address : list of bytes representing the ethernet address. | ||
48 | - phy-handle : The phandle for the PHY connected to this controller. | ||
49 | |||
50 | Recommended properties: | ||
51 | - phy-connection-type : a string naming the controller/PHY interface type, | ||
52 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal | ||
53 | Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), | ||
54 | "tbi", or "rtbi". | ||
55 | |||
56 | Example: | ||
57 | ucc@2000 { | ||
58 | device_type = "network"; | ||
59 | compatible = "ucc_geth"; | ||
60 | cell-index = <1>; | ||
61 | reg = <2000 200>; | ||
62 | interrupts = <a0 0>; | ||
63 | interrupt-parent = <700>; | ||
64 | mac-address = [ 00 04 9f 00 23 23 ]; | ||
65 | rx-clock-name = "none"; | ||
66 | tx-clock-name = "clk9"; | ||
67 | phy-handle = <212000>; | ||
68 | phy-connection-type = "gmii"; | ||
69 | pio-handle = <140001>; | ||
70 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt new file mode 100644 index 000000000000..c8f44d6bcbcf --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | * USB (Universal Serial Bus Controller) | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : could be "qe_udc" or "fhci-hcd". | ||
5 | - mode : could be "host" or "slave". | ||
6 | - reg : Offset and length of the register set for the device | ||
7 | - interrupts : <a b> where a is the interrupt number and b is a | ||
8 | field that represents an encoding of the sense and level | ||
9 | information for the interrupt. This should be encoded based on | ||
10 | the information in section 2) depending on the type of interrupt | ||
11 | controller you have. | ||
12 | - interrupt-parent : the phandle for the interrupt controller that | ||
13 | services interrupts for this device. | ||
14 | |||
15 | Example(slave): | ||
16 | usb@6c0 { | ||
17 | compatible = "qe_udc"; | ||
18 | reg = <6c0 40>; | ||
19 | interrupts = <8b 0>; | ||
20 | interrupt-parent = <700>; | ||
21 | mode = "slave"; | ||
22 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt new file mode 100644 index 000000000000..b35f3482e3e4 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | * Serial | ||
2 | |||
3 | Currently defined compatibles: | ||
4 | - fsl,cpm1-smc-uart | ||
5 | - fsl,cpm2-smc-uart | ||
6 | - fsl,cpm1-scc-uart | ||
7 | - fsl,cpm2-scc-uart | ||
8 | - fsl,qe-uart | ||
9 | |||
10 | Example: | ||
11 | |||
12 | serial@11a00 { | ||
13 | device_type = "serial"; | ||
14 | compatible = "fsl,mpc8272-scc-uart", | ||
15 | "fsl,cpm2-scc-uart"; | ||
16 | reg = <11a00 20 8000 100>; | ||
17 | interrupts = <28 8>; | ||
18 | interrupt-parent = <&PIC>; | ||
19 | fsl,cpm-brg = <1>; | ||
20 | fsl,cpm-command = <00800000>; | ||
21 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/diu.txt b/Documentation/powerpc/dts-bindings/fsl/diu.txt new file mode 100644 index 000000000000..deb35de70988 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/diu.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | * Freescale Display Interface Unit | ||
2 | |||
3 | The Freescale DIU is an LCD controller; with proper hardware, it can also | ||
4 | drive DVI monitors. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible : should be "fsl,diu". | ||
8 | - reg : should contain at least address and length of the DIU register | ||
9 | set. | ||
10 | - interrupts : one DIU interrupt should be described here. | ||
11 | |||
12 | Example (MPC8610HPCD): | ||
13 | display@2c000 { | ||
14 | compatible = "fsl,diu"; | ||
15 | reg = <0x2c000 100>; | ||
16 | interrupts = <72 2>; | ||
17 | interrupt-parent = <&mpic>; | ||
18 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/dma.txt b/Documentation/powerpc/dts-bindings/fsl/dma.txt new file mode 100644 index 000000000000..86826df00e64 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/dma.txt | |||
@@ -0,0 +1,127 @@ | |||
1 | * Freescale 83xx DMA Controller | ||
2 | |||
3 | Freescale PowerPC 83xx have on chip general purpose DMA controllers. | ||
4 | |||
5 | Required properties: | ||
6 | |||
7 | - compatible : compatible list, contains 2 entries, first is | ||
8 | "fsl,CHIP-dma", where CHIP is the processor | ||
9 | (mpc8349, mpc8360, etc.) and the second is | ||
10 | "fsl,elo-dma" | ||
11 | - reg : <registers mapping for DMA general status reg> | ||
12 | - ranges : Should be defined as specified in 1) to describe the | ||
13 | DMA controller channels. | ||
14 | - cell-index : controller index. 0 for controller @ 0x8100 | ||
15 | - interrupts : <interrupt mapping for DMA IRQ> | ||
16 | - interrupt-parent : optional, if needed for interrupt mapping | ||
17 | |||
18 | |||
19 | - DMA channel nodes: | ||
20 | - compatible : compatible list, contains 2 entries, first is | ||
21 | "fsl,CHIP-dma-channel", where CHIP is the processor | ||
22 | (mpc8349, mpc8360, etc.) and the second is | ||
23 | "fsl,elo-dma-channel" | ||
24 | - reg : <registers mapping for channel> | ||
25 | - cell-index : dma channel index starts at 0. | ||
26 | |||
27 | Optional properties: | ||
28 | - interrupts : <interrupt mapping for DMA channel IRQ> | ||
29 | (on 83xx this is expected to be identical to | ||
30 | the interrupts property of the parent node) | ||
31 | - interrupt-parent : optional, if needed for interrupt mapping | ||
32 | |||
33 | Example: | ||
34 | dma@82a8 { | ||
35 | #address-cells = <1>; | ||
36 | #size-cells = <1>; | ||
37 | compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; | ||
38 | reg = <82a8 4>; | ||
39 | ranges = <0 8100 1a4>; | ||
40 | interrupt-parent = <&ipic>; | ||
41 | interrupts = <47 8>; | ||
42 | cell-index = <0>; | ||
43 | dma-channel@0 { | ||
44 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
45 | cell-index = <0>; | ||
46 | reg = <0 80>; | ||
47 | }; | ||
48 | dma-channel@80 { | ||
49 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
50 | cell-index = <1>; | ||
51 | reg = <80 80>; | ||
52 | }; | ||
53 | dma-channel@100 { | ||
54 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
55 | cell-index = <2>; | ||
56 | reg = <100 80>; | ||
57 | }; | ||
58 | dma-channel@180 { | ||
59 | compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; | ||
60 | cell-index = <3>; | ||
61 | reg = <180 80>; | ||
62 | }; | ||
63 | }; | ||
64 | |||
65 | * Freescale 85xx/86xx DMA Controller | ||
66 | |||
67 | Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers. | ||
68 | |||
69 | Required properties: | ||
70 | |||
71 | - compatible : compatible list, contains 2 entries, first is | ||
72 | "fsl,CHIP-dma", where CHIP is the processor | ||
73 | (mpc8540, mpc8560, etc.) and the second is | ||
74 | "fsl,eloplus-dma" | ||
75 | - reg : <registers mapping for DMA general status reg> | ||
76 | - cell-index : controller index. 0 for controller @ 0x21000, | ||
77 | 1 for controller @ 0xc000 | ||
78 | - ranges : Should be defined as specified in 1) to describe the | ||
79 | DMA controller channels. | ||
80 | |||
81 | - DMA channel nodes: | ||
82 | - compatible : compatible list, contains 2 entries, first is | ||
83 | "fsl,CHIP-dma-channel", where CHIP is the processor | ||
84 | (mpc8540, mpc8560, etc.) and the second is | ||
85 | "fsl,eloplus-dma-channel" | ||
86 | - cell-index : dma channel index starts at 0. | ||
87 | - reg : <registers mapping for channel> | ||
88 | - interrupts : <interrupt mapping for DMA channel IRQ> | ||
89 | - interrupt-parent : optional, if needed for interrupt mapping | ||
90 | |||
91 | Example: | ||
92 | dma@21300 { | ||
93 | #address-cells = <1>; | ||
94 | #size-cells = <1>; | ||
95 | compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; | ||
96 | reg = <21300 4>; | ||
97 | ranges = <0 21100 200>; | ||
98 | cell-index = <0>; | ||
99 | dma-channel@0 { | ||
100 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
101 | reg = <0 80>; | ||
102 | cell-index = <0>; | ||
103 | interrupt-parent = <&mpic>; | ||
104 | interrupts = <14 2>; | ||
105 | }; | ||
106 | dma-channel@80 { | ||
107 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
108 | reg = <80 80>; | ||
109 | cell-index = <1>; | ||
110 | interrupt-parent = <&mpic>; | ||
111 | interrupts = <15 2>; | ||
112 | }; | ||
113 | dma-channel@100 { | ||
114 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
115 | reg = <100 80>; | ||
116 | cell-index = <2>; | ||
117 | interrupt-parent = <&mpic>; | ||
118 | interrupts = <16 2>; | ||
119 | }; | ||
120 | dma-channel@180 { | ||
121 | compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; | ||
122 | reg = <180 80>; | ||
123 | cell-index = <3>; | ||
124 | interrupt-parent = <&mpic>; | ||
125 | interrupts = <17 2>; | ||
126 | }; | ||
127 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/gtm.txt b/Documentation/powerpc/dts-bindings/fsl/gtm.txt new file mode 100644 index 000000000000..9a33efded4bc --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/gtm.txt | |||
@@ -0,0 +1,31 @@ | |||
1 | * Freescale General-purpose Timers Module | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be | ||
5 | "fsl,<chip>-gtm", "fsl,gtm" for SOC GTMs | ||
6 | "fsl,<chip>-qe-gtm", "fsl,qe-gtm", "fsl,gtm" for QE GTMs | ||
7 | "fsl,<chip>-cpm2-gtm", "fsl,cpm2-gtm", "fsl,gtm" for CPM2 GTMs | ||
8 | - reg : should contain gtm registers location and length (0x40). | ||
9 | - interrupts : should contain four interrupts. | ||
10 | - interrupt-parent : interrupt source phandle. | ||
11 | - clock-frequency : specifies the frequency driving the timer. | ||
12 | |||
13 | Example: | ||
14 | |||
15 | timer@500 { | ||
16 | compatible = "fsl,mpc8360-gtm", "fsl,gtm"; | ||
17 | reg = <0x500 0x40>; | ||
18 | interrupts = <90 8 78 8 84 8 72 8>; | ||
19 | interrupt-parent = <&ipic>; | ||
20 | /* filled by u-boot */ | ||
21 | clock-frequency = <0>; | ||
22 | }; | ||
23 | |||
24 | timer@440 { | ||
25 | compatible = "fsl,mpc8360-qe-gtm", "fsl,qe-gtm", "fsl,gtm"; | ||
26 | reg = <0x440 0x40>; | ||
27 | interrupts = <12 13 14 15>; | ||
28 | interrupt-parent = <&qeic>; | ||
29 | /* filled by u-boot */ | ||
30 | clock-frequency = <0>; | ||
31 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/guts.txt b/Documentation/powerpc/dts-bindings/fsl/guts.txt new file mode 100644 index 000000000000..9e7a2417dac5 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/guts.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | * Global Utilities Block | ||
2 | |||
3 | The global utilities block controls power management, I/O device | ||
4 | enabling, power-on-reset configuration monitoring, general-purpose | ||
5 | I/O signal configuration, alternate function selection for multiplexed | ||
6 | signals, and clock control. | ||
7 | |||
8 | Required properties: | ||
9 | |||
10 | - compatible : Should define the compatible device type for | ||
11 | global-utilities. | ||
12 | - reg : Offset and length of the register set for the device. | ||
13 | |||
14 | Recommended properties: | ||
15 | |||
16 | - fsl,has-rstcr : Indicates that the global utilities register set | ||
17 | contains a functioning "reset control register" (i.e. the board | ||
18 | is wired to reset upon setting the HRESET_REQ bit in this register). | ||
19 | |||
20 | Example: | ||
21 | global-utilities@e0000 { /* global utilities block */ | ||
22 | compatible = "fsl,mpc8548-guts"; | ||
23 | reg = <e0000 1000>; | ||
24 | fsl,has-rstcr; | ||
25 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/i2c.txt b/Documentation/powerpc/dts-bindings/fsl/i2c.txt new file mode 100644 index 000000000000..d0ab33e21fe6 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/i2c.txt | |||
@@ -0,0 +1,32 @@ | |||
1 | * I2C | ||
2 | |||
3 | Required properties : | ||
4 | |||
5 | - device_type : Should be "i2c" | ||
6 | - reg : Offset and length of the register set for the device | ||
7 | |||
8 | Recommended properties : | ||
9 | |||
10 | - compatible : Should be "fsl-i2c" for parts compatible with | ||
11 | Freescale I2C specifications. | ||
12 | - interrupts : <a b> where a is the interrupt number and b is a | ||
13 | field that represents an encoding of the sense and level | ||
14 | information for the interrupt. This should be encoded based on | ||
15 | the information in section 2) depending on the type of interrupt | ||
16 | controller you have. | ||
17 | - interrupt-parent : the phandle for the interrupt controller that | ||
18 | services interrupts for this device. | ||
19 | - dfsrr : boolean; if defined, indicates that this I2C device has | ||
20 | a digital filter sampling rate register | ||
21 | - fsl5200-clocking : boolean; if defined, indicates that this device | ||
22 | uses the FSL 5200 clocking mechanism. | ||
23 | |||
24 | Example : | ||
25 | i2c@3000 { | ||
26 | interrupt-parent = <40000>; | ||
27 | interrupts = <1b 3>; | ||
28 | reg = <3000 18>; | ||
29 | device_type = "i2c"; | ||
30 | compatible = "fsl-i2c"; | ||
31 | dfsrr; | ||
32 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/lbc.txt b/Documentation/powerpc/dts-bindings/fsl/lbc.txt new file mode 100644 index 000000000000..3300fec501c5 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/lbc.txt | |||
@@ -0,0 +1,35 @@ | |||
1 | * Chipselect/Local Bus | ||
2 | |||
3 | Properties: | ||
4 | - name : Should be localbus | ||
5 | - #address-cells : Should be either two or three. The first cell is the | ||
6 | chipselect number, and the remaining cells are the | ||
7 | offset into the chipselect. | ||
8 | - #size-cells : Either one or two, depending on how large each chipselect | ||
9 | can be. | ||
10 | - ranges : Each range corresponds to a single chipselect, and cover | ||
11 | the entire access window as configured. | ||
12 | |||
13 | Example: | ||
14 | localbus@f0010100 { | ||
15 | compatible = "fsl,mpc8272-localbus", | ||
16 | "fsl,pq2-localbus"; | ||
17 | #address-cells = <2>; | ||
18 | #size-cells = <1>; | ||
19 | reg = <f0010100 40>; | ||
20 | |||
21 | ranges = <0 0 fe000000 02000000 | ||
22 | 1 0 f4500000 00008000>; | ||
23 | |||
24 | flash@0,0 { | ||
25 | compatible = "jedec-flash"; | ||
26 | reg = <0 0 2000000>; | ||
27 | bank-width = <4>; | ||
28 | device-width = <1>; | ||
29 | }; | ||
30 | |||
31 | board-control@1,0 { | ||
32 | reg = <1 0 20>; | ||
33 | compatible = "fsl,mpc8272ads-bcsr"; | ||
34 | }; | ||
35 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt new file mode 100644 index 000000000000..b26b91992c55 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt | |||
@@ -0,0 +1,36 @@ | |||
1 | * Freescale MSI interrupt controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : compatible list, contains 2 entries, | ||
5 | first is "fsl,CHIP-msi", where CHIP is the processor (mpc8610, mpc8572, | ||
6 | etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on | ||
7 | the parent type. | ||
8 | - reg : should contain the address and the length of the shared message | ||
9 | interrupt register set. | ||
10 | - msi-available-ranges: use <start count> style section to define which | ||
11 | msi interrupt can be used in the 256 msi interrupts. This property is | ||
12 | optional, without this, all the 256 MSI interrupts can be used. | ||
13 | - interrupts : each one of the interrupts here is one entry per 32 MSIs, | ||
14 | and routed to the host interrupt controller. The interrupts should | ||
15 | be set as edge sensitive. | ||
16 | - interrupt-parent: the phandle for the interrupt controller | ||
17 | that services interrupts for this device. for 83xx cpu, the interrupts | ||
18 | are routed to IPIC, and for 85xx/86xx cpu the interrupts are routed | ||
19 | to MPIC. | ||
20 | |||
21 | Example: | ||
22 | msi@41600 { | ||
23 | compatible = "fsl,mpc8610-msi", "fsl,mpic-msi"; | ||
24 | reg = <0x41600 0x80>; | ||
25 | msi-available-ranges = <0 0x100>; | ||
26 | interrupts = < | ||
27 | 0xe0 0 | ||
28 | 0xe1 0 | ||
29 | 0xe2 0 | ||
30 | 0xe3 0 | ||
31 | 0xe4 0 | ||
32 | 0xe5 0 | ||
33 | 0xe6 0 | ||
34 | 0xe7 0>; | ||
35 | interrupt-parent = <&mpic>; | ||
36 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/sata.txt b/Documentation/powerpc/dts-bindings/fsl/sata.txt new file mode 100644 index 000000000000..b46bcf46c3d8 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/sata.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | * Freescale 8xxx/3.0 Gb/s SATA nodes | ||
2 | |||
3 | SATA nodes are defined to describe on-chip Serial ATA controllers. | ||
4 | Each SATA port should have its own node. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible : compatible list, contains 2 entries, first is | ||
8 | "fsl,CHIP-sata", where CHIP is the processor | ||
9 | (mpc8315, mpc8379, etc.) and the second is | ||
10 | "fsl,pq-sata" | ||
11 | - interrupts : <interrupt mapping for SATA IRQ> | ||
12 | - cell-index : controller index. | ||
13 | 1 for controller @ 0x18000 | ||
14 | 2 for controller @ 0x19000 | ||
15 | 3 for controller @ 0x1a000 | ||
16 | 4 for controller @ 0x1b000 | ||
17 | |||
18 | Optional properties: | ||
19 | - interrupt-parent : optional, if needed for interrupt mapping | ||
20 | - reg : <registers mapping> | ||
21 | |||
22 | Example: | ||
23 | sata@18000 { | ||
24 | compatible = "fsl,mpc8379-sata", "fsl,pq-sata"; | ||
25 | reg = <0x18000 0x1000>; | ||
26 | cell-index = <1>; | ||
27 | interrupts = <2c 8>; | ||
28 | interrupt-parent = < &ipic >; | ||
29 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/sec.txt b/Documentation/powerpc/dts-bindings/fsl/sec.txt new file mode 100644 index 000000000000..2b6f2d45c45a --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/sec.txt | |||
@@ -0,0 +1,68 @@ | |||
1 | Freescale SoC SEC Security Engines | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - compatible : Should contain entries for this and backward compatible | ||
6 | SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" | ||
7 | - reg : Offset and length of the register set for the device | ||
8 | - interrupts : the SEC's interrupt number | ||
9 | - fsl,num-channels : An integer representing the number of channels | ||
10 | available. | ||
11 | - fsl,channel-fifo-len : An integer representing the number of | ||
12 | descriptor pointers each channel fetch fifo can hold. | ||
13 | - fsl,exec-units-mask : The bitmask representing what execution units | ||
14 | (EUs) are available. It's a single 32-bit cell. EU information | ||
15 | should be encoded following the SEC's Descriptor Header Dword | ||
16 | EU_SEL0 field documentation, i.e. as follows: | ||
17 | |||
18 | bit 0 = reserved - should be 0 | ||
19 | bit 1 = set if SEC has the ARC4 EU (AFEU) | ||
20 | bit 2 = set if SEC has the DES/3DES EU (DEU) | ||
21 | bit 3 = set if SEC has the message digest EU (MDEU/MDEU-A) | ||
22 | bit 4 = set if SEC has the random number generator EU (RNG) | ||
23 | bit 5 = set if SEC has the public key EU (PKEU) | ||
24 | bit 6 = set if SEC has the AES EU (AESU) | ||
25 | bit 7 = set if SEC has the Kasumi EU (KEU) | ||
26 | bit 8 = set if SEC has the CRC EU (CRCU) | ||
27 | bit 11 = set if SEC has the message digest EU extended alg set (MDEU-B) | ||
28 | |||
29 | remaining bits are reserved for future SEC EUs. | ||
30 | |||
31 | - fsl,descriptor-types-mask : The bitmask representing what descriptors | ||
32 | are available. It's a single 32-bit cell. Descriptor type information | ||
33 | should be encoded following the SEC's Descriptor Header Dword DESC_TYPE | ||
34 | field documentation, i.e. as follows: | ||
35 | |||
36 | bit 0 = set if SEC supports the aesu_ctr_nonsnoop desc. type | ||
37 | bit 1 = set if SEC supports the ipsec_esp descriptor type | ||
38 | bit 2 = set if SEC supports the common_nonsnoop desc. type | ||
39 | bit 3 = set if SEC supports the 802.11i AES ccmp desc. type | ||
40 | bit 4 = set if SEC supports the hmac_snoop_no_afeu desc. type | ||
41 | bit 5 = set if SEC supports the srtp descriptor type | ||
42 | bit 6 = set if SEC supports the non_hmac_snoop_no_afeu desc.type | ||
43 | bit 7 = set if SEC supports the pkeu_assemble descriptor type | ||
44 | bit 8 = set if SEC supports the aesu_key_expand_output desc.type | ||
45 | bit 9 = set if SEC supports the pkeu_ptmul descriptor type | ||
46 | bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type | ||
47 | bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type | ||
48 | |||
49 | ..and so on and so forth. | ||
50 | |||
51 | Optional properties: | ||
52 | |||
53 | - interrupt-parent : the phandle for the interrupt controller that | ||
54 | services interrupts for this device. | ||
55 | |||
56 | Example: | ||
57 | |||
58 | /* MPC8548E */ | ||
59 | crypto@30000 { | ||
60 | compatible = "fsl,sec2.1", "fsl,sec2.0"; | ||
61 | reg = <0x30000 0x10000>; | ||
62 | interrupts = <29 2>; | ||
63 | interrupt-parent = <&mpic>; | ||
64 | fsl,num-channels = <4>; | ||
65 | fsl,channel-fifo-len = <24>; | ||
66 | fsl,exec-units-mask = <0xfe>; | ||
67 | fsl,descriptor-types-mask = <0x12b0ebf>; | ||
68 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/spi.txt b/Documentation/powerpc/dts-bindings/fsl/spi.txt new file mode 100644 index 000000000000..e7d9a344c4f4 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/spi.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | * SPI (Serial Peripheral Interface) | ||
2 | |||
3 | Required properties: | ||
4 | - cell-index : SPI controller index. | ||
5 | - compatible : should be "fsl,spi". | ||
6 | - mode : the SPI operation mode, it can be "cpu" or "cpu-qe". | ||
7 | - reg : Offset and length of the register set for the device | ||
8 | - interrupts : <a b> where a is the interrupt number and b is a | ||
9 | field that represents an encoding of the sense and level | ||
10 | information for the interrupt. This should be encoded based on | ||
11 | the information in section 2) depending on the type of interrupt | ||
12 | controller you have. | ||
13 | - interrupt-parent : the phandle for the interrupt controller that | ||
14 | services interrupts for this device. | ||
15 | |||
16 | Example: | ||
17 | spi@4c0 { | ||
18 | cell-index = <0>; | ||
19 | compatible = "fsl,spi"; | ||
20 | reg = <4c0 40>; | ||
21 | interrupts = <82 0>; | ||
22 | interrupt-parent = <700>; | ||
23 | mode = "cpu"; | ||
24 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/ssi.txt b/Documentation/powerpc/dts-bindings/fsl/ssi.txt new file mode 100644 index 000000000000..d100555d488a --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/ssi.txt | |||
@@ -0,0 +1,38 @@ | |||
1 | Freescale Synchronous Serial Interface | ||
2 | |||
3 | The SSI is a serial device that communicates with audio codecs. It can | ||
4 | be programmed in AC97, I2S, left-justified, or right-justified modes. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible : compatible list, containing "fsl,ssi" | ||
8 | - cell-index : the SSI, <0> = SSI1, <1> = SSI2, and so on | ||
9 | - reg : offset and length of the register set for the device | ||
10 | - interrupts : <a b> where a is the interrupt number and b is a | ||
11 | field that represents an encoding of the sense and | ||
12 | level information for the interrupt. This should be | ||
13 | encoded based on the information in section 2) | ||
14 | depending on the type of interrupt controller you | ||
15 | have. | ||
16 | - interrupt-parent : the phandle for the interrupt controller that | ||
17 | services interrupts for this device. | ||
18 | - fsl,mode : the operating mode for the SSI interface | ||
19 | "i2s-slave" - I2S mode, SSI is clock slave | ||
20 | "i2s-master" - I2S mode, SSI is clock master | ||
21 | "lj-slave" - left-justified mode, SSI is clock slave | ||
22 | "lj-master" - l.j. mode, SSI is clock master | ||
23 | "rj-slave" - right-justified mode, SSI is clock slave | ||
24 | "rj-master" - r.j., SSI is clock master | ||
25 | "ac97-slave" - AC97 mode, SSI is clock slave | ||
26 | "ac97-master" - AC97 mode, SSI is clock master | ||
27 | |||
28 | Optional properties: | ||
29 | - codec-handle : phandle to a 'codec' node that defines an audio | ||
30 | codec connected to this SSI. This node is typically | ||
31 | a child of an I2C or other control node. | ||
32 | |||
33 | Child 'codec' node required properties: | ||
34 | - compatible : compatible list, contains the name of the codec | ||
35 | |||
36 | Child 'codec' node optional properties: | ||
37 | - clock-frequency : The frequency of the input clock, which typically | ||
38 | comes from an on-board dedicated oscillator. | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt new file mode 100644 index 000000000000..583ef6b56c43 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt | |||
@@ -0,0 +1,69 @@ | |||
1 | * MDIO IO device | ||
2 | |||
3 | The MDIO is a bus to which the PHY devices are connected. For each | ||
4 | device that exists on this bus, a child node should be created. See | ||
5 | the definition of the PHY node below for an example of how to define | ||
6 | a PHY. | ||
7 | |||
8 | Required properties: | ||
9 | - reg : Offset and length of the register set for the device | ||
10 | - compatible : Should define the compatible device type for the | ||
11 | mdio. Currently, this is most likely to be "fsl,gianfar-mdio" | ||
12 | |||
13 | Example: | ||
14 | |||
15 | mdio@24520 { | ||
16 | reg = <24520 20>; | ||
17 | compatible = "fsl,gianfar-mdio"; | ||
18 | |||
19 | ethernet-phy@0 { | ||
20 | ...... | ||
21 | }; | ||
22 | }; | ||
23 | |||
24 | |||
25 | * Gianfar-compatible ethernet nodes | ||
26 | |||
27 | Required properties: | ||
28 | |||
29 | - device_type : Should be "network" | ||
30 | - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC" | ||
31 | - compatible : Should be "gianfar" | ||
32 | - reg : Offset and length of the register set for the device | ||
33 | - mac-address : List of bytes representing the ethernet address of | ||
34 | this controller | ||
35 | - interrupts : <a b> where a is the interrupt number and b is a | ||
36 | field that represents an encoding of the sense and level | ||
37 | information for the interrupt. This should be encoded based on | ||
38 | the information in section 2) depending on the type of interrupt | ||
39 | controller you have. | ||
40 | - interrupt-parent : the phandle for the interrupt controller that | ||
41 | services interrupts for this device. | ||
42 | - phy-handle : The phandle for the PHY connected to this ethernet | ||
43 | controller. | ||
44 | - fixed-link : <a b c d e> where a is emulated phy id - choose any, | ||
45 | but unique among all specified fixed-links, b is duplex - 0 half, | ||
46 | 1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no | ||
47 | pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause. | ||
48 | |||
49 | Recommended properties: | ||
50 | |||
51 | - phy-connection-type : a string naming the controller/PHY interface type, | ||
52 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", | ||
53 | "tbi", or "rtbi". This property is only really needed if the connection | ||
54 | is of type "rgmii-id", as all other connection types are detected by | ||
55 | hardware. | ||
56 | |||
57 | |||
58 | Example: | ||
59 | ethernet@24000 { | ||
60 | #size-cells = <0>; | ||
61 | device_type = "network"; | ||
62 | model = "TSEC"; | ||
63 | compatible = "gianfar"; | ||
64 | reg = <24000 1000>; | ||
65 | mac-address = [ 00 E0 0C 00 73 00 ]; | ||
66 | interrupts = <d 3 e 3 12 3>; | ||
67 | interrupt-parent = <40000>; | ||
68 | phy-handle = <2452000>; | ||
69 | }; | ||
diff --git a/Documentation/powerpc/dts-bindings/fsl/usb.txt b/Documentation/powerpc/dts-bindings/fsl/usb.txt new file mode 100644 index 000000000000..b00152402694 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/usb.txt | |||
@@ -0,0 +1,59 @@ | |||
1 | Freescale SOC USB controllers | ||
2 | |||
3 | The device node for a USB controller that is part of a Freescale | ||
4 | SOC is as described in the document "Open Firmware Recommended | ||
5 | Practice : Universal Serial Bus" with the following modifications | ||
6 | and additions : | ||
7 | |||
8 | Required properties : | ||
9 | - compatible : Should be "fsl-usb2-mph" for multi port host USB | ||
10 | controllers, or "fsl-usb2-dr" for dual role USB controllers | ||
11 | - phy_type : For multi port host USB controllers, should be one of | ||
12 | "ulpi", or "serial". For dual role USB controllers, should be | ||
13 | one of "ulpi", "utmi", "utmi_wide", or "serial". | ||
14 | - reg : Offset and length of the register set for the device | ||
15 | - port0 : boolean; if defined, indicates port0 is connected for | ||
16 | fsl-usb2-mph compatible controllers. Either this property or | ||
17 | "port1" (or both) must be defined for "fsl-usb2-mph" compatible | ||
18 | controllers. | ||
19 | - port1 : boolean; if defined, indicates port1 is connected for | ||
20 | fsl-usb2-mph compatible controllers. Either this property or | ||
21 | "port0" (or both) must be defined for "fsl-usb2-mph" compatible | ||
22 | controllers. | ||
23 | - dr_mode : indicates the working mode for "fsl-usb2-dr" compatible | ||
24 | controllers. Can be "host", "peripheral", or "otg". Default to | ||
25 | "host" if not defined for backward compatibility. | ||
26 | |||
27 | Recommended properties : | ||
28 | - interrupts : <a b> where a is the interrupt number and b is a | ||
29 | field that represents an encoding of the sense and level | ||
30 | information for the interrupt. This should be encoded based on | ||
31 | the information in section 2) depending on the type of interrupt | ||
32 | controller you have. | ||
33 | - interrupt-parent : the phandle for the interrupt controller that | ||
34 | services interrupts for this device. | ||
35 | |||
36 | Example multi port host USB controller device node : | ||
37 | usb@22000 { | ||
38 | compatible = "fsl-usb2-mph"; | ||
39 | reg = <22000 1000>; | ||
40 | #address-cells = <1>; | ||
41 | #size-cells = <0>; | ||
42 | interrupt-parent = <700>; | ||
43 | interrupts = <27 1>; | ||
44 | phy_type = "ulpi"; | ||
45 | port0; | ||
46 | port1; | ||
47 | }; | ||
48 | |||
49 | Example dual role USB controller device node : | ||
50 | usb@23000 { | ||
51 | compatible = "fsl-usb2-dr"; | ||
52 | reg = <23000 1000>; | ||
53 | #address-cells = <1>; | ||
54 | #size-cells = <0>; | ||
55 | interrupt-parent = <700>; | ||
56 | interrupts = <26 1>; | ||
57 | dr_mode = "otg"; | ||
58 | phy_type = "ulpi"; | ||
59 | }; | ||
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index a83ff23cd68c..0843ed0163a5 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt | |||
@@ -1,89 +1,528 @@ | |||
1 | rfkill - RF switch subsystem support | 1 | rfkill - RF switch subsystem support |
2 | ==================================== | 2 | ==================================== |
3 | 3 | ||
4 | 1 Implementation details | 4 | 1 Introduction |
5 | 2 Driver support | 5 | 2 Implementation details |
6 | 3 Userspace support | 6 | 3 Kernel driver guidelines |
7 | 3.1 wireless device drivers | ||
8 | 3.2 platform/switch drivers | ||
9 | 3.3 input device drivers | ||
10 | 4 Kernel API | ||
11 | 5 Userspace support | ||
7 | 12 | ||
8 | =============================================================================== | ||
9 | 1: Implementation details | ||
10 | 13 | ||
11 | The rfkill switch subsystem offers support for keys often found on laptops | 14 | 1. Introduction: |
12 | to enable wireless devices like WiFi and Bluetooth. | 15 | |
16 | The rfkill switch subsystem exists to add a generic interface to circuitry that | ||
17 | can enable or disable the signal output of a wireless *transmitter* of any | ||
18 | type. By far, the most common use is to disable radio-frequency transmitters. | ||
13 | 19 | ||
14 | This is done by providing the user 3 possibilities: | 20 | Note that disabling the signal output means that the transmitter is to be |
15 | 1 - The rfkill system handles all events; userspace is not aware of events. | 21 | made to not emit any energy when "blocked". rfkill is not about blocking data |
16 | 2 - The rfkill system handles all events; userspace is informed about the events. | 22 | transmissions, it is about blocking energy emission. |
17 | 3 - The rfkill system does not handle events; userspace handles all events. | ||
18 | 23 | ||
19 | The buttons to enable and disable the wireless radios are important in | 24 | The rfkill subsystem offers support for keys and switches often found on |
25 | laptops to enable wireless devices like WiFi and Bluetooth, so that these keys | ||
26 | and switches actually perform an action in all wireless devices of a given type | ||
27 | attached to the system. | ||
28 | |||
29 | The buttons to enable and disable the wireless transmitters are important in | ||
20 | situations where the user is for example using his laptop on a location where | 30 | situations where the user is for example using his laptop on a location where |
21 | wireless radios _must_ be disabled (e.g. airplanes). | 31 | radio-frequency transmitters _must_ be disabled (e.g. airplanes). |
22 | Because of this requirement, userspace support for the keys should not be | 32 | |
23 | made mandatory. Because userspace might want to perform some additional smarter | 33 | Because of this requirement, userspace support for the keys should not be made |
24 | tasks when the key is pressed, rfkill still provides userspace the possibility | 34 | mandatory. Because userspace might want to perform some additional smarter |
25 | to take over the task to handle the key events. | 35 | tasks when the key is pressed, rfkill provides userspace the possibility to |
36 | take over the task to handle the key events. | ||
37 | |||
38 | =============================================================================== | ||
39 | 2: Implementation details | ||
40 | |||
41 | The rfkill subsystem is composed of various components: the rfkill class, the | ||
42 | rfkill-input module (an input layer handler), and some specific input layer | ||
43 | events. | ||
44 | |||
45 | The rfkill class provides kernel drivers with an interface that allows them to | ||
46 | know when they should enable or disable a wireless network device transmitter. | ||
47 | This is enabled by the CONFIG_RFKILL Kconfig option. | ||
48 | |||
49 | The rfkill class support makes sure userspace will be notified of all state | ||
50 | changes on rfkill devices through uevents. It provides a notification chain | ||
51 | for interested parties in the kernel to also get notified of rfkill state | ||
52 | changes in other drivers. It creates several sysfs entries which can be used | ||
53 | by userspace. See section "Userspace support". | ||
54 | |||
55 | The rfkill-input module provides the kernel with the ability to implement a | ||
56 | basic response when the user presses a key or button (or toggles a switch) | ||
57 | related to rfkill functionality. It is an in-kernel implementation of default | ||
58 | policy of reacting to rfkill-related input events and neither mandatory nor | ||
59 | required for wireless drivers to operate. It is enabled by the | ||
60 | CONFIG_RFKILL_INPUT Kconfig option. | ||
61 | |||
62 | rfkill-input is a rfkill-related events input layer handler. This handler will | ||
63 | listen to all rfkill key events and will change the rfkill state of the | ||
64 | wireless devices accordingly. With this option enabled userspace could either | ||
65 | do nothing or simply perform monitoring tasks. | ||
66 | |||
67 | The rfkill-input module also provides EPO (emergency power-off) functionality | ||
68 | for all wireless transmitters. This function cannot be overridden, and it is | ||
69 | always active. rfkill EPO is related to *_RFKILL_ALL input layer events. | ||
70 | |||
71 | |||
72 | Important terms for the rfkill subsystem: | ||
73 | |||
74 | In order to avoid confusion, we avoid the term "switch" in rfkill when it is | ||
75 | referring to an electronic control circuit that enables or disables a | ||
76 | transmitter. We reserve it for the physical device a human manipulates | ||
77 | (which is an input device, by the way): | ||
78 | |||
79 | rfkill switch: | ||
80 | |||
81 | A physical device a human manipulates. Its state can be perceived by | ||
82 | the kernel either directly (through a GPIO pin, ACPI GPE) or by its | ||
83 | effect on a rfkill line of a wireless device. | ||
84 | |||
85 | rfkill controller: | ||
86 | |||
87 | A hardware circuit that controls the state of a rfkill line, which a | ||
88 | kernel driver can interact with *to modify* that state (i.e. it has | ||
89 | either write-only or read/write access). | ||
90 | |||
91 | rfkill line: | ||
92 | |||
93 | An input channel (hardware or software) of a wireless device, which | ||
94 | causes a wireless transmitter to stop emitting energy (BLOCK) when it | ||
95 | is active. Point of view is extremely important here: rfkill lines are | ||
96 | always seen from the PoV of a wireless device (and its driver). | ||
97 | |||
98 | soft rfkill line/software rfkill line: | ||
99 | |||
100 | A rfkill line the wireless device driver can directly change the state | ||
101 | of. Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED. | ||
102 | |||
103 | hard rfkill line/hardware rfkill line: | ||
104 | |||
105 | A rfkill line that works fully in hardware or firmware, and that cannot | ||
106 | be overridden by the kernel driver. The hardware device or the | ||
107 | firmware just exports its status to the driver, but it is read-only. | ||
108 | Related to rfkill_state RFKILL_STATE_HARD_BLOCKED. | ||
109 | |||
110 | The enum rfkill_state describes the rfkill state of a transmitter: | ||
111 | |||
112 | When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state, | ||
113 | the wireless transmitter (radio TX circuit for example) is *enabled*. When | ||
114 | it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the | ||
115 | wireless transmitter is to be *blocked* from operating. | ||
116 | |||
117 | RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change | ||
118 | that state. RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio() | ||
119 | will not be able to change the state and will return with a suitable error if | ||
120 | attempts are made to set the state to RFKILL_STATE_UNBLOCKED. | ||
121 | |||
122 | RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is | ||
123 | locked in the BLOCKED state by a hardwired rfkill line (typically an input pin | ||
124 | that, when active, forces the transmitter to be disabled) which the driver | ||
125 | CANNOT override. | ||
126 | |||
127 | Full rfkill functionality requires two different subsystems to cooperate: the | ||
128 | input layer and the rfkill class. The input layer issues *commands* to the | ||
129 | entire system requesting that devices registered to the rfkill class change | ||
130 | state. The way this interaction happens is not complex, but it is not obvious | ||
131 | either: | ||
132 | |||
133 | Kernel Input layer: | ||
134 | |||
135 | * Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and | ||
136 | other such events when the user presses certain keys, buttons, or | ||
137 | toggles certain physical switches. | ||
138 | |||
139 | THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE | ||
140 | KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT. It is | ||
141 | used to issue *commands* for the system to change behaviour, and these | ||
142 | commands may or may not be carried out by some kernel driver or | ||
143 | userspace application. It follows that doing user feedback based only | ||
144 | on input events is broken, as there is no guarantee that an input event | ||
145 | will be acted upon. | ||
146 | |||
147 | Most wireless communication device drivers implementing rfkill | ||
148 | functionality MUST NOT generate these events, and have no reason to | ||
149 | register themselves with the input layer. Doing otherwise is a common | ||
150 | misconception. There is an API to propagate rfkill status change | ||
151 | information, and it is NOT the input layer. | ||
152 | |||
153 | rfkill class: | ||
154 | |||
155 | * Calls a hook in a driver to effectively change the wireless | ||
156 | transmitter state; | ||
157 | * Keeps track of the wireless transmitter state (with help from | ||
158 | the driver); | ||
159 | * Generates userspace notifications (uevents) and a call to a | ||
160 | notification chain (kernel) when there is a wireless transmitter | ||
161 | state change; | ||
162 | * Connects a wireless communications driver with the common rfkill | ||
163 | control system, which, for example, allows actions such as | ||
164 | "switch all bluetooth devices offline" to be carried out by | ||
165 | userspace or by rfkill-input. | ||
166 | |||
167 | THE RFKILL CLASS NEVER ISSUES INPUT EVENTS. THE RFKILL CLASS DOES | ||
168 | NOT LISTEN TO INPUT EVENTS. NO DRIVER USING THE RFKILL CLASS SHALL | ||
169 | EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS. Doing otherwise is | ||
170 | a layering violation. | ||
171 | |||
172 | Most wireless data communication drivers in the kernel have just to | ||
173 | implement the rfkill class API to work properly. Interfacing to the | ||
174 | input layer is not often required (and is very often a *bug*) on | ||
175 | wireless drivers. | ||
176 | |||
177 | Platform drivers often have to attach to the input layer to *issue* | ||
178 | (but never to listen to) rfkill events for rfkill switches, and also to | ||
179 | the rfkill class to export a control interface for the platform rfkill | ||
180 | controllers to the rfkill subsystem. This does NOT mean the rfkill | ||
181 | switch is attached to a rfkill class (doing so is almost always wrong). | ||
182 | It just means the same kernel module is the driver for different | ||
183 | devices (rfkill switches and rfkill controllers). | ||
184 | |||
185 | |||
186 | Userspace input handlers (uevents) or kernel input handlers (rfkill-input): | ||
187 | |||
188 | * Implements the policy of what should happen when one of the input | ||
189 | layer events related to rfkill operation is received. | ||
190 | * Uses the sysfs interface (userspace) or private rfkill API calls | ||
191 | to tell the devices registered with the rfkill class to change | ||
192 | their state (i.e. translates the input layer event into real | ||
193 | action). | ||
194 | * rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0 | ||
195 | (power off all transmitters) in a special way: it ignores any | ||
196 | overrides and local state cache and forces all transmitters to the | ||
197 | RFKILL_STATE_SOFT_BLOCKED state (including those which are already | ||
198 | supposed to be BLOCKED). Note that the opposite event (power on all | ||
199 | transmitters) is handled normally. | ||
200 | |||
201 | Userspace uevent handler or kernel platform-specific drivers hooked to the | ||
202 | rfkill notifier chain: | ||
203 | |||
204 | * Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents, | ||
205 | in order to know when a device that is registered with the rfkill | ||
206 | class changes state; | ||
207 | * Issues feedback notifications to the user; | ||
208 | * In the rare platforms where this is required, synthesizes an input | ||
209 | event to command all *OTHER* rfkill devices to also change their | ||
210 | states when a specific rfkill device changes state. | ||
211 | |||
212 | |||
213 | =============================================================================== | ||
214 | 3: Kernel driver guidelines | ||
215 | |||
216 | Remember: point-of-view is everything for a driver that connects to the rfkill | ||
217 | subsystem. All the details below must be measured/perceived from the point of | ||
218 | view of the specific driver being modified. | ||
219 | |||
220 | The first thing one needs to know is whether his driver should be talking to | ||
221 | the rfkill class or to the input layer. In rare cases (platform drivers), it | ||
222 | could happen that you need to do both, as platform drivers often handle a | ||
223 | variety of devices in the same driver. | ||
224 | |||
225 | Do not mistake input devices for rfkill controllers. The only type of "rfkill | ||
226 | switch" device that is to be registered with the rfkill class are those | ||
227 | directly controlling the circuits that cause a wireless transmitter to stop | ||
228 | working (or the software equivalent of them), i.e. what we call a rfkill | ||
229 | controller. Every other kind of "rfkill switch" is just an input device and | ||
230 | MUST NOT be registered with the rfkill class. | ||
231 | |||
232 | A driver should register a device with the rfkill class when ALL of the | ||
233 | following conditions are met (they define a rfkill controller): | ||
234 | |||
235 | 1. The device is/controls a data communications wireless transmitter; | ||
236 | |||
237 | 2. The kernel can interact with the hardware/firmware to CHANGE the wireless | ||
238 | transmitter state (block/unblock TX operation); | ||
239 | |||
240 | 3. The transmitter can be made to not emit any energy when "blocked": | ||
241 | rfkill is not about blocking data transmissions, it is about blocking | ||
242 | energy emission; | ||
243 | |||
244 | A driver should register a device with the input subsystem to issue | ||
245 | rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX, | ||
246 | SW_RFKILL_ALL, etc) when ALL of the following conditions are met: | ||
247 | |||
248 | 1. It is directly related to some physical device the user interacts with, to | ||
249 | command the O.S./firmware/hardware to enable/disable a data communications | ||
250 | wireless transmitter. | ||
251 | |||
252 | Examples of the physical device are: buttons, keys and switches the user | ||
253 | will press/touch/slide/switch to enable or disable the wireless | ||
254 | communication device. | ||
255 | |||
256 | 2. It is NOT slaved to another device, i.e. there is no other device that | ||
257 | issues rfkill-related input events in preference to this one. | ||
26 | 258 | ||
27 | The system inside the kernel has been split into 2 separate sections: | 259 | Please refer to the corner cases and examples section for more details. |
28 | 1 - RFKILL | ||
29 | 2 - RFKILL_INPUT | ||
30 | 260 | ||
31 | The first option enables rfkill support and will make sure userspace will | 261 | When in doubt, do not issue input events. For drivers that should generate |
32 | be notified of any events through the input device. It also creates several | 262 | input events in some platforms, but not in others (e.g. b43), the best solution |
33 | sysfs entries which can be used by userspace. See section "Userspace support". | 263 | is to NEVER generate input events in the first place. That work should be |
264 | deferred to a platform-specific kernel module (which will know when to generate | ||
265 | events through the rfkill notifier chain) or to userspace. This avoids the | ||
266 | usual maintenance problems with DMI whitelisting. | ||
34 | 267 | ||
35 | The second option provides an rfkill input handler. This handler will | ||
36 | listen to all rfkill key events and will toggle the radio accordingly. | ||
37 | With this option enabled userspace could either do nothing or simply | ||
38 | perform monitoring tasks. | ||
39 | 268 | ||
269 | Corner cases and examples: | ||
40 | ==================================== | 270 | ==================================== |
41 | 2: Driver support | ||
42 | 271 | ||
43 | To build a driver with rfkill subsystem support, the driver should | 272 | 1. If the device is an input device that, because of hardware or firmware, |
44 | depend on the Kconfig symbol RFKILL; it should _not_ depend on | 273 | causes wireless transmitters to be blocked regardless of the kernel's will, it |
45 | RKFILL_INPUT. | 274 | is still just an input device, and NOT to be registered with the rfkill class. |
46 | 275 | ||
47 | Unless key events trigger an interrupt to which the driver listens, polling | 276 | 2. If the wireless transmitter switch control is read-only, it is an input |
48 | will be required to determine the key state changes. For this the input | 277 | device and not to be registered with the rfkill class (and maybe not to be made |
49 | layer providers the input-polldev handler. | 278 | an input layer event source either, see below). |
50 | 279 | ||
51 | A driver should implement a few steps to correctly make use of the | 280 | 3. If there is some other device driver *closer* to the actual hardware the |
52 | rfkill subsystem. First for non-polling drivers: | 281 | user interacted with (the button/switch/key) to issue an input event, THAT is |
282 | the device driver that should be issuing input events. | ||
53 | 283 | ||
54 | - rfkill_allocate() | 284 | E.g: |
55 | - input_allocate_device() | 285 | [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input] |
56 | - rfkill_register() | 286 | (platform driver) (wireless card driver) |
57 | - input_register_device() | 287 | |
288 | The user is closer to the RFKILL slide switch platform driver, so the driver | ||
289 | which must issue input events is the platform driver looking at the GPIO | ||
290 | hardware, and NEVER the wireless card driver (which is just a slave). It is | ||
291 | very likely that there are other leaves than just the WLAN card rf-kill input | ||
292 | (e.g. a bluetooth card, etc)... | ||
293 | |||
294 | On the other hand, some embedded devices do this: | ||
295 | |||
296 | [RFKILL slider switch] -- [WLAN card rf-kill input] | ||
297 | (wireless card driver) | ||
298 | |||
299 | In this situation, the wireless card driver *could* register itself as an input | ||
300 | device and issue rf-kill related input events... but in order to AVOID the need | ||
301 | for DMI whitelisting, the wireless card driver does NOT do it. Userspace (HAL) | ||
302 | or a platform driver (that exists only on these embedded devices) will do the | ||
303 | dirty job of issuing the input events. | ||
304 | |||
305 | |||
306 | COMMON MISTAKES in kernel drivers, related to rfkill: | ||
307 | ==================================== | ||
308 | |||
309 | 1. NEVER confuse input device keys and buttons with input device switches. | ||
310 | |||
311 | 1a. Switches are always set or reset. They report the current state | ||
312 | (on position or off position). | ||
313 | |||
314 | 1b. Keys and buttons are either in the pressed or not-pressed state, and | ||
315 | that's it. A "button" that latches down when you press it, and | ||
316 | unlatches when you press it again is in fact a switch as far as input | ||
317 | devices go. | ||
318 | |||
319 | Add the SW_* events you need for switches, do NOT try to emulate a button using | ||
320 | KEY_* events just because there is no such SW_* event yet. Do NOT try to use, | ||
321 | for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead. | ||
322 | |||
323 | 2. Input device switches (sources of EV_SW events) DO store their current state | ||
324 | (so you *must* initialize it by issuing a gratuitous input layer event on | ||
325 | driver start-up and also when resuming from sleep), and that state CAN be | ||
326 | queried from userspace through IOCTLs. There is no sysfs interface for this, | ||
327 | but that doesn't mean you should break things trying to hook it to the rfkill | ||
328 | class to get a sysfs interface :-) | ||
329 | |||
330 | 3. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the | ||
331 | correct event for your switch/button. These events are emergency power-off | ||
332 | events when they are trying to turn the transmitters off. An example of an | ||
333 | input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill | ||
334 | switch in a laptop which is NOT a hotkey, but a real switch that kills radios | ||
335 | in hardware, even if the O.S. has gone to lunch. An example of an input device | ||
336 | which SHOULD NOT generate *_RFKILL_ALL events by default, is any sort of hot | ||
337 | key that does nothing by itself, as well as any hot key that is type-specific | ||
338 | (e.g. the one for WLAN). | ||
339 | |||
340 | |||
341 | 3.1 Guidelines for wireless device drivers | ||
342 | ------------------------------------------ | ||
343 | |||
344 | 1. Each independent transmitter in a wireless device (usually there is only one | ||
345 | transmitter per device) should have a SINGLE rfkill class attached to it. | ||
346 | |||
347 | 2. If the device does not have any sort of hardware assistance to allow the | ||
348 | driver to rfkill the device, the driver should emulate it by taking all actions | ||
349 | required to silence the transmitter. | ||
350 | |||
351 | 3. If it is impossible to silence the transmitter (i.e. it still emits energy, | ||
352 | even if it is just in brief pulses, when there is no data to transmit and there | ||
353 | is no hardware support to turn it off) do NOT lie to the users. Do not attach | ||
354 | it to a rfkill class. The rfkill subsystem does not deal with data | ||
355 | transmission, it deals with energy emission. If the transmitter is emitting | ||
356 | energy, it is not blocked in rfkill terms. | ||
357 | |||
358 | 4. It doesn't matter if the device has multiple rfkill input lines affecting | ||
359 | the same transmitter, their combined state is to be exported as a single state | ||
360 | per transmitter (see rule 1). | ||
361 | |||
362 | This rule exists because users of the rfkill subsystem expect to get (and set, | ||
363 | when possible) the overall transmitter rfkill state, not of a particular rfkill | ||
364 | line. | ||
365 | |||
366 | Example of a WLAN wireless driver connected to the rfkill subsystem: | ||
367 | -------------------------------------------------------------------- | ||
368 | |||
369 | A certain WLAN card has one input pin that causes it to block the transmitter | ||
370 | and makes the status of that input pin available (only for reading!) to the | ||
371 | kernel driver. This is a hard rfkill input line (it cannot be overridden by | ||
372 | the kernel driver). | ||
373 | |||
374 | The card also has one PCI register that, if manipulated by the driver, causes | ||
375 | it to block the transmitter. This is a soft rfkill input line. | ||
376 | |||
377 | It also has thermal protection circuitry that shuts down its transmitter if | ||
378 | the card overheats, and makes the status of that protection available (only for | ||
379 | reading!) to the kernel driver. This is also a hard rfkill input line. | ||
380 | |||
381 | If any one of these rfkill lines is active, the transmitter is blocked by | ||
382 | the hardware and forced offline. | ||
383 | |||
384 | The driver should allocate and attach to its struct device *ONE* instance of | ||
385 | the rfkill class (there is only one transmitter). | ||
386 | |||
387 | It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if | ||
388 | either one of its two hard rfkill input lines is active. If the two hard | ||
389 | rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft | ||
390 | rfkill input line is active. Only if none of the rfkill input lines are | ||
391 | active, will it return RFKILL_STATE_UNBLOCKED. | ||
58 | 392 | ||
59 | For polling drivers: | 393 | If it doesn't implement the get_state() hook, it must make sure that its calls |
394 | to rfkill_force_state() are enough to keep the status always up-to-date, and it | ||
395 | must do a rfkill_force_state() on resume from sleep. | ||
60 | 396 | ||
397 | Every time the driver gets a notification from the card that one of its rfkill | ||
398 | lines changed state (polling might be needed on badly designed cards that don't | ||
399 | generate interrupts for such events), it recomputes the rfkill state as per | ||
400 | above, and calls rfkill_force_state() to update it. | ||
401 | |||
402 | The driver should implement the toggle_radio() hook, that: | ||
403 | |||
404 | 1. Returns an error if one of the hardware rfkill lines is active, and the | ||
405 | caller asked for RFKILL_STATE_UNBLOCKED. | ||
406 | |||
407 | 2. Activates the soft rfkill line if the caller asked for state | ||
408 | RFKILL_STATE_SOFT_BLOCKED. It should do this even if one of the hard rfkill | ||
409 | lines is active, effectively double-blocking the transmitter. | ||
410 | |||
411 | 3. Deactivates the soft rfkill line if none of the hardware rfkill lines are | ||
412 | active and the caller asked for RFKILL_STATE_UNBLOCKED. | ||
413 | |||
414 | =============================================================================== | ||
415 | 4: Kernel API | ||
416 | |||
417 | To build a driver with rfkill subsystem support, the driver should depend on | ||
418 | (or select) the Kconfig symbol RFKILL; it should _not_ depend on RFKILL_INPUT. | ||
419 | |||
420 | The hardware the driver talks to may be write-only (where the current state | ||
421 | of the hardware is unknown), or read-write (where the hardware can be queried | ||
422 | about its current state). | ||
423 | |||
424 | The rfkill class will call the get_state hook of a device every time it needs | ||
425 | to know the *real* current state of the hardware. This can happen often. | ||
426 | |||
427 | Some hardware provides events when its status changes. In these cases, it is | ||
428 | best for the driver to not provide a get_state hook, and instead register the | ||
429 | rfkill class *already* with the correct status, and keep it updated using | ||
430 | rfkill_force_state() when it gets an event from the hardware. | ||
431 | |||
432 | There is no provision for a statically-allocated rfkill struct. You must | ||
433 | use rfkill_allocate() to allocate one. | ||
434 | |||
435 | You should: | ||
61 | - rfkill_allocate() | 436 | - rfkill_allocate() |
62 | - input_allocate_polled_device() | 437 | - modify rfkill fields (flags, name) |
438 | - modify state to the current hardware state (THIS IS THE ONLY TIME | ||
439 | YOU CAN ACCESS state DIRECTLY) | ||
63 | - rfkill_register() | 440 | - rfkill_register() |
64 | - input_register_polled_device() | ||
65 | 441 | ||
66 | When a key event has been detected, the correct event should be | 442 | The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through |
67 | sent over the input device which has been registered by the driver. | 443 | a suitable return of get_state() or through rfkill_force_state(). |
68 | 444 | ||
69 | ==================================== | 445 | When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch |
70 | 3: Userspace support | 446 | it to a different state is through a suitable return of get_state() or through |
447 | rfkill_force_state(). | ||
448 | |||
449 | If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED | ||
450 | when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should | ||
451 | not return an error. Instead, it should try to double-block the transmitter, | ||
452 | so that its state will change from RFKILL_STATE_HARD_BLOCKED to | ||
453 | RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease. | ||
71 | 454 | ||
72 | For each key an input device will be created which will send out the correct | 455 | Please refer to the source for more documentation. |
73 | key event when the rfkill key has been pressed. | 456 | |
457 | =============================================================================== | ||
458 | 5: Userspace support | ||
459 | |||
460 | rfkill devices issue uevents (with an action of "change"), with the following | ||
461 | environment variables set: | ||
462 | |||
463 | RFKILL_NAME | ||
464 | RFKILL_STATE | ||
465 | RFKILL_TYPE | ||
466 | |||
467 | The ABI for these variables is defined by the sysfs attributes. It is best | ||
468 | to take a quick look at the source to make sure of the possible values. | ||
469 | |||
470 | It is expected that HAL will trap those, and bridge them to DBUS, etc. These | ||
471 | events CAN and SHOULD be used to give feedback to the user about the rfkill | ||
472 | status of the system. | ||
473 | |||
474 | Input devices may issue events that are related to rfkill. These are the | ||
475 | various KEY_* events and SW_* events supported by rfkill-input.c. | ||
476 | |||
477 | ******IMPORTANT****** | ||
478 | When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL | ||
479 | SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it | ||
480 | has set to true the user_claim attribute for that particular switch. This rule | ||
481 | is *absolute*; do NOT violate it. | ||
482 | ******IMPORTANT****** | ||
483 | |||
484 | Userspace must not assume it is the only source of control for rfkill switches. | ||
485 | Their state CAN and WILL change due to firmware actions, direct user actions, | ||
486 | and the rfkill-input EPO override for *_RFKILL_ALL. | ||
487 | |||
488 | When rfkill-input is not active, userspace must initiate a rfkill status | ||
489 | change by writing to the "state" attribute in order for anything to happen. | ||
490 | |||
491 | Take particular care to implement EV_SW SW_RFKILL_ALL properly. When that | ||
492 | switch is set to OFF, *every* rfkill device *MUST* be immediately put into the | ||
493 | RFKILL_STATE_SOFT_BLOCKED state, no questions asked. | ||
74 | 494 | ||
75 | The following sysfs entries will be created: | 495 | The following sysfs entries will be created: |
76 | 496 | ||
77 | name: Name assigned by driver to this key (interface or driver name). | 497 | name: Name assigned by driver to this key (interface or driver name). |
78 | type: Name of the key type ("wlan", "bluetooth", etc). | 498 | type: Name of the key type ("wlan", "bluetooth", etc). |
79 | state: Current state of the key. 1: On, 0: Off. | 499 | state: Current state of the transmitter |
500 | 0: RFKILL_STATE_SOFT_BLOCKED | ||
501 | transmitter is forced off, but one can override it | ||
502 | by a write to the state attribute; | ||
503 | 1: RFKILL_STATE_UNBLOCKED | ||
504 | transmitter is NOT forced off, and may operate if | ||
505 | all other conditions for such operation are met | ||
506 | (such as interface is up and configured, etc); | ||
507 | 2: RFKILL_STATE_HARD_BLOCKED | ||
508 | transmitter is forced off by something outside of | ||
509 | the driver's control. One cannot set a device to | ||
510 | this state through writes to the state attribute; | ||
80 | claim: 1: Userspace handles events, 0: Kernel handles events | 511 | claim: 1: Userspace handles events, 0: Kernel handles events |
81 | 512 | ||
82 | Both the "state" and "claim" entries are also writable. For the "state" entry | 513 | Both the "state" and "claim" entries are also writable. For the "state" entry |
83 | this means that when 1 or 0 is written all radios, not yet in the requested | 514 | this means that when 1 or 0 is written, the device rfkill state (if not yet in |
84 | state, will be will be toggled accordingly. | 515 | the requested state) will be toggled accordingly. |
516 | |||
85 | For the "claim" entry writing 1 to it means that the kernel no longer handles | 517 | For the "claim" entry writing 1 to it means that the kernel no longer handles |
86 | key events even though RFKILL_INPUT input was enabled. When "claim" has been | 518 | key events even though RFKILL_INPUT input was enabled. When "claim" has been |
87 | set to 0, userspace should make sure that it listens for the input events or | 519 | set to 0, userspace should make sure that it listens for the input events or |
88 | check the sysfs "state" entry regularly to correctly perform the required | 520 | check the sysfs "state" entry regularly to correctly perform the required tasks |
89 | tasks when the rkfill key is pressed. | 521 | when the rfkill key is pressed. |
522 | |||
523 | A note about input devices and EV_SW events: | ||
524 | |||
525 | In order to know the current state of an input device switch (like | ||
526 | SW_RFKILL_ALL), you will need to use an IOCTL. That information is not | ||
527 | available through sysfs in a generic way at this time, and it is not available | ||
528 | through the rfkill class AT ALL. | ||
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt index a9e990ab980f..373ceacc367e 100644 --- a/Documentation/scheduler/sched-domains.txt +++ b/Documentation/scheduler/sched-domains.txt | |||
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your | |||
61 | arch_init_sched_domains function. This function will attach domains to all | 61 | arch_init_sched_domains function. This function will attach domains to all |
62 | CPUs using cpu_attach_domain. | 62 | CPUs using cpu_attach_domain. |
63 | 63 | ||
64 | Implementors should change the line | 64 | The sched-domains debugging infrastructure can be enabled by enabling |
65 | #undef SCHED_DOMAIN_DEBUG | 65 | CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains |
66 | to | ||
67 | #define SCHED_DOMAIN_DEBUG | ||
68 | in kernel/sched.c as this enables an error checking parse of the sched domains | ||
69 | which should catch most possible errors (described above). It also prints out | 66 | which should catch most possible errors (described above). It also prints out |
70 | the domain structure in a visual format. | 67 | the domain structure in a visual format. |
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 14f901f639ee..3ef339f491e0 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt | |||
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s = | |||
51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time | 51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time |
52 | of 0.00015s. | 52 | of 0.00015s. |
53 | 53 | ||
54 | The remaining CPU time will be used for user input and other tass. Because | 54 | The remaining CPU time will be used for user input and other tasks. Because |
55 | realtime tasks have explicitly allocated the CPU time they need to perform | 55 | realtime tasks have explicitly allocated the CPU time they need to perform |
56 | their tasks, buffer underruns in the graphocs or audio can be eliminated. | 56 | their tasks, buffer underruns in the graphics or audio can be eliminated. |
57 | 57 | ||
58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still | 58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still |
59 | lack an EDF scheduler to make non-uniform periods usable. | 59 | lack an EDF scheduler to make non-uniform periods usable. |
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index d16011a8618e..709ca991a451 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt | |||
@@ -56,19 +56,33 @@ Supported Cards/Chipsets | |||
56 | 9005:0285:9005:02d1 Adaptec 5405 (Voodoo40) | 56 | 9005:0285:9005:02d1 Adaptec 5405 (Voodoo40) |
57 | 9005:0285:15d9:02d2 SMC AOC-USAS-S8i-LP | 57 | 9005:0285:15d9:02d2 SMC AOC-USAS-S8i-LP |
58 | 9005:0285:15d9:02d3 SMC AOC-USAS-S8iR-LP | 58 | 9005:0285:15d9:02d3 SMC AOC-USAS-S8iR-LP |
59 | 9005:0285:9005:02d4 Adaptec 2045 (Voodoo04 Lite) | 59 | 9005:0285:9005:02d4 Adaptec ASR-2045 (Voodoo04 Lite) |
60 | 9005:0285:9005:02d5 Adaptec 2405 (Voodoo40 Lite) | 60 | 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite) |
61 | 9005:0285:9005:02d6 Adaptec 2445 (Voodoo44 Lite) | 61 | 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite) |
62 | 9005:0285:9005:02d7 Adaptec 2805 (Voodoo80 Lite) | 62 | 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite) |
63 | 9005:0285:9005:02d8 Adaptec 5405G (Voodoo40 PM) | ||
64 | 9005:0285:9005:02d9 Adaptec 5445G (Voodoo44 PM) | ||
65 | 9005:0285:9005:02da Adaptec 5805G (Voodoo80 PM) | ||
66 | 9005:0285:9005:02db Adaptec 5085G (Voodoo08 PM) | ||
67 | 9005:0285:9005:02dc Adaptec 51245G (Voodoo124 PM) | ||
68 | 9005:0285:9005:02dd Adaptec 51645G (Voodoo164 PM) | ||
69 | 9005:0285:9005:02de Adaptec 52445G (Voodoo244 PM) | ||
70 | 9005:0285:9005:02df Adaptec ASR-2045G (Voodoo04 Lite PM) | ||
71 | 9005:0285:9005:02e0 Adaptec ASR-2405G (Voodoo40 Lite PM) | ||
72 | 9005:0285:9005:02e1 Adaptec ASR-2445G (Voodoo44 Lite PM) | ||
73 | 9005:0285:9005:02e2 Adaptec ASR-2805G (Voodoo80 Lite PM) | ||
63 | 1011:0046:9005:0364 Adaptec 5400S (Mustang) | 74 | 1011:0046:9005:0364 Adaptec 5400S (Mustang) |
75 | 1011:0046:9005:0365 Adaptec 5400S (Mustang) | ||
64 | 9005:0287:9005:0800 Adaptec Themisto (Jupiter) | 76 | 9005:0287:9005:0800 Adaptec Themisto (Jupiter) |
65 | 9005:0200:9005:0200 Adaptec Themisto (Jupiter) | 77 | 9005:0200:9005:0200 Adaptec Themisto (Jupiter) |
66 | 9005:0286:9005:0800 Adaptec Callisto (Jupiter) | 78 | 9005:0286:9005:0800 Adaptec Callisto (Jupiter) |
67 | 1011:0046:9005:1364 Dell PERC 2/QC (Quad Channel, Mustang) | 79 | 1011:0046:9005:1364 Dell PERC 2/QC (Quad Channel, Mustang) |
80 | 1011:0046:9005:1365 Dell PERC 2/QC (Quad Channel, Mustang) | ||
68 | 1028:0001:1028:0001 Dell PERC 2/Si (Iguana) | 81 | 1028:0001:1028:0001 Dell PERC 2/Si (Iguana) |
69 | 1028:0003:1028:0003 Dell PERC 3/Si (SlimFast) | 82 | 1028:0003:1028:0003 Dell PERC 3/Si (SlimFast) |
70 | 1028:0002:1028:0002 Dell PERC 3/Di (Opal) | 83 | 1028:0002:1028:0002 Dell PERC 3/Di (Opal) |
71 | 1028:0004:1028:0004 Dell PERC 3/DiF (Iguana) | 84 | 1028:0004:1028:0004 Dell PERC 3/SiF (Iguana) |
85 | 1028:0004:1028:00d0 Dell PERC 3/DiF (Iguana) | ||
72 | 1028:0002:1028:00d1 Dell PERC 3/DiV (Viper) | 86 | 1028:0002:1028:00d1 Dell PERC 3/DiV (Viper) |
73 | 1028:0002:1028:00d9 Dell PERC 3/DiL (Lexus) | 87 | 1028:0002:1028:00d9 Dell PERC 3/DiL (Lexus) |
74 | 1028:000a:1028:0106 Dell PERC 3/DiJ (Jaguar) | 88 | 1028:000a:1028:0106 Dell PERC 3/DiJ (Jaguar) |
diff --git a/Documentation/serial/driver b/Documentation/serial/driver index 88ad615dd338..77ba0afbe4db 100644 --- a/Documentation/serial/driver +++ b/Documentation/serial/driver | |||
@@ -186,6 +186,17 @@ hardware. | |||
186 | Locking: port_sem taken. | 186 | Locking: port_sem taken. |
187 | Interrupts: caller dependent. | 187 | Interrupts: caller dependent. |
188 | 188 | ||
189 | flush_buffer(port) | ||
190 | Flush any write buffers, reset any DMA state and stop any | ||
191 | ongoing DMA transfers. | ||
192 | |||
193 | This will be called whenever the port->info->xmit circular | ||
194 | buffer is cleared. | ||
195 | |||
196 | Locking: port->lock taken. | ||
197 | Interrupts: locally disabled. | ||
198 | This call must not sleep. | ||
199 | |||
189 | set_termios(port,termios,oldtermios) | 200 | set_termios(port,termios,oldtermios) |
190 | Change the port parameters, including word length, parity, stop | 201 | Change the port parameters, including word length, parity, stop |
191 | bits. Update read_status_mask and ignore_status_mask to indicate | 202 | bits. Update read_status_mask and ignore_status_mask to indicate |
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 0bbee38acd26..72aff61e7315 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -753,8 +753,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
753 | 753 | ||
754 | [Multiple options for each card instance] | 754 | [Multiple options for each card instance] |
755 | model - force the model name | 755 | model - force the model name |
756 | position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size) | 756 | position_fix - Fix DMA pointer (0 = auto, 1 = use LPIB, 2 = POSBUF) |
757 | probe_mask - Bitmask to probe codecs (default = -1, meaning all slots) | 757 | probe_mask - Bitmask to probe codecs (default = -1, meaning all slots) |
758 | bdl_pos_adj - Specifies the DMA IRQ timing delay in samples. | ||
759 | Passing -1 will make the driver choose the appropriate | ||
760 | value based on the controller chip. | ||
758 | 761 | ||
759 | [Single (global) options] | 762 | [Single (global) options] |
760 | single_cmd - Use single immediate commands to communicate with | 763 | single_cmd - Use single immediate commands to communicate with |
@@ -845,7 +848,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
845 | ALC269 | 848 | ALC269 |
846 | basic Basic preset | 849 | basic Basic preset |
847 | 850 | ||
848 | ALC662 | 851 | ALC662/663 |
849 | 3stack-dig 3-stack (2-channel) with SPDIF | 852 | 3stack-dig 3-stack (2-channel) with SPDIF |
850 | 3stack-6ch 3-stack (6-channel) | 853 | 3stack-6ch 3-stack (6-channel) |
851 | 3stack-6ch-dig 3-stack (6-channel) with SPDIF | 854 | 3stack-6ch-dig 3-stack (6-channel) with SPDIF |
@@ -853,6 +856,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
853 | lenovo-101e Lenovo laptop | 856 | lenovo-101e Lenovo laptop |
854 | eeepc-p701 ASUS Eeepc P701 | 857 | eeepc-p701 ASUS Eeepc P701 |
855 | eeepc-ep20 ASUS Eeepc EP20 | 858 | eeepc-ep20 ASUS Eeepc EP20 |
859 | m51va ASUS M51VA | ||
860 | g71v ASUS G71V | ||
861 | h13 ASUS H13 | ||
862 | g50v ASUS G50V | ||
856 | auto auto-config reading BIOS (default) | 863 | auto auto-config reading BIOS (default) |
857 | 864 | ||
858 | ALC882/885 | 865 | ALC882/885 |
@@ -1091,7 +1098,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1091 | This occurs when the access to non-existing or non-working codec slot | 1098 | This occurs when the access to non-existing or non-working codec slot |
1092 | (likely a modem one) causes a stall of the communication via HD-audio | 1099 | (likely a modem one) causes a stall of the communication via HD-audio |
1093 | bus. You can see which codec slots are probed by enabling | 1100 | bus. You can see which codec slots are probed by enabling |
1094 | CONFIG_SND_DEBUG_DETECT, or simply from the file name of the codec | 1101 | CONFIG_SND_DEBUG_VERBOSE, or simply from the file name of the codec |
1095 | proc files. Then limit the slots to probe by probe_mask option. | 1102 | proc files. Then limit the slots to probe by probe_mask option. |
1096 | For example, probe_mask=1 means to probe only the first slot, and | 1103 | For example, probe_mask=1 means to probe only the first slot, and |
1097 | probe_mask=4 means only the third slot. | 1104 | probe_mask=4 means only the third slot. |
@@ -2267,6 +2274,10 @@ case above again, the first two slots are already reserved. If any | |||
2267 | other driver (e.g. snd-usb-audio) is loaded before snd-interwave or | 2274 | other driver (e.g. snd-usb-audio) is loaded before snd-interwave or |
2268 | snd-ens1371, it will be assigned to the third or later slot. | 2275 | snd-ens1371, it will be assigned to the third or later slot. |
2269 | 2276 | ||
2277 | When a module name is given with '!', the slot will be given to any | ||
2278 | module except the one named. For example, "slots=!snd-pcsp" will reserve | ||
2279 | the first slot for any module but snd-pcsp. | ||
2280 | |||
2270 | 2281 | ||
2271 | ALSA PCM devices to OSS devices mapping | 2282 | ALSA PCM devices to OSS devices mapping |
2272 | ======================================= | 2283 | ======================================= |
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index b03df4d4795c..e13c4e67029f 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl | |||
@@ -6127,8 +6127,8 @@ struct _snd_pcm_runtime { | |||
6127 | 6127 | ||
6128 | <para> | 6128 | <para> |
6129 | <function>snd_printdd()</function> is compiled in only when | 6129 | <function>snd_printdd()</function> is compiled in only when |
6130 | <constant>CONFIG_SND_DEBUG_DETECT</constant> is set. Please note | 6130 | <constant>CONFIG_SND_DEBUG_VERBOSE</constant> is set. Please note |
6131 | that <constant>DEBUG_DETECT</constant> is not set as default | 6131 | that <constant>CONFIG_SND_DEBUG_VERBOSE</constant> is not set as default |
6132 | even if you configure the alsa-driver with | 6132 | even if you configure the alsa-driver with |
6133 | <option>--with-debug=full</option> option. You need to give | 6133 | <option>--with-debug=full</option> option. You need to give |
6134 | explicitly <option>--with-debug=detect</option> option instead. | 6134 | explicitly <option>--with-debug=detect</option> option instead. |
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt new file mode 100644 index 000000000000..a4afb560a45b --- /dev/null +++ b/Documentation/tracers/mmiotrace.txt | |||
@@ -0,0 +1,164 @@ | |||
1 | In-kernel memory-mapped I/O tracing | ||
2 | |||
3 | |||
4 | Home page and links to optional user space tools: | ||
5 | |||
6 | http://nouveau.freedesktop.org/wiki/MmioTrace | ||
7 | |||
8 | MMIO tracing was originally developed by Intel around 2003 for their Fault | ||
9 | Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel, | ||
10 | Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau | ||
11 | project in mind. Since then many people have contributed. | ||
12 | |||
13 | Mmiotrace was built for reverse engineering any memory-mapped IO device with | ||
14 | the Nouveau project as the first real user. Only x86 and x86_64 architectures | ||
15 | are supported. | ||
16 | |||
17 | Out-of-tree mmiotrace was originally modified for mainline inclusion and | ||
18 | ftrace framework by Pekka Paalanen <pq@iki.fi>. | ||
19 | |||
20 | |||
21 | Preparation | ||
22 | ----------- | ||
23 | |||
24 | Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is | ||
25 | disabled by default, so it is safe to have this set to yes. SMP systems are | ||
26 | supported, but tracing is unreliable and may miss events if more than one CPU | ||
27 | is on-line, therefore mmiotrace takes all but one CPU off-line during run-time | ||
28 | activation. You can re-enable CPUs by hand, but you have been warned, there | ||
29 | is no way to automatically detect if you are losing events due to CPUs racing. | ||
30 | |||
31 | |||
32 | Usage Quick Reference | ||
33 | --------------------- | ||
34 | |||
35 | $ mount -t debugfs debugfs /debug | ||
36 | $ echo mmiotrace > /debug/tracing/current_tracer | ||
37 | $ cat /debug/tracing/trace_pipe > mydump.txt & | ||
38 | Start X or whatever. | ||
39 | $ echo "X is up" > /debug/tracing/marker | ||
40 | $ echo none > /debug/tracing/current_tracer | ||
41 | Check for lost events. | ||
42 | |||
43 | |||
44 | Usage | ||
45 | ----- | ||
46 | |||
47 | Make sure debugfs is mounted to /debug. If not, (requires root privileges) | ||
48 | $ mount -t debugfs debugfs /debug | ||
49 | |||
50 | Check that the driver you are about to trace is not loaded. | ||
51 | |||
52 | Activate mmiotrace (requires root privileges): | ||
53 | $ echo mmiotrace > /debug/tracing/current_tracer | ||
54 | |||
55 | Start storing the trace: | ||
56 | $ cat /debug/tracing/trace_pipe > mydump.txt & | ||
57 | The 'cat' process should stay running (sleeping) in the background. | ||
58 | |||
59 | Load the driver you want to trace and use it. Mmiotrace will only catch MMIO | ||
60 | accesses to areas that are ioremapped while mmiotrace is active. | ||
61 | |||
62 | [Unimplemented feature:] | ||
63 | During tracing you can place comments (markers) into the trace by | ||
64 | $ echo "X is up" > /debug/tracing/marker | ||
65 | This makes it easier to see which part of the (huge) trace corresponds to | ||
66 | which action. It is recommended to place descriptive markers about what you | ||
67 | do. | ||
68 | |||
69 | Shut down mmiotrace (requires root privileges): | ||
70 | $ echo none > /debug/tracing/current_tracer | ||
71 | The 'cat' process exits. If it does not, kill it by issuing 'fg' command and | ||
72 | pressing ctrl+c. | ||
73 | |||
74 | Check that mmiotrace did not lose events due to a buffer filling up. Either | ||
75 | $ grep -i lost mydump.txt | ||
76 | which tells you exactly how many events were lost, or use | ||
77 | $ dmesg | ||
78 | to view your kernel log and look for "mmiotrace has lost events" warning. If | ||
79 | events were lost, the trace is incomplete. You should enlarge the buffers and | ||
80 | try again. Buffers are enlarged by first seeing how large the current buffers | ||
81 | are: | ||
82 | $ cat /debug/tracing/trace_entries | ||
83 | gives you a number. Approximately double this number and write it back, for | ||
84 | instance: | ||
85 | $ echo 128000 > /debug/tracing/trace_entries | ||
86 | Then start again from the top. | ||
87 | |||
88 | If you are doing a trace for a driver project, e.g. Nouveau, you should also | ||
89 | do the following before sending your results: | ||
90 | $ lspci -vvv > lspci.txt | ||
91 | $ dmesg > dmesg.txt | ||
92 | $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt | ||
93 | and then send the .tar.gz file. The trace compresses considerably. Replace | ||
94 | "pciid" and "nick" with the PCI ID or model name of your piece of hardware | ||
95 | under investigation and your nick name. | ||
96 | |||
97 | |||
98 | How Mmiotrace Works | ||
99 | ------------------- | ||
100 | |||
101 | Access to hardware IO-memory is gained by mapping addresses from PCI bus by | ||
102 | calling one of the ioremap_*() functions. Mmiotrace is hooked into the | ||
103 | __ioremap() function and gets called whenever a mapping is created. Mapping is | ||
104 | an event that is recorded into the trace log. Note, that ISA range mappings | ||
105 | are not caught, since the mapping always exists and is returned directly. | ||
106 | |||
107 | MMIO accesses are recorded via page faults. Just before __ioremap() returns, | ||
108 | the mapped pages are marked as not present. Any access to the pages causes a | ||
109 | fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace | ||
110 | marks the page present, sets TF flag to achieve single stepping and exits the | ||
111 | fault handler. The instruction that faulted is executed and debug trap is | ||
112 | entered. Here mmiotrace again marks the page as not present. The instruction | ||
113 | is decoded to get the type of operation (read/write), data width and the value | ||
114 | read or written. These are stored to the trace log. | ||
115 | |||
116 | Setting the page present in the page fault handler has a race condition on SMP | ||
117 | machines. During the single stepping other CPUs may run freely on that page | ||
118 | and events can be missed without a notice. Re-enabling other CPUs during | ||
119 | tracing is discouraged. | ||
120 | |||
121 | |||
122 | Trace Log Format | ||
123 | ---------------- | ||
124 | |||
125 | The raw log is text and easily filtered with e.g. grep and awk. One record is | ||
126 | one line in the log. A record starts with a keyword, followed by keyword | ||
127 | dependant arguments. Arguments are separated by a space, or continue until the | ||
128 | end of line. The format for version 20070824 is as follows: | ||
129 | |||
130 | Explanation Keyword Space separated arguments | ||
131 | --------------------------------------------------------------------------- | ||
132 | |||
133 | read event R width, timestamp, map id, physical, value, PC, PID | ||
134 | write event W width, timestamp, map id, physical, value, PC, PID | ||
135 | ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID | ||
136 | iounmap event UNMAP timestamp, map id, PC, PID | ||
137 | marker MARK timestamp, text | ||
138 | version VERSION the string "20070824" | ||
139 | info for reader LSPCI one line from lspci -v | ||
140 | PCI address map PCIDEV space separated /proc/bus/pci/devices data | ||
141 | unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID | ||
142 | |||
143 | Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual | ||
144 | is a kernel virtual address. Width is the data width in bytes and value is the | ||
145 | data value. Map id is an arbitrary id number identifying the mapping that was | ||
146 | used in an operation. PC is the program counter and PID is process id. PC is | ||
147 | zero if it is not recorded. PID is always zero as tracing MMIO accesses | ||
148 | originating in user space memory is not yet supported. | ||
149 | |||
150 | For instance, the following awk filter will pass all 32-bit writes that target | ||
151 | physical addresses in the range [0xfb73ce40, 0xfb800000[ | ||
152 | |||
153 | $ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 && | ||
154 | adr < 0xfb800000) print; }' | ||
155 | |||
156 | |||
157 | Tools for Developers | ||
158 | -------------------- | ||
159 | |||
160 | The user space tools include utilities for: | ||
161 | - replacing numeric addresses and values with hardware register names | ||
162 | - replaying MMIO logs, i.e., re-executing the recorded writes | ||
163 | |||
164 | |||
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885 index 191194ea1e25..f0e613ba55b8 100644 --- a/Documentation/video4linux/CARDLIST.cx23885 +++ b/Documentation/video4linux/CARDLIST.cx23885 | |||
@@ -8,3 +8,4 @@ | |||
8 | 7 -> Hauppauge WinTV-HVR1200 [0070:71d1,0070:71d3] | 8 | 7 -> Hauppauge WinTV-HVR1200 [0070:71d1,0070:71d3] |
9 | 8 -> Hauppauge WinTV-HVR1700 [0070:8101] | 9 | 8 -> Hauppauge WinTV-HVR1700 [0070:8101] |
10 | 9 -> Hauppauge WinTV-HVR1400 [0070:8010] | 10 | 9 -> Hauppauge WinTV-HVR1400 [0070:8010] |
11 | 10 -> DViCO FusionHDTV7 Dual Express [18ac:d618] | ||
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 1d6a245c828f..10591467ef16 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx | |||
@@ -8,10 +8,13 @@ | |||
8 | 7 -> Leadtek Winfast USB II (em2800) | 8 | 7 -> Leadtek Winfast USB II (em2800) |
9 | 8 -> Kworld USB2800 (em2800) | 9 | 8 -> Kworld USB2800 (em2800) |
10 | 9 -> Pinnacle Dazzle DVC 90/DVC 100 (em2820/em2840) [2304:0207,2304:021a] | 10 | 9 -> Pinnacle Dazzle DVC 90/DVC 100 (em2820/em2840) [2304:0207,2304:021a] |
11 | 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500,2040:6502] | 11 | 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500] |
12 | 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] | 12 | 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] |
13 | 12 -> Kworld PVR TV 2800 RF (em2820/em2840) | 13 | 12 -> Kworld PVR TV 2800 RF (em2820/em2840) |
14 | 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] | 14 | 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] |
15 | 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) | 15 | 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) |
16 | 15 -> V-Gear PocketTV (em2800) | 16 | 15 -> V-Gear PocketTV (em2800) |
17 | 16 -> Hauppauge WinTV HVR 950 (em2880) [2040:6513,2040:6517,2040:651b,2040:651f] | 17 | 16 -> Hauppauge WinTV HVR 950 (em2880) [2040:6513,2040:6517,2040:651b,2040:651f] |
18 | 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227] | ||
19 | 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502] | ||
20 | 19 -> PointNix Intra-Oral Camera (em2860) | ||
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index 67937df1e974..39868af9cf9f 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 | |||
@@ -37,7 +37,7 @@ | |||
37 | 36 -> UPMOST PURPLE TV [12ab:0800] | 37 | 36 -> UPMOST PURPLE TV [12ab:0800] |
38 | 37 -> Items MuchTV Plus / IT-005 | 38 | 37 -> Items MuchTV Plus / IT-005 |
39 | 38 -> Terratec Cinergy 200 TV [153b:1152] | 39 | 38 -> Terratec Cinergy 200 TV [153b:1152] |
40 | 39 -> LifeView FlyTV Platinum Mini [5168:0212,4e42:0212] | 40 | 39 -> LifeView FlyTV Platinum Mini [5168:0212,4e42:0212,5169:1502] |
41 | 40 -> Compro VideoMate TV PVR/FM [185b:c100] | 41 | 40 -> Compro VideoMate TV PVR/FM [185b:c100] |
42 | 41 -> Compro VideoMate TV Gold+ [185b:c100] | 42 | 41 -> Compro VideoMate TV Gold+ [185b:c100] |
43 | 42 -> Sabrent SBT-TVFM (saa7130) | 43 | 42 -> Sabrent SBT-TVFM (saa7130) |
@@ -128,7 +128,7 @@ | |||
128 | 127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] | 128 | 127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] |
129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] | 129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] |
130 | 129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] | 130 | 129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] |
131 | 130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193,5ace:6191] | 131 | 130 -> Beholder BeholdTV M6 [5ace:6190] |
132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] | 132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] |
133 | 132 -> Genius TVGO AM11MCE | 133 | 132 -> Genius TVGO AM11MCE |
134 | 133 -> NXP Snake DVB-S reference design | 134 | 133 -> NXP Snake DVB-S reference design |
@@ -141,3 +141,7 @@ | |||
141 | 140 -> Avermedia DVB-S Pro A700 [1461:a7a1] | 141 | 140 -> Avermedia DVB-S Pro A700 [1461:a7a1] |
142 | 141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2] | 142 | 141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2] |
143 | 142 -> Beholder BeholdTV H6 [5ace:6290] | 143 | 142 -> Beholder BeholdTV H6 [5ace:6290] |
144 | 143 -> Beholder BeholdTV M63 [5ace:6191] | ||
145 | 144 -> Beholder BeholdTV M6 Extra [5ace:6193] | ||
146 | 145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636] | ||
147 | 146 -> ASUSTeK P7131 Analog | ||
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt index 6842c262890f..914cb7e734a2 100644 --- a/Documentation/video4linux/cx18.txt +++ b/Documentation/video4linux/cx18.txt | |||
@@ -1,36 +1,30 @@ | |||
1 | Some notes regarding the cx18 driver for the Conexant CX23418 MPEG | 1 | Some notes regarding the cx18 driver for the Conexant CX23418 MPEG |
2 | encoder chip: | 2 | encoder chip: |
3 | 3 | ||
4 | 1) The only hardware currently supported is the Hauppauge HVR-1600 | 4 | 1) Currently supported are: |
5 | card and the Compro VideoMate H900 (note that this card only | ||
6 | supports analog input, it has no digital tuner!). | ||
7 | 5 | ||
8 | 2) Some people have problems getting the i2c bus to work. Cause unknown. | 6 | - Hauppauge HVR-1600 |
9 | The symptom is that the eeprom cannot be read and the card is | 7 | - Compro VideoMate H900 |
10 | unusable. | 8 | - Yuan MPC718 |
9 | - Conexant Raptor PAL/SECAM devkit | ||
11 | 10 | ||
12 | 3) The audio from the analog tuner is mono only. Probably caused by | 11 | 2) Some people have problems getting the i2c bus to work. |
13 | incorrect audio register information in the datasheet. We are | 12 | The symptom is that the eeprom cannot be read and the card is |
14 | waiting for updated information from Conexant. | 13 | unusable. This is probably fixed, but if you have problems |
14 | then post to the video4linux or ivtv-users mailing list. | ||
15 | 15 | ||
16 | 4) VBI (raw or sliced) has not yet been implemented. | 16 | 3) VBI (raw or sliced) has not yet been implemented. |
17 | 17 | ||
18 | 5) MPEG indexing is not yet implemented. | 18 | 4) MPEG indexing is not yet implemented. |
19 | 19 | ||
20 | 6) The driver is still a bit rough around the edges, this should | 20 | 5) The driver is still a bit rough around the edges, this should |
21 | improve over time. | 21 | improve over time. |
22 | 22 | ||
23 | 23 | ||
24 | Firmware: | 24 | Firmware: |
25 | 25 | ||
26 | The firmware needs to be extracted from the Windows Hauppauge HVR-1600 | 26 | You can obtain the firmware files here: |
27 | driver, available here: | ||
28 | |||
29 | http://hauppauge.lightpath.net/software/install_cd/hauppauge_cd_3.4d1.zip | ||
30 | 27 | ||
31 | Unzip, then copy the following files to the firmware directory | 28 | http://dl.ivtvdriver.org/ivtv/firmware/cx18-firmware.tar.gz |
32 | and rename them as follows: | ||
33 | 29 | ||
34 | Drivers/Driver18/hcw18apu.rom -> v4l-cx23418-apu.fw | 30 | Untar and copy the .fw files to your firmware directory. |
35 | Drivers/Driver18/hcw18enc.rom -> v4l-cx23418-cpu.fw | ||
36 | Drivers/Driver18/hcw18mlC.rom -> v4l-cx23418-dig.fw | ||
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt new file mode 100644 index 000000000000..0c4880af57a3 --- /dev/null +++ b/Documentation/video4linux/gspca.txt | |||
@@ -0,0 +1,243 @@ | |||
1 | List of the webcams known by gspca. | ||
2 | |||
3 | The modules are: | ||
4 | gspca_main main driver | ||
5 | gspca_xxxx subdriver module with xxxx as follows | ||
6 | |||
7 | xxxx vend:prod | ||
8 | ---- | ||
9 | spca501 0000:0000 MystFromOri Unknow Camera | ||
10 | spca501 040a:0002 Kodak DVC-325 | ||
11 | spca500 040a:0300 Kodak EZ200 | ||
12 | zc3xx 041e:041e Creative WebCam Live! | ||
13 | spca500 041e:400a Creative PC-CAM 300 | ||
14 | sunplus 041e:400b Creative PC-CAM 600 | ||
15 | sunplus 041e:4012 PC-Cam350 | ||
16 | sunplus 041e:4013 Creative Pccam750 | ||
17 | zc3xx 041e:4017 Creative Webcam Mobile PD1090 | ||
18 | spca508 041e:4018 Creative Webcam Vista (PD1100) | ||
19 | spca561 041e:401a Creative Webcam Vista (PD1100) | ||
20 | zc3xx 041e:401c Creative NX | ||
21 | spca505 041e:401d Creative Webcam NX ULTRA | ||
22 | zc3xx 041e:401e Creative Nx Pro | ||
23 | zc3xx 041e:401f Creative Webcam Notebook PD1171 | ||
24 | pac207 041e:4028 Creative Webcam Vista Plus | ||
25 | zc3xx 041e:4029 Creative WebCam Vista Pro | ||
26 | zc3xx 041e:4034 Creative Instant P0620 | ||
27 | zc3xx 041e:4035 Creative Instant P0620D | ||
28 | zc3xx 041e:4036 Creative Live ! | ||
29 | zc3xx 041e:403a Creative Nx Pro 2 | ||
30 | spca561 041e:403b Creative Webcam Vista (VF0010) | ||
31 | zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250) | ||
32 | ov519 041e:4052 Creative Live! VISTA IM | ||
33 | zc3xx 041e:4053 Creative Live!Cam Video IM | ||
34 | ov519 041e:405f Creative Live! VISTA VF0330 | ||
35 | ov519 041e:4060 Creative Live! VISTA VF0350 | ||
36 | ov519 041e:4061 Creative Live! VISTA VF0400 | ||
37 | ov519 041e:4064 Creative Live! VISTA VF0420 | ||
38 | ov519 041e:4068 Creative Live! VISTA VF0470 | ||
39 | spca561 0458:7004 Genius VideoCAM Express V2 | ||
40 | sunplus 0458:7006 Genius Dsc 1.3 Smart | ||
41 | zc3xx 0458:7007 Genius VideoCam V2 | ||
42 | zc3xx 0458:700c Genius VideoCam V3 | ||
43 | zc3xx 0458:700f Genius VideoCam Web V2 | ||
44 | sonixj 0458:7025 Genius Eye 311Q | ||
45 | sonixj 045e:00f5 MicroSoft VX3000 | ||
46 | sonixj 045e:00f7 MicroSoft VX1000 | ||
47 | ov519 045e:028c Micro$oft xbox cam | ||
48 | spca508 0461:0815 Micro Innovation IC200 | ||
49 | sunplus 0461:0821 Fujifilm MV-1 | ||
50 | zc3xx 0461:0a00 MicroInnovation WebCam320 | ||
51 | spca500 046d:0890 Logitech QuickCam traveler | ||
52 | vc032x 046d:0892 Logitech Orbicam | ||
53 | vc032x 046d:0896 Logitech Orbicam | ||
54 | zc3xx 046d:08a0 Logitech QC IM | ||
55 | zc3xx 046d:08a1 Logitech QC IM 0x08A1 +sound | ||
56 | zc3xx 046d:08a2 Labtec Webcam Pro | ||
57 | zc3xx 046d:08a3 Logitech QC Chat | ||
58 | zc3xx 046d:08a6 Logitech QCim | ||
59 | zc3xx 046d:08a7 Logitech QuickCam Image | ||
60 | zc3xx 046d:08a9 Logitech Notebook Deluxe | ||
61 | zc3xx 046d:08aa Labtec Webcam Notebook | ||
62 | zc3xx 046d:08ac Logitech QuickCam Cool | ||
63 | zc3xx 046d:08ad Logitech QCCommunicate STX | ||
64 | zc3xx 046d:08ae Logitech QuickCam for Notebooks | ||
65 | zc3xx 046d:08af Logitech QuickCam Cool | ||
66 | zc3xx 046d:08b9 Logitech QC IM ??? | ||
67 | zc3xx 046d:08d7 Logitech QCam STX | ||
68 | zc3xx 046d:08d9 Logitech QuickCam IM/Connect | ||
69 | zc3xx 046d:08d8 Logitech Notebook Deluxe | ||
70 | zc3xx 046d:08da Logitech QuickCam Messenger | ||
71 | zc3xx 046d:08dd Logitech QuickCam for Notebooks | ||
72 | spca500 046d:0900 Logitech Inc. ClickSmart 310 | ||
73 | spca500 046d:0901 Logitech Inc. ClickSmart 510 | ||
74 | sunplus 046d:0905 Logitech ClickSmart 820 | ||
75 | tv8532 046d:0920 QC Express | ||
76 | tv8532 046d:0921 Labtec Webcam | ||
77 | spca561 046d:0928 Logitech QC Express Etch2 | ||
78 | spca561 046d:0929 Labtec Webcam Elch2 | ||
79 | spca561 046d:092a Logitech QC for Notebook | ||
80 | spca561 046d:092b Labtec Webcam Plus | ||
81 | spca561 046d:092c Logitech QC chat Elch2 | ||
82 | spca561 046d:092d Logitech QC Elch2 | ||
83 | spca561 046d:092e Logitech QC Elch2 | ||
84 | spca561 046d:092f Logitech QC Elch2 | ||
85 | sunplus 046d:0960 Logitech ClickSmart 420 | ||
86 | sunplus 0471:0322 Philips DMVC1300K | ||
87 | zc3xx 0471:0325 Philips SPC 200 NC | ||
88 | zc3xx 0471:0326 Philips SPC 300 NC | ||
89 | sonixj 0471:0327 Philips SPC 600 NC | ||
90 | sonixj 0471:0328 Philips SPC 700 NC | ||
91 | zc3xx 0471:032d Philips spc210nc | ||
92 | zc3xx 0471:032e Philips spc315nc | ||
93 | sonixj 0471:0330 Philips SPC 710NC | ||
94 | spca501 0497:c001 Smile International | ||
95 | sunplus 04a5:3003 Benq DC 1300 | ||
96 | sunplus 04a5:3008 Benq DC 1500 | ||
97 | sunplus 04a5:300a Benq DC3410 | ||
98 | spca500 04a5:300c Benq DC1016 | ||
99 | sunplus 04f1:1001 JVC GC A50 | ||
100 | spca561 04fc:0561 Flexcam 100 | ||
101 | sunplus 04fc:500c Sunplus CA500C | ||
102 | sunplus 04fc:504a Aiptek Mini PenCam 1.3 | ||
103 | sunplus 04fc:504b Maxell MaxPocket LE 1.3 | ||
104 | sunplus 04fc:5330 Digitrex 2110 | ||
105 | sunplus 04fc:5360 Sunplus Generic | ||
106 | spca500 04fc:7333 PalmPixDC85 | ||
107 | sunplus 04fc:ffff Pure DigitalDakota | ||
108 | spca501 0506:00df 3Com HomeConnect Lite | ||
109 | sunplus 052b:1513 Megapix V4 | ||
110 | tv8532 0545:808b Veo Stingray | ||
111 | tv8532 0545:8333 Veo Stingray | ||
112 | sunplus 0546:3155 Polaroid PDC3070 | ||
113 | sunplus 0546:3191 Polaroid Ion 80 | ||
114 | sunplus 0546:3273 Polaroid PDC2030 | ||
115 | ov519 054c:0154 Sony toy4 | ||
116 | ov519 054c:0155 Sony toy5 | ||
117 | zc3xx 055f:c005 Mustek Wcam300A | ||
118 | spca500 055f:c200 Mustek Gsmart 300 | ||
119 | sunplus 055f:c211 Kowa Bs888e Microcamera | ||
120 | spca500 055f:c220 Gsmart Mini | ||
121 | sunplus 055f:c230 Mustek Digicam 330K | ||
122 | sunplus 055f:c232 Mustek MDC3500 | ||
123 | sunplus 055f:c360 Mustek DV4000 Mpeg4 | ||
124 | sunplus 055f:c420 Mustek gSmart Mini 2 | ||
125 | sunplus 055f:c430 Mustek Gsmart LCD 2 | ||
126 | sunplus 055f:c440 Mustek DV 3000 | ||
127 | sunplus 055f:c520 Mustek gSmart Mini 3 | ||
128 | sunplus 055f:c530 Mustek Gsmart LCD 3 | ||
129 | sunplus 055f:c540 Gsmart D30 | ||
130 | sunplus 055f:c630 Mustek MDC4000 | ||
131 | sunplus 055f:c650 Mustek MDC5500Z | ||
132 | zc3xx 055f:d003 Mustek WCam300A | ||
133 | zc3xx 055f:d004 Mustek WCam300 AN | ||
134 | conex 0572:0041 Creative Notebook cx11646 | ||
135 | ov519 05a9:0519 OmniVision | ||
136 | ov519 05a9:0530 OmniVision | ||
137 | ov519 05a9:4519 OmniVision | ||
138 | ov519 05a9:8519 OmniVision | ||
139 | sunplus 05da:1018 Digital Dream Enigma 1.3 | ||
140 | stk014 05e1:0893 Syntek DV4000 | ||
141 | spca561 060b:a001 Maxell Compact Pc PM3 | ||
142 | zc3xx 0698:2003 CTX M730V built in | ||
143 | spca500 06bd:0404 Agfa CL20 | ||
144 | spca500 06be:0800 Optimedia | ||
145 | sunplus 06d6:0031 Trust 610 LCD PowerC@m Zoom | ||
146 | spca506 06e1:a190 ADS Instant VCD | ||
147 | spca508 0733:0110 ViewQuest VQ110 | ||
148 | spca508 0130:0130 Clone Digital Webcam 11043 | ||
149 | spca501 0733:0401 Intel Create and Share | ||
150 | spca501 0733:0402 ViewQuest M318B | ||
151 | spca505 0733:0430 Intel PC Camera Pro | ||
152 | sunplus 0733:1311 Digital Dream Epsilon 1.3 | ||
153 | sunplus 0733:1314 Mercury 2.1MEG Deluxe Classic Cam | ||
154 | sunplus 0733:2211 Jenoptik jdc 21 LCD | ||
155 | sunplus 0733:2221 Mercury Digital Pro 3.1p | ||
156 | sunplus 0733:3261 Concord 3045 spca536a | ||
157 | sunplus 0733:3281 Cyberpix S550V | ||
158 | spca506 0734:043b 3DeMon USB Capture aka | ||
159 | spca500 084d:0003 D-Link DSC-350 | ||
160 | spca500 08ca:0103 Aiptek PocketDV | ||
161 | sunplus 08ca:0104 Aiptek PocketDVII 1.3 | ||
162 | sunplus 08ca:0106 Aiptek Pocket DV3100+ | ||
163 | sunplus 08ca:2008 Aiptek Mini PenCam 2 M | ||
164 | sunplus 08ca:2010 Aiptek PocketCam 3M | ||
165 | sunplus 08ca:2016 Aiptek PocketCam 2 Mega | ||
166 | sunplus 08ca:2018 Aiptek Pencam SD 2M | ||
167 | sunplus 08ca:2020 Aiptek Slim 3000F | ||
168 | sunplus 08ca:2022 Aiptek Slim 3200 | ||
169 | sunplus 08ca:2024 Aiptek DV3500 Mpeg4 | ||
170 | sunplus 08ca:2028 Aiptek PocketCam4M | ||
171 | sunplus 08ca:2040 Aiptek PocketDV4100M | ||
172 | sunplus 08ca:2042 Aiptek PocketDV5100 | ||
173 | sunplus 08ca:2050 Medion MD 41437 | ||
174 | sunplus 08ca:2060 Aiptek PocketDV5300 | ||
175 | tv8532 0923:010f ICM532 cams | ||
176 | mars 093a:050f Mars-Semi Pc-Camera | ||
177 | pac207 093a:2460 PAC207 Qtec Webcam 100 | ||
178 | pac207 093a:2463 Philips spc200nc pac207 | ||
179 | pac207 093a:2464 Labtec Webcam 1200 | ||
180 | pac207 093a:2468 PAC207 | ||
181 | pac207 093a:2470 Genius GF112 | ||
182 | pac207 093a:2471 PAC207 Genius VideoCam ge111 | ||
183 | pac207 093a:2472 PAC207 Genius VideoCam ge110 | ||
184 | pac7311 093a:2600 PAC7311 Typhoon | ||
185 | pac7311 093a:2601 PAC7311 Philips SPC610NC | ||
186 | pac7311 093a:2603 PAC7312 | ||
187 | pac7311 093a:2608 PAC7311 Trust WB-3300p | ||
188 | pac7311 093a:260e PAC7311 Gigaware VGA PC Camera, Trust WB-3350p, SIGMA cam 2350 | ||
189 | pac7311 093a:260f PAC7311 SnakeCam | ||
190 | pac7311 093a:2621 PAC731x | ||
191 | zc3xx 0ac8:0302 Z-star Vimicro zc0302 | ||
192 | vc032x 0ac8:0321 Vimicro generic vc0321 | ||
193 | vc032x 0ac8:0323 Vimicro Vc0323 | ||
194 | vc032x 0ac8:0328 A4Tech PK-130MG | ||
195 | zc3xx 0ac8:301b Z-Star zc301b | ||
196 | zc3xx 0ac8:303b Vimicro 0x303b | ||
197 | zc3xx 0ac8:305b Z-star Vimicro zc0305b | ||
198 | zc3xx 0ac8:307b Ldlc VC302+Ov7620 | ||
199 | vc032x 0ac8:c001 Sony embedded vimicro | ||
200 | vc032x 0ac8:c002 Sony embedded vimicro | ||
201 | spca508 0af9:0010 Hama USB Sightcam 100 | ||
202 | spca508 0af9:0011 Hama USB Sightcam 100 | ||
203 | sonixb 0c45:6001 Genius VideoCAM NB | ||
204 | sonixb 0c45:6005 Microdia Sweex Mini Webcam | ||
205 | sonixb 0c45:6007 Sonix sn9c101 + Tas5110D | ||
206 | sonixb 0c45:6009 spcaCam@120 | ||
207 | sonixb 0c45:600d spcaCam@120 | ||
208 | sonixb 0c45:6011 Microdia PC Camera (SN9C102) | ||
209 | sonixb 0c45:6019 Generic Sonix OV7630 | ||
210 | sonixb 0c45:6024 Generic Sonix Tas5130c | ||
211 | sonixb 0c45:6025 Xcam Shanga | ||
212 | sonixb 0c45:6028 Sonix Btc Pc380 | ||
213 | sonixb 0c45:6029 spcaCam@150 | ||
214 | sonixb 0c45:602c Generic Sonix OV7630 | ||
215 | sonixb 0c45:602d LIC-200 LG | ||
216 | sonixb 0c45:602e Genius VideoCam Messenger | ||
217 | sonixj 0c45:6040 Speed NVC 350K | ||
218 | sonixj 0c45:607c Sonix sn9c102p Hv7131R | ||
219 | sonixj 0c45:60c0 Sangha Sn535 | ||
220 | sonixj 0c45:60ec SN9C105+MO4000 | ||
221 | sonixj 0c45:60fb Surfer NoName | ||
222 | sonixj 0c45:60fc LG-LIC300 | ||
223 | sonixj 0c45:612a Avant Camera | ||
224 | sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix | ||
225 | sonixj 0c45:6130 Sonix Pccam | ||
226 | sonixj 0c45:6138 Sn9c120 Mo4000 | ||
227 | sonixj 0c45:613b Surfer SN-206 | ||
228 | sonixj 0c45:613c Sonix Pccam168 | ||
229 | sunplus 0d64:0303 Sunplus FashionCam DXG | ||
230 | etoms 102c:6151 Qcam Sangha CIF | ||
231 | etoms 102c:6251 Qcam xxxxxx VGA | ||
232 | zc3xx 10fd:0128 Typhoon Webshot II USB 300k 0x0128 | ||
233 | spca561 10fd:7e50 FlyCam Usb 100 | ||
234 | zc3xx 10fd:8050 Typhoon Webshot II USB 300k | ||
235 | spca501 1776:501c Arowana 300K CMOS Camera | ||
236 | t613 17a1:0128 T613/TAS5130A | ||
237 | vc032x 17ef:4802 Lenovo Vc0323+MI1310_SOC | ||
238 | pac207 2001:f115 D-Link DSB-C120 | ||
239 | spca500 2899:012c Toptro Industrial | ||
240 | spca508 8086:0110 Intel Easy PC Camera | ||
241 | spca500 8086:0630 Intel Pocket PC Camera | ||
242 | spca506 99fa:8988 Grandtec V.cap | ||
243 | spca561 abcd:cdee Petcam | ||