diff options
Diffstat (limited to 'Documentation')
39 files changed, 1488 insertions, 646 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 5b5aba404aac..438277800103 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX | |||
@@ -159,8 +159,6 @@ hayes-esp.txt | |||
159 | - info on using the Hayes ESP serial driver. | 159 | - info on using the Hayes ESP serial driver. |
160 | highuid.txt | 160 | highuid.txt |
161 | - notes on the change from 16 bit to 32 bit user/group IDs. | 161 | - notes on the change from 16 bit to 32 bit user/group IDs. |
162 | hpet.txt | ||
163 | - High Precision Event Timer Driver for Linux. | ||
164 | timers/ | 162 | timers/ |
165 | - info on the timer related topics | 163 | - info on the timer related topics |
166 | hw_random.txt | 164 | hw_random.txt |
@@ -251,8 +249,6 @@ mono.txt | |||
251 | - how to execute Mono-based .NET binaries with the help of BINFMT_MISC. | 249 | - how to execute Mono-based .NET binaries with the help of BINFMT_MISC. |
252 | moxa-smartio | 250 | moxa-smartio |
253 | - file with info on installing/using Moxa multiport serial driver. | 251 | - file with info on installing/using Moxa multiport serial driver. |
254 | mtrr.txt | ||
255 | - how to use PPro Memory Type Range Registers to increase performance. | ||
256 | mutex-design.txt | 252 | mutex-design.txt |
257 | - info on the generic mutex subsystem. | 253 | - info on the generic mutex subsystem. |
258 | namespaces/ | 254 | namespaces/ |
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index d8b63d164e41..b8e86460046e 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt | |||
@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this: | |||
337 | int i, count = dma_map_sg(dev, sglist, nents, direction); | 337 | int i, count = dma_map_sg(dev, sglist, nents, direction); |
338 | struct scatterlist *sg; | 338 | struct scatterlist *sg; |
339 | 339 | ||
340 | for (i = 0, sg = sglist; i < count; i++, sg++) { | 340 | for_each_sg(sglist, sg, count, i) { |
341 | hw_address[i] = sg_dma_address(sg); | 341 | hw_address[i] = sg_dma_address(sg); |
342 | hw_len[i] = sg_dma_len(sg); | 342 | hw_len[i] = sg_dma_len(sg); |
343 | } | 343 | } |
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index b7b1482f6e04..9d0058e788e5 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -283,6 +283,7 @@ X!Earch/x86/kernel/mca_32.c | |||
283 | <chapter id="security"> | 283 | <chapter id="security"> |
284 | <title>Security Framework</title> | 284 | <title>Security Framework</title> |
285 | !Isecurity/security.c | 285 | !Isecurity/security.c |
286 | !Esecurity/inode.c | ||
286 | </chapter> | 287 | </chapter> |
287 | 288 | ||
288 | <chapter id="audit"> | 289 | <chapter id="audit"> |
@@ -364,6 +365,10 @@ X!Edrivers/pnp/system.c | |||
364 | !Eblock/blk-barrier.c | 365 | !Eblock/blk-barrier.c |
365 | !Eblock/blk-tag.c | 366 | !Eblock/blk-tag.c |
366 | !Iblock/blk-tag.c | 367 | !Iblock/blk-tag.c |
368 | !Eblock/blk-integrity.c | ||
369 | !Iblock/blktrace.c | ||
370 | !Iblock/genhd.c | ||
371 | !Eblock/genhd.c | ||
367 | </chapter> | 372 | </chapter> |
368 | 373 | ||
369 | <chapter id="chrdev"> | 374 | <chapter id="chrdev"> |
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index b651e0a4b1c0..77c3c202991b 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl | |||
@@ -145,7 +145,6 @@ usage should require reading the full document. | |||
145 | this though and the recommendation to allow only a single | 145 | this though and the recommendation to allow only a single |
146 | interface in STA mode at first! | 146 | interface in STA mode at first! |
147 | </para> | 147 | </para> |
148 | !Finclude/net/mac80211.h ieee80211_if_types | ||
149 | !Finclude/net/mac80211.h ieee80211_if_init_conf | 148 | !Finclude/net/mac80211.h ieee80211_if_init_conf |
150 | !Finclude/net/mac80211.h ieee80211_if_conf | 149 | !Finclude/net/mac80211.h ieee80211_if_conf |
151 | </chapter> | 150 | </chapter> |
@@ -177,8 +176,7 @@ usage should require reading the full document. | |||
177 | <title>functions/definitions</title> | 176 | <title>functions/definitions</title> |
178 | !Finclude/net/mac80211.h ieee80211_rx_status | 177 | !Finclude/net/mac80211.h ieee80211_rx_status |
179 | !Finclude/net/mac80211.h mac80211_rx_flags | 178 | !Finclude/net/mac80211.h mac80211_rx_flags |
180 | !Finclude/net/mac80211.h ieee80211_tx_control | 179 | !Finclude/net/mac80211.h ieee80211_tx_info |
181 | !Finclude/net/mac80211.h ieee80211_tx_status_flags | ||
182 | !Finclude/net/mac80211.h ieee80211_rx | 180 | !Finclude/net/mac80211.h ieee80211_rx |
183 | !Finclude/net/mac80211.h ieee80211_rx_irqsafe | 181 | !Finclude/net/mac80211.h ieee80211_rx_irqsafe |
184 | !Finclude/net/mac80211.h ieee80211_tx_status | 182 | !Finclude/net/mac80211.h ieee80211_tx_status |
@@ -189,12 +187,11 @@ usage should require reading the full document. | |||
189 | !Finclude/net/mac80211.h ieee80211_ctstoself_duration | 187 | !Finclude/net/mac80211.h ieee80211_ctstoself_duration |
190 | !Finclude/net/mac80211.h ieee80211_generic_frame_duration | 188 | !Finclude/net/mac80211.h ieee80211_generic_frame_duration |
191 | !Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb | 189 | !Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb |
192 | !Finclude/net/mac80211.h ieee80211_get_hdrlen | 190 | !Finclude/net/mac80211.h ieee80211_hdrlen |
193 | !Finclude/net/mac80211.h ieee80211_wake_queue | 191 | !Finclude/net/mac80211.h ieee80211_wake_queue |
194 | !Finclude/net/mac80211.h ieee80211_stop_queue | 192 | !Finclude/net/mac80211.h ieee80211_stop_queue |
195 | !Finclude/net/mac80211.h ieee80211_start_queues | ||
196 | !Finclude/net/mac80211.h ieee80211_stop_queues | ||
197 | !Finclude/net/mac80211.h ieee80211_wake_queues | 193 | !Finclude/net/mac80211.h ieee80211_wake_queues |
194 | !Finclude/net/mac80211.h ieee80211_stop_queues | ||
198 | </sect1> | 195 | </sect1> |
199 | </chapter> | 196 | </chapter> |
200 | 197 | ||
@@ -230,8 +227,7 @@ usage should require reading the full document. | |||
230 | <title>Multiple queues and QoS support</title> | 227 | <title>Multiple queues and QoS support</title> |
231 | <para>TBD</para> | 228 | <para>TBD</para> |
232 | !Finclude/net/mac80211.h ieee80211_tx_queue_params | 229 | !Finclude/net/mac80211.h ieee80211_tx_queue_params |
233 | !Finclude/net/mac80211.h ieee80211_tx_queue_stats_data | 230 | !Finclude/net/mac80211.h ieee80211_tx_queue_stats |
234 | !Finclude/net/mac80211.h ieee80211_tx_queue | ||
235 | </chapter> | 231 | </chapter> |
236 | 232 | ||
237 | <chapter id="AP"> | 233 | <chapter id="AP"> |
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cf5562cbe356..6e253407b3dc 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome! | |||
210 | number of updates per grace period. | 210 | number of updates per grace period. |
211 | 211 | ||
212 | 9. All RCU list-traversal primitives, which include | 212 | 9. All RCU list-traversal primitives, which include |
213 | rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), | 213 | rcu_dereference(), list_for_each_entry_rcu(), |
214 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), | 214 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), |
215 | must be either within an RCU read-side critical section or | 215 | must be either within an RCU read-side critical section or |
216 | must be protected by appropriate update-side locks. RCU | 216 | must be protected by appropriate update-side locks. RCU |
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index 451de2ad8329..4202ad093130 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt | |||
@@ -29,9 +29,9 @@ release_referenced() delete() | |||
29 | } | 29 | } |
30 | 30 | ||
31 | If this list/array is made lock free using RCU as in changing the | 31 | If this list/array is made lock free using RCU as in changing the |
32 | write_lock() in add() and delete() to spin_lock and changing read_lock | 32 | write_lock() in add() and delete() to spin_lock() and changing read_lock() |
33 | in search_and_reference to rcu_read_lock(), the atomic_get in | 33 | in search_and_reference() to rcu_read_lock(), the atomic_inc() in |
34 | search_and_reference could potentially hold reference to an element which | 34 | search_and_reference() could potentially hold reference to an element which |
35 | has already been deleted from the list/array. Use atomic_inc_not_zero() | 35 | has already been deleted from the list/array. Use atomic_inc_not_zero() |
36 | in this scenario as follows: | 36 | in this scenario as follows: |
37 | 37 | ||
@@ -40,20 +40,20 @@ add() search_and_reference() | |||
40 | { { | 40 | { { |
41 | alloc_object rcu_read_lock(); | 41 | alloc_object rcu_read_lock(); |
42 | ... search_for_element | 42 | ... search_for_element |
43 | atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) { | 43 | atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) { |
44 | write_lock(&list_lock); rcu_read_unlock(); | 44 | spin_lock(&list_lock); rcu_read_unlock(); |
45 | return FAIL; | 45 | return FAIL; |
46 | add_element } | 46 | add_element } |
47 | ... ... | 47 | ... ... |
48 | write_unlock(&list_lock); rcu_read_unlock(); | 48 | spin_unlock(&list_lock); rcu_read_unlock(); |
49 | } } | 49 | } } |
50 | 3. 4. | 50 | 3. 4. |
51 | release_referenced() delete() | 51 | release_referenced() delete() |
52 | { { | 52 | { { |
53 | ... write_lock(&list_lock); | 53 | ... spin_lock(&list_lock); |
54 | if (atomic_dec_and_test(&el->rc)) ... | 54 | if (atomic_dec_and_test(&el->rc)) ... |
55 | call_rcu(&el->head, el_free); delete_element | 55 | call_rcu(&el->head, el_free); delete_element |
56 | ... write_unlock(&list_lock); | 56 | ... spin_unlock(&list_lock); |
57 | } ... | 57 | } ... |
58 | if (atomic_dec_and_test(&el->rc)) | 58 | if (atomic_dec_and_test(&el->rc)) |
59 | call_rcu(&el->head, el_free); | 59 | call_rcu(&el->head, el_free); |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e04d643a9f57..96170824a717 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -786,8 +786,6 @@ RCU pointer/list traversal: | |||
786 | list_for_each_entry_rcu | 786 | list_for_each_entry_rcu |
787 | hlist_for_each_entry_rcu | 787 | hlist_for_each_entry_rcu |
788 | 788 | ||
789 | list_for_each_rcu (to be deprecated in favor of | ||
790 | list_for_each_entry_rcu) | ||
791 | list_for_each_continue_rcu (to be deprecated in favor of new | 789 | list_for_each_continue_rcu (to be deprecated in favor of new |
792 | list_for_each_entry_continue_rcu) | 790 | list_for_each_entry_continue_rcu) |
793 | 791 | ||
diff --git a/Documentation/SELinux.txt b/Documentation/SELinux.txt new file mode 100644 index 000000000000..07eae00f3314 --- /dev/null +++ b/Documentation/SELinux.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | If you want to use SELinux, chances are you will want | ||
2 | to use the distro-provided policies, or install the | ||
3 | latest reference policy release from | ||
4 | http://oss.tresys.com/projects/refpolicy | ||
5 | |||
6 | However, if you want to install a dummy policy for | ||
7 | testing, you can do so using 'mdp' provided under | ||
8 | scripts/selinux. Note that this requires the selinux | ||
9 | userspace to be installed - in particular you will | ||
10 | need checkpolicy to compile a kernel, and setfiles and | ||
11 | fixfiles to label the filesystem. | ||
12 | |||
13 | 1. Compile the kernel with selinux enabled. | ||
14 | 2. Type 'make' to compile mdp. | ||
15 | 3. Make sure that you are not running with | ||
16 | SELinux enabled and a real policy. If | ||
17 | you are, reboot with selinux disabled | ||
18 | before continuing. | ||
19 | 4. Run install_policy.sh: | ||
20 | cd scripts/selinux | ||
21 | sh install_policy.sh | ||
22 | |||
23 | Step 4 will create a new dummy policy valid for your | ||
24 | kernel, with a single selinux user, role, and type. | ||
25 | It will compile the policy, will set your SELINUXTYPE to | ||
26 | dummy in /etc/selinux/config, install the compiled policy | ||
27 | as 'dummy', and relabel your filesystem. | ||
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt index c23cab13c3d1..72576769e0f4 100644 --- a/Documentation/block/deadline-iosched.txt +++ b/Documentation/block/deadline-iosched.txt | |||
@@ -30,12 +30,18 @@ write_expire (in ms) | |||
30 | Similar to read_expire mentioned above, but for writes. | 30 | Similar to read_expire mentioned above, but for writes. |
31 | 31 | ||
32 | 32 | ||
33 | fifo_batch | 33 | fifo_batch (number of requests) |
34 | ---------- | 34 | ---------- |
35 | 35 | ||
36 | When a read request expires its deadline, we must move some requests from | 36 | Requests are grouped into ``batches'' of a particular data direction (read or |
37 | the sorted io scheduler list to the block device dispatch queue. fifo_batch | 37 | write) which are serviced in increasing sector order. To limit extra seeking, |
38 | controls how many requests we move. | 38 | deadline expiries are only checked between batches. fifo_batch controls the |
39 | maximum number of requests per batch. | ||
40 | |||
41 | This parameter tunes the balance between per-request latency and aggregate | ||
42 | throughput. When low latency is the primary concern, smaller is better (where | ||
43 | a value of 1 yields first-come first-served behaviour). Increasing fifo_batch | ||
44 | generally improves throughput, at the cost of latency variation. | ||
39 | 45 | ||
40 | 46 | ||
41 | writes_starved (number of dispatches) | 47 | writes_starved (number of dispatches) |
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd index 91c0dcc6fa5c..2c558cd6c1ef 100644 --- a/Documentation/cdrom/ide-cd +++ b/Documentation/cdrom/ide-cd | |||
@@ -145,8 +145,7 @@ useful for reading photocds. | |||
145 | 145 | ||
146 | To play an audio CD, you should first unmount and remove any data | 146 | To play an audio CD, you should first unmount and remove any data |
147 | CDROM. Any of the CDROM player programs should then work (workman, | 147 | CDROM. Any of the CDROM player programs should then work (workman, |
148 | workbone, cdplayer, etc.). Lacking anything else, you could use the | 148 | workbone, cdplayer, etc.). |
149 | cdtester program in Documentation/cdrom/sbpcd. | ||
150 | 149 | ||
151 | On a few drives, you can read digital audio directly using a program | 150 | On a few drives, you can read digital audio directly using a program |
152 | such as cdda2wav. The only types of drive which I've heard support | 151 | such as cdda2wav. The only types of drive which I've heard support |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 83c88cae1eda..cc8093c15cf5 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -6,6 +6,24 @@ be removed from this file. | |||
6 | 6 | ||
7 | --------------------------- | 7 | --------------------------- |
8 | 8 | ||
9 | What: old static regulatory information and ieee80211_regdom module parameter | ||
10 | When: 2.6.29 | ||
11 | Why: The old regulatory infrastructure has been replaced with a new one | ||
12 | which does not require statically defined regulatory domains. We do | ||
13 | not want to keep static regulatory domains in the kernel due to the | ||
14 | dynamic nature of regulatory law and localization. We kept around | ||
15 | the old static definitions for the regulatory domains of: | ||
16 | * US | ||
17 | * JP | ||
18 | * EU | ||
19 | and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was | ||
20 | set. We also kept around the ieee80211_regdom module parameter in case | ||
21 | some applications were relying on it. Changing regulatory domains | ||
22 | can now be done instead by using nl80211, as is done with iw. | ||
23 | Who: Luis R. Rodriguez <lrodriguez@atheros.com> | ||
24 | |||
25 | --------------------------- | ||
26 | |||
9 | What: dev->power.power_state | 27 | What: dev->power.power_state |
10 | When: July 2007 | 28 | When: July 2007 |
11 | Why: Broken design for runtime control over driver power states, confusing | 29 | Why: Broken design for runtime control over driver power states, confusing |
@@ -232,6 +250,9 @@ What (Why): | |||
232 | - xt_mark match revision 0 | 250 | - xt_mark match revision 0 |
233 | (superseded by xt_mark match revision 1) | 251 | (superseded by xt_mark match revision 1) |
234 | 252 | ||
253 | - xt_recent: the old ipt_recent proc dir | ||
254 | (superseded by /proc/net/xt_recent) | ||
255 | |||
235 | When: January 2009 or Linux 2.7.0, whichever comes first | 256 | When: January 2009 or Linux 2.7.0, whichever comes first |
236 | Why: Superseded by newer revisions or modules | 257 | Why: Superseded by newer revisions or modules |
237 | Who: Jan Engelhardt <jengelh@computergmbh.de> | 258 | Who: Jan Engelhardt <jengelh@computergmbh.de> |
@@ -266,14 +287,6 @@ Who: Glauber Costa <gcosta@redhat.com> | |||
266 | 287 | ||
267 | --------------------------- | 288 | --------------------------- |
268 | 289 | ||
269 | What: old style serial driver for ColdFire (CONFIG_SERIAL_COLDFIRE) | ||
270 | When: 2.6.28 | ||
271 | Why: This driver still uses the old interface and has been replaced | ||
272 | by CONFIG_SERIAL_MCF. | ||
273 | Who: Sebastian Siewior <sebastian@breakpoint.cc> | ||
274 | |||
275 | --------------------------- | ||
276 | |||
277 | What: /sys/o2cb symlink | 290 | What: /sys/o2cb symlink |
278 | When: January 2010 | 291 | When: January 2010 |
279 | Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb | 292 | Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 0d5394920a31..eb154ef36c2a 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org | |||
32 | you will need to merge your changes with the version from e2fsprogs | 32 | you will need to merge your changes with the version from e2fsprogs |
33 | 1.41.x. | 33 | 1.41.x. |
34 | 34 | ||
35 | - Create a new filesystem using the ext4dev filesystem type: | 35 | - Create a new filesystem using the ext4 filesystem type: |
36 | 36 | ||
37 | # mke2fs -t ext4dev /dev/hda1 | 37 | # mke2fs -t ext4 /dev/hda1 |
38 | 38 | ||
39 | Or configure an existing ext3 filesystem to support extents and set | 39 | Or configure an existing ext3 filesystem to support extents and set |
40 | the test_fs flag to indicate that it's ok for an in-development | 40 | the test_fs flag to indicate that it's ok for an in-development |
@@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org | |||
47 | 47 | ||
48 | # tune2fs -I 256 /dev/hda1 | 48 | # tune2fs -I 256 /dev/hda1 |
49 | 49 | ||
50 | (Note: we currently do not have tools to convert an ext4dev | 50 | (Note: we currently do not have tools to convert an ext4 |
51 | filesystem back to ext3; so please do not try this on production | 51 | filesystem back to ext3; so please do not try this on production |
52 | filesystems.) | 52 | filesystems.) |
53 | 53 | ||
54 | - Mounting: | 54 | - Mounting: |
55 | 55 | ||
56 | # mount -t ext4dev /dev/hda1 /wherever | 56 | # mount -t ext4 /dev/hda1 /wherever |
57 | 57 | ||
58 | - When comparing performance with other filesystems, remember that | 58 | - When comparing performance with other filesystems, remember that |
59 | ext3/4 by default offers higher data integrity guarantees than most. | 59 | ext3/4 by default offers higher data integrity guarantees than most. |
@@ -177,6 +177,11 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in | |||
177 | your disks are battery-backed in one way or another, | 177 | your disks are battery-backed in one way or another, |
178 | disabling barriers may safely improve performance. | 178 | disabling barriers may safely improve performance. |
179 | 179 | ||
180 | inode_readahead=n This tuning parameter controls the maximum | ||
181 | number of inode table blocks that ext4's inode | ||
182 | table readahead algorithm will pre-read into | ||
183 | the buffer cache. The default value is 32 blocks. | ||
184 | |||
180 | orlov (*) This enables the new Orlov block allocator. It is | 185 | orlov (*) This enables the new Orlov block allocator. It is |
181 | enabled by default. | 186 | enabled by default. |
182 | 187 | ||
@@ -218,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error. | |||
218 | errors=continue Keep going on a filesystem error. | 223 | errors=continue Keep going on a filesystem error. |
219 | errors=panic Panic and halt the machine if an error occurs. | 224 | errors=panic Panic and halt the machine if an error occurs. |
220 | 225 | ||
226 | data_err=ignore(*) Just print an error message if an error occurs | ||
227 | in a file data buffer in ordered mode. | ||
228 | data_err=abort Abort the journal if an error occurs in a file | ||
229 | data buffer in ordered mode. | ||
230 | |||
221 | grpid Give objects the same group ID as their creator. | 231 | grpid Give objects the same group ID as their creator. |
222 | bsdgroups | 232 | bsdgroups |
223 | 233 | ||
@@ -252,6 +262,7 @@ stripe=n Number of filesystem blocks that mballoc will try | |||
252 | delalloc (*) Deferring block allocation until write-out time. | 262 | delalloc (*) Deferring block allocation until write-out time. |
253 | nodelalloc Disable delayed allocation. Blocks are allocated | 263 | nodelalloc Disable delayed allocation. Blocks are allocated |
254 | when data is copied from user to page cache. | 264 | when data is copied from user to page cache. |
265 | |||
255 | Data Mode | 266 | Data Mode |
256 | ========= | 267 | ========= |
257 | There are 3 different data modes: | 268 | There are 3 different data modes: |
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 000000000000..1e3defcfe50b --- /dev/null +++ b/Documentation/filesystems/fiemap.txt | |||
@@ -0,0 +1,228 @@ | |||
1 | ============ | ||
2 | Fiemap Ioctl | ||
3 | ============ | ||
4 | |||
5 | The fiemap ioctl is an efficient method for userspace to get file | ||
6 | extent mappings. Instead of block-by-block mapping (such as bmap), fiemap | ||
7 | returns a list of extents. | ||
8 | |||
9 | |||
10 | Request Basics | ||
11 | -------------- | ||
12 | |||
13 | A fiemap request is encoded within struct fiemap: | ||
14 | |||
15 | struct fiemap { | ||
16 | __u64 fm_start; /* logical offset (inclusive) at | ||
17 | * which to start mapping (in) */ | ||
18 | __u64 fm_length; /* logical length of mapping which | ||
19 | * userspace cares about (in) */ | ||
20 | __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ | ||
21 | __u32 fm_mapped_extents; /* number of extents that were | ||
22 | * mapped (out) */ | ||
23 | __u32 fm_extent_count; /* size of fm_extents array (in) */ | ||
24 | __u32 fm_reserved; | ||
25 | struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ | ||
26 | }; | ||
27 | |||
28 | |||
29 | fm_start and fm_length specify the logical range within the file | ||
30 | which the process would like mappings for. Extents returned mirror | ||
31 | those on disk - that is, the logical offset of the 1st returned extent | ||
32 | may start before fm_start, and the range covered by the last returned | ||
33 | extent may end after fm_length. All offsets and lengths are in bytes. | ||
34 | |||
35 | Certain flags to modify the way in which mappings are looked up can be | ||
36 | set in fm_flags. If the kernel doesn't understand some particular | ||
37 | flags, it will return EBADR and the contents of fm_flags will contain | ||
38 | the set of flags which caused the error. If the kernel is compatible | ||
39 | with all flags passed, the contents of fm_flags will be unmodified. | ||
40 | It is up to userspace to determine whether rejection of a particular | ||
41 | flag is fatal to its operation. This scheme is intended to allow the | ||
42 | fiemap interface to grow in the future but without losing | ||
43 | compatibility with old software. | ||
44 | |||
45 | fm_extent_count specifies the number of elements in the fm_extents[] array | ||
46 | that can be used to return extents. If fm_extent_count is zero, then the | ||
47 | fm_extents[] array is ignored (no extents will be returned), and the | ||
48 | fm_mapped_extents count will hold the number of extents needed in | ||
49 | fm_extents[] to hold the file's current mapping. Note that there is | ||
50 | nothing to prevent the file from changing between calls to FIEMAP. | ||
51 | |||
52 | The following flags can be set in fm_flags: | ||
53 | |||
54 | * FIEMAP_FLAG_SYNC | ||
55 | If this flag is set, the kernel will sync the file before mapping extents. | ||
56 | |||
57 | * FIEMAP_FLAG_XATTR | ||
58 | If this flag is set, the extents returned will describe the inode's | ||
59 | extended attribute lookup tree, instead of its data tree. | ||
60 | |||
61 | |||
62 | Extent Mapping | ||
63 | -------------- | ||
64 | |||
65 | Extent information is returned within the embedded fm_extents array | ||
66 | which userspace must allocate along with the fiemap structure. The | ||
67 | number of elements in the fiemap_extents[] array should be passed via | ||
68 | fm_extent_count. The number of extents mapped by kernel will be | ||
69 | returned via fm_mapped_extents. If the number of fiemap_extents | ||
70 | allocated is less than would be required to map the requested range, | ||
71 | the maximum number of extents that can be mapped in the fm_extents[] | ||
72 | array will be returned and fm_mapped_extents will be equal to | ||
73 | fm_extent_count. In that case, the last extent in the array will not | ||
74 | complete the requested range and will not have the FIEMAP_EXTENT_LAST | ||
75 | flag set (see the next section on extent flags). | ||
76 | |||
77 | Each extent is described by a single fiemap_extent structure as | ||
78 | returned in fm_extents. | ||
79 | |||
80 | struct fiemap_extent { | ||
81 | __u64 fe_logical; /* logical offset in bytes for the start of | ||
82 | * the extent */ | ||
83 | __u64 fe_physical; /* physical offset in bytes for the start | ||
84 | * of the extent */ | ||
85 | __u64 fe_length; /* length in bytes for the extent */ | ||
86 | __u64 fe_reserved64[2]; | ||
87 | __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ | ||
88 | __u32 fe_reserved[3]; | ||
89 | }; | ||
90 | |||
91 | All offsets and lengths are in bytes and mirror those on disk. It is valid | ||
92 | for an extent's logical offset to start before the request or its logical | ||
93 | length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is | ||
94 | returned, fe_logical, fe_physical, and fe_length will be aligned to the | ||
95 | block size of the file system. With the exception of extents flagged as | ||
96 | FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. | ||
97 | |||
98 | The fe_flags field contains flags which describe the extent returned. | ||
99 | A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in | ||
100 | the file so that the process making fiemap calls can determine when no | ||
101 | more extents are available, without having to call the ioctl again. | ||
102 | |||
103 | Some flags are intentionally vague and will always be set in the | ||
104 | presence of other more specific flags. This way a program looking for | ||
105 | a general property does not have to know all existing and future flags | ||
106 | which imply that property. | ||
107 | |||
108 | For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL | ||
109 | are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking | ||
110 | for inline or tail-packed data can key on the specific flag. Software | ||
111 | which simply cares not to try operating on non-aligned extents | ||
112 | however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to | ||
113 | worry about all present and future flags which might imply unaligned | ||
114 | data. Note that the opposite is not true - it would be valid for | ||
115 | FIEMAP_EXTENT_NOT_ALIGNED to appear alone. | ||
116 | |||
117 | * FIEMAP_EXTENT_LAST | ||
118 | This is the last extent in the file. A mapping attempt past this | ||
119 | extent will return nothing. | ||
120 | |||
121 | * FIEMAP_EXTENT_UNKNOWN | ||
122 | The location of this extent is currently unknown. This may indicate | ||
123 | the data is stored on an inaccessible volume or that no storage has | ||
124 | been allocated for the file yet. | ||
125 | |||
126 | * FIEMAP_EXTENT_DELALLOC | ||
127 | - This will also set FIEMAP_EXTENT_UNKNOWN. | ||
128 | Delayed allocation - while there is data for this extent, its | ||
129 | physical location has not been allocated yet. | ||
130 | |||
131 | * FIEMAP_EXTENT_ENCODED | ||
132 | This extent does not consist of plain filesystem blocks but is | ||
133 | encoded (e.g. encrypted or compressed). Reading the data in this | ||
134 | extent via I/O to the block device will have undefined results. | ||
135 | |||
136 | Note that it is *always* undefined to try to update the data | ||
137 | in-place by writing to the indicated location without the | ||
138 | assistance of the filesystem, or to access the data using the | ||
139 | information returned by the FIEMAP interface while the filesystem | ||
140 | is mounted. In other words, user applications may only read the | ||
141 | extent data via I/O to the block device while the filesystem is | ||
142 | unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is | ||
143 | clear; user applications must not try reading or writing to the | ||
144 | filesystem via the block device under any other circumstances. | ||
145 | |||
146 | * FIEMAP_EXTENT_DATA_ENCRYPTED | ||
147 | - This will also set FIEMAP_EXTENT_ENCODED | ||
148 | The data in this extent has been encrypted by the file system. | ||
149 | |||
150 | * FIEMAP_EXTENT_NOT_ALIGNED | ||
151 | Extent offsets and length are not guaranteed to be block aligned. | ||
152 | |||
153 | * FIEMAP_EXTENT_DATA_INLINE | ||
154 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
155 | Data is located within a meta data block. | ||
156 | |||
157 | * FIEMAP_EXTENT_DATA_TAIL | ||
158 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
159 | Data is packed into a block with data from other files. | ||
160 | |||
161 | * FIEMAP_EXTENT_UNWRITTEN | ||
162 | Unwritten extent - the extent is allocated but its data has not been | ||
163 | initialized. This indicates the extent's data will be all zero if read | ||
164 | through the filesystem but the contents are undefined if read directly from | ||
165 | the device. | ||
166 | |||
167 | * FIEMAP_EXTENT_MERGED | ||
168 | This will be set when a file does not support extents, i.e., it uses a block | ||
169 | based addressing scheme. Since returning an extent for each block back to | ||
170 | userspace would be highly inefficient, the kernel will try to merge most | ||
171 | adjacent blocks into 'extents'. | ||
172 | |||
173 | |||
174 | VFS -> File System Implementation | ||
175 | --------------------------------- | ||
176 | |||
177 | File systems wishing to support fiemap must implement a ->fiemap callback on | ||
178 | their inode_operations structure. The fs ->fiemap call is responsible for | ||
179 | defining its set of supported fiemap flags, and calling a helper function on | ||
180 | each discovered extent: | ||
181 | |||
182 | struct inode_operations { | ||
183 | ... | ||
184 | |||
185 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | ||
186 | u64 len); | ||
187 | |||
188 | ->fiemap is passed struct fiemap_extent_info which describes the | ||
189 | fiemap request: | ||
190 | |||
191 | struct fiemap_extent_info { | ||
192 | unsigned int fi_flags; /* Flags as passed from user */ | ||
193 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | ||
194 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | ||
195 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ | ||
196 | }; | ||
197 | |||
198 | It is intended that the file system should not need to access any of this | ||
199 | structure directly. | ||
200 | |||
201 | |||
202 | Flag checking should be done at the beginning of the ->fiemap callback via the | ||
203 | fiemap_check_flags() helper: | ||
204 | |||
205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | ||
206 | |||
207 | The struct fieinfo should be passed in as received from ioctl_fiemap(). The | ||
208 | set of fiemap flags which the fs understands should be passed via fs_flags. If | ||
209 | fiemap_check_flags finds invalid user flags, it will place the bad values in | ||
210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from | ||
211 | fiemap_check_flags(), it should immediately exit, returning that error back to | ||
212 | ioctl_fiemap(). | ||
213 | |||
214 | |||
215 | For each extent in the request range, the file system should call | ||
216 | the helper function, fiemap_fill_next_extent(): | ||
217 | |||
218 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | ||
219 | u64 phys, u64 len, u32 flags, u32 dev); | ||
220 | |||
221 | fiemap_fill_next_extent() will use the passed values to populate the | ||
222 | next free extent in the fm_extents array. 'General' extent flags will | ||
223 | automatically be set from specific flags on behalf of the calling file | ||
224 | system so that the userspace API is not broken. | ||
225 | |||
226 | fiemap_fill_next_extent() returns 0 on success, and 1 when the | ||
227 | user-supplied fm_extents array is full. If an error is encountered | ||
228 | while copying the extent to user memory, -EFAULT will be returned. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 63ed861d5ca1..b488edad743c 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -923,45 +923,44 @@ CPUs. | |||
923 | The "procs_blocked" line gives the number of processes currently blocked, | 923 | The "procs_blocked" line gives the number of processes currently blocked, |
924 | waiting for I/O to complete. | 924 | waiting for I/O to complete. |
925 | 925 | ||
926 | |||
926 | 1.9 Ext4 file system parameters | 927 | 1.9 Ext4 file system parameters |
927 | ------------------------------ | 928 | ------------------------------ |
928 | Ext4 file system have one directory per partition under /proc/fs/ext4/ | 929 | |
929 | # ls /proc/fs/ext4/hdc/ | 930 | Information about mounted ext4 file systems can be found in |
930 | group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req | 931 | /proc/fs/ext4. Each mounted filesystem will have a directory in |
931 | stats stream_req | 932 | /proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or |
932 | 933 | /proc/fs/ext4/dm-0). The files in each per-device directory are shown | |
933 | mb_groups: | 934 | in Table 1-10, below. |
934 | This file gives the details of multiblock allocator buddy cache of free blocks | 935 | |
935 | 936 | Table 1-10: Files in /proc/fs/ext4/<devname> | |
936 | mb_history: | 937 | .............................................................................. |
937 | Multiblock allocation history. | 938 | File Content |
938 | 939 | mb_groups details of multiblock allocator buddy cache of free blocks | |
939 | stats: | 940 | mb_history multiblock allocation history |
940 | This file indicate whether the multiblock allocator should start collecting | 941 | stats controls whether the multiblock allocator should start |
941 | statistics. The statistics are shown during unmount | 942 | collecting statistics, which are shown during the unmount |
942 | 943 | group_prealloc the multiblock allocator will round up allocation | |
943 | group_prealloc: | 944 | requests to a multiple of this tuning parameter if the |
944 | The multiblock allocator normalize the block allocation request to | 945 | stripe size is not set in the ext4 superblock |
945 | group_prealloc filesystem blocks if we don't have strip value set. | 946 | max_to_scan The maximum number of extents the multiblock allocator |
946 | The stripe value can be specified at mount time or during mke2fs. | 947 | will search to find the best extent |
947 | 948 | min_to_scan The minimum number of extents the multiblock allocator | |
948 | max_to_scan: | 949 | will search to find the best extent |
949 | How long multiblock allocator can look for a best extent (in found extents) | 950 | order2_req Tuning parameter which controls the minimum size for |
950 | 951 | requests (as a power of 2) where the buddy cache is | |
951 | min_to_scan: | 952 | used |
952 | How long multiblock allocator must look for a best extent | 953 | stream_req Files which have fewer blocks than this tunable |
953 | 954 | parameter will have their blocks allocated out of a | |
954 | order2_req: | 955 | block group specific preallocation pool, so that small |
955 | Multiblock allocator use 2^N search using buddies only for requests greater | 956 | files are packed closely together. Each large file |
956 | than or equal to order2_req. The request size is specfied in file system | 957 | will have its blocks allocated out of its own unique |
957 | blocks. A value of 2 indicate only if the requests are greater than or equal | 958 | preallocation pool. |
958 | to 4 blocks. | 959 | inode_readahead Tuning parameter which controls the maximum number of |
959 | 960 | inode table blocks that ext4's inode table readahead | |
960 | stream_req: | 961 | algorithm will pre-read into the buffer cache |
961 | Files smaller than stream_req are served by the stream allocator, whose | 962 | .............................................................................. |
962 | purpose is to pack requests as close each to other as possible to | 963 | |
963 | produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 | ||
964 | filesystem block size will use group based preallocation. | ||
965 | 964 | ||
966 | ------------------------------------------------------------------------------ | 965 | ------------------------------------------------------------------------------ |
967 | Summary | 966 | Summary |
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 0bd32748a467..c6841eee9598 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt | |||
@@ -168,10 +168,10 @@ if ($#ARGV < 0) { | |||
168 | mkdir $ARGV[0],0777; | 168 | mkdir $ARGV[0],0777; |
169 | $state = 0; | 169 | $state = 0; |
170 | while (<STDIN>) { | 170 | while (<STDIN>) { |
171 | if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) { | 171 | if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { |
172 | if ($state == 1) { close OUT } | 172 | if ($state == 1) { close OUT } |
173 | $state = 1; | 173 | $state = 1; |
174 | $fn = "$ARGV[0]/$1.4"; | 174 | $fn = "$ARGV[0]/$1.9"; |
175 | print STDERR "Creating $fn\n"; | 175 | print STDERR "Creating $fn\n"; |
176 | open OUT, ">$fn" or die "can't open $fn: $!\n"; | 176 | open OUT, ">$fn" or die "can't open $fn: $!\n"; |
177 | print OUT $_; | 177 | print OUT $_; |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1150444a21ab..2443f5bb4364 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -284,6 +284,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
284 | isolate - enable device isolation (each device, as far | 284 | isolate - enable device isolation (each device, as far |
285 | as possible, will get its own protection | 285 | as possible, will get its own protection |
286 | domain) | 286 | domain) |
287 | fullflush - enable flushing of IO/TLB entries when | ||
288 | they are unmapped. Otherwise they are | ||
289 | flushed before they will be reused, which | ||
290 | is a lot faster | ||
291 | |||
287 | amd_iommu_size= [HW,X86-64] | 292 | amd_iommu_size= [HW,X86-64] |
288 | Define the size of the aperture for the AMD IOMMU | 293 | Define the size of the aperture for the AMD IOMMU |
289 | driver. Possible values are: | 294 | driver. Possible values are: |
@@ -463,12 +468,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
463 | Range: 0 - 8192 | 468 | Range: 0 - 8192 |
464 | Default: 64 | 469 | Default: 64 |
465 | 470 | ||
466 | disable_8254_timer | ||
467 | enable_8254_timer | ||
468 | [IA32/X86_64] Disable/Enable interrupt 0 timer routing | ||
469 | over the 8254 in addition to over the IO-APIC. The | ||
470 | kernel tries to set a sensible default. | ||
471 | |||
472 | hpet= [X86-32,HPET] option to control HPET usage | 471 | hpet= [X86-32,HPET] option to control HPET usage |
473 | Format: { enable (default) | disable | force } | 472 | Format: { enable (default) | disable | force } |
474 | disable: disable HPET and use PIT instead | 473 | disable: disable HPET and use PIT instead |
@@ -659,11 +658,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
659 | earlyprintk= [X86-32,X86-64,SH,BLACKFIN] | 658 | earlyprintk= [X86-32,X86-64,SH,BLACKFIN] |
660 | earlyprintk=vga | 659 | earlyprintk=vga |
661 | earlyprintk=serial[,ttySn[,baudrate]] | 660 | earlyprintk=serial[,ttySn[,baudrate]] |
661 | earlyprintk=dbgp | ||
662 | 662 | ||
663 | Append ",keep" to not disable it when the real console | 663 | Append ",keep" to not disable it when the real console |
664 | takes over. | 664 | takes over. |
665 | 665 | ||
666 | Only vga or serial at a time, not both. | 666 | Only vga or serial or usb debug port at a time. |
667 | 667 | ||
668 | Currently only ttyS0 and ttyS1 are supported. | 668 | Currently only ttyS0 and ttyS1 are supported. |
669 | 669 | ||
@@ -1020,6 +1020,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1020 | (only serial supported for now) | 1020 | (only serial supported for now) |
1021 | Format: <serial_device>[,baud] | 1021 | Format: <serial_device>[,baud] |
1022 | 1022 | ||
1023 | kmac= [MIPS] korina ethernet MAC address. | ||
1024 | Configure the RouterBoard 532 series on-chip | ||
1025 | Ethernet adapter MAC address. | ||
1026 | |||
1023 | l2cr= [PPC] | 1027 | l2cr= [PPC] |
1024 | 1028 | ||
1025 | l3cr= [PPC] | 1029 | l3cr= [PPC] |
@@ -1228,6 +1232,29 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1228 | or | 1232 | or |
1229 | memmap=0x10000$0x18690000 | 1233 | memmap=0x10000$0x18690000 |
1230 | 1234 | ||
1235 | memory_corruption_check=0/1 [X86] | ||
1236 | Some BIOSes seem to corrupt the first 64k of | ||
1237 | memory when doing things like suspend/resume. | ||
1238 | Setting this option will scan the memory | ||
1239 | looking for corruption. Enabling this will | ||
1240 | both detect corruption and prevent the kernel | ||
1241 | from using the memory being corrupted. | ||
1242 | However, it's intended as a diagnostic tool; if | ||
1243 | repeatable BIOS-originated corruption always | ||
1244 | affects the same memory, you can use memmap= | ||
1245 | to prevent the kernel from using that memory. | ||
1246 | |||
1247 | memory_corruption_check_size=size [X86] | ||
1248 | By default it checks for corruption in the low | ||
1249 | 64k, making this memory unavailable for normal | ||
1250 | use. Use this parameter to scan for | ||
1251 | corruption in more or less memory. | ||
1252 | |||
1253 | memory_corruption_check_period=seconds [X86] | ||
1254 | By default it checks for corruption every 60 | ||
1255 | seconds. Use this parameter to check at some | ||
1256 | other rate. 0 disables periodic checking. | ||
1257 | |||
1231 | memtest= [KNL,X86] Enable memtest | 1258 | memtest= [KNL,X86] Enable memtest |
1232 | Format: <integer> | 1259 | Format: <integer> |
1233 | range: 0,4 : pattern number | 1260 | range: 0,4 : pattern number |
@@ -1425,6 +1452,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1425 | 1452 | ||
1426 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. | 1453 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. |
1427 | 1454 | ||
1455 | nox2apic [X86-64,APIC] Do not enable x2APIC mode. | ||
1456 | |||
1457 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of | ||
1458 | default x2apic cluster mode on platforms | ||
1459 | supporting x2apic. | ||
1460 | |||
1428 | noltlbs [PPC] Do not use large page/tlb entries for kernel | 1461 | noltlbs [PPC] Do not use large page/tlb entries for kernel |
1429 | lowmem mapping on PPC40x. | 1462 | lowmem mapping on PPC40x. |
1430 | 1463 | ||
@@ -1882,6 +1915,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1882 | shapers= [NET] | 1915 | shapers= [NET] |
1883 | Maximal number of shapers. | 1916 | Maximal number of shapers. |
1884 | 1917 | ||
1918 | show_msr= [x86] show boot-time MSR settings | ||
1919 | Format: { <integer> } | ||
1920 | Show boot-time (BIOS-initialized) MSR settings. | ||
1921 | The parameter means the number of CPUs to show, | ||
1922 | for example 1 means boot CPU only. | ||
1923 | |||
1885 | sim710= [SCSI,HW] | 1924 | sim710= [SCSI,HW] |
1886 | See header of drivers/scsi/sim710.c. | 1925 | See header of drivers/scsi/sim710.c. |
1887 | 1926 | ||
diff --git a/Documentation/networking/LICENSE.qlge b/Documentation/networking/LICENSE.qlge new file mode 100644 index 000000000000..123b6edd7f18 --- /dev/null +++ b/Documentation/networking/LICENSE.qlge | |||
@@ -0,0 +1,46 @@ | |||
1 | Copyright (c) 2003-2008 QLogic Corporation | ||
2 | QLogic Linux Networking HBA Driver | ||
3 | |||
4 | This program includes a device driver for Linux 2.6 that may be | ||
5 | distributed with QLogic hardware specific firmware binary file. | ||
6 | You may modify and redistribute the device driver code under the | ||
7 | GNU General Public License as published by the Free Software | ||
8 | Foundation (version 2 or a later version). | ||
9 | |||
10 | You may redistribute the hardware specific firmware binary file | ||
11 | under the following terms: | ||
12 | |||
13 | 1. Redistribution of source code (only if applicable), | ||
14 | must retain the above copyright notice, this list of | ||
15 | conditions and the following disclaimer. | ||
16 | |||
17 | 2. Redistribution in binary form must reproduce the above | ||
18 | copyright notice, this list of conditions and the | ||
19 | following disclaimer in the documentation and/or other | ||
20 | materials provided with the distribution. | ||
21 | |||
22 | 3. The name of QLogic Corporation may not be used to | ||
23 | endorse or promote products derived from this software | ||
24 | without specific prior written permission | ||
25 | |||
26 | REGARDLESS OF WHAT LICENSING MECHANISM IS USED OR APPLICABLE, | ||
27 | THIS PROGRAM IS PROVIDED BY QLOGIC CORPORATION "AS IS'' AND ANY | ||
28 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
30 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR | ||
31 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
32 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
33 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
35 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
36 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
37 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
38 | POSSIBILITY OF SUCH DAMAGE. | ||
39 | |||
40 | USER ACKNOWLEDGES AND AGREES THAT USE OF THIS PROGRAM WILL NOT | ||
41 | CREATE OR GIVE GROUNDS FOR A LICENSE BY IMPLICATION, ESTOPPEL, OR | ||
42 | OTHERWISE IN ANY INTELLECTUAL PROPERTY RIGHTS (PATENT, COPYRIGHT, | ||
43 | TRADE SECRET, MASK WORK, OR OTHER PROPRIETARY RIGHT) EMBODIED IN | ||
44 | ANY OTHER QLOGIC HARDWARE OR SOFTWARE EITHER SOLELY OR IN | ||
45 | COMBINATION WITH THIS PROGRAM. | ||
46 | |||
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 297ba7b1ccaf..2035bc4932f2 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt | |||
@@ -35,8 +35,9 @@ This file contains | |||
35 | 6.1 general settings | 35 | 6.1 general settings |
36 | 6.2 local loopback of sent frames | 36 | 6.2 local loopback of sent frames |
37 | 6.3 CAN controller hardware filters | 37 | 6.3 CAN controller hardware filters |
38 | 6.4 currently supported CAN hardware | 38 | 6.4 The virtual CAN driver (vcan) |
39 | 6.5 todo | 39 | 6.5 currently supported CAN hardware |
40 | 6.6 todo | ||
40 | 41 | ||
41 | 7 Credits | 42 | 7 Credits |
42 | 43 | ||
@@ -584,7 +585,42 @@ solution for a couple of reasons: | |||
584 | @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus | 585 | @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus |
585 | load without any problems ... | 586 | load without any problems ... |
586 | 587 | ||
587 | 6.4 currently supported CAN hardware (September 2007) | 588 | 6.4 The virtual CAN driver (vcan) |
589 | |||
590 | Similar to the network loopback devices, vcan offers a virtual local | ||
591 | CAN interface. A fully qualified address on CAN consists of | ||
592 | |||
593 | - a unique CAN Identifier (CAN ID) | ||
594 | - the CAN bus this CAN ID is transmitted on (e.g. can0) | ||
595 | |||
596 | so in common use cases more than one virtual CAN interface is needed. | ||
597 | |||
598 | The virtual CAN interfaces allow the transmission and reception of CAN | ||
599 | frames without real CAN controller hardware. Virtual CAN network | ||
600 | devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ... | ||
601 | When compiled as a module the virtual CAN driver module is called vcan.ko | ||
602 | |||
603 | Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel | ||
604 | netlink interface to create vcan network devices. The creation and | ||
605 | removal of vcan network devices can be managed with the ip(8) tool: | ||
606 | |||
607 | - Create a virtual CAN network interface: | ||
608 | ip link add type vcan | ||
609 | |||
610 | - Create a virtual CAN network interface with a specific name 'vcan42': | ||
611 | ip link add dev vcan42 type vcan | ||
612 | |||
613 | - Remove a (virtual CAN) network interface 'vcan42': | ||
614 | ip link del vcan42 | ||
615 | |||
616 | The tool 'vcan' from the SocketCAN SVN repository on BerliOS is obsolete. | ||
617 | |||
618 | Virtual CAN network device creation in older Kernels: | ||
619 | In Linux Kernel versions < 2.6.24 the vcan driver creates 4 vcan | ||
620 | netdevices at module load time by default. This value can be changed | ||
621 | with the module parameter 'numdev'. E.g. 'modprobe vcan numdev=8' | ||
622 | |||
623 | 6.5 currently supported CAN hardware | ||
588 | 624 | ||
589 | On the project website http://developer.berlios.de/projects/socketcan | 625 | On the project website http://developer.berlios.de/projects/socketcan |
590 | there are different drivers available: | 626 | there are different drivers available: |
@@ -603,7 +639,7 @@ solution for a couple of reasons: | |||
603 | 639 | ||
604 | Please check the Mailing Lists on the berlios OSS project website. | 640 | Please check the Mailing Lists on the berlios OSS project website. |
605 | 641 | ||
606 | 6.5 todo (September 2007) | 642 | 6.6 todo |
607 | 643 | ||
608 | The configuration interface for CAN network drivers is still an open | 644 | The configuration interface for CAN network drivers is still an open |
609 | issue that has not been finalized in the socketcan project. Also the | 645 | issue that has not been finalized in the socketcan project. Also the |
diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt index d391ea631141..4caa0e314cc2 100644 --- a/Documentation/networking/multiqueue.txt +++ b/Documentation/networking/multiqueue.txt | |||
@@ -24,4 +24,56 @@ netif_{start|stop|wake}_subqueue() functions to manage each queue while the | |||
24 | device is still operational. netdev->queue_lock is still used when the device | 24 | device is still operational. netdev->queue_lock is still used when the device |
25 | comes online or when it's completely shut down (unregister_netdev(), etc.). | 25 | comes online or when it's completely shut down (unregister_netdev(), etc.). |
26 | 26 | ||
27 | Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com> | 27 | |
28 | Section 2: Qdisc support for multiqueue devices | ||
29 | |||
30 | ----------------------------------------------- | ||
31 | |||
32 | Currently two qdiscs are optimized for multiqueue devices. The first is the | ||
33 | default pfifo_fast qdisc. This qdisc supports one qdisc per hardware queue. | ||
34 | A new round-robin qdisc, sch_multiq also supports multiple hardware queues. The | ||
35 | qdisc is responsible for classifying the skb's and then directing the skb's to | ||
36 | bands and queues based on the value in skb->queue_mapping. Use this field in | ||
37 | the base driver to determine which queue to send the skb to. | ||
38 | |||
39 | sch_multiq has been added for hardware that wishes to avoid head-of-line | ||
40 | blocking. It will cycle through the bands and verify that the hardware queue | ||
41 | associated with the band is not stopped prior to dequeuing a packet. | ||
42 | |||
43 | On qdisc load, the number of bands is based on the number of queues on the | ||
44 | hardware. Once the association is made, any skb with skb->queue_mapping set, | ||
45 | will be queued to the band associated with the hardware queue. | ||
46 | |||
47 | |||
48 | Section 3: Brief howto using MULTIQ for multiqueue devices | ||
49 | --------------------------------------------------------------- | ||
50 | |||
51 | The userspace command 'tc,' part of the iproute2 package, is used to configure | ||
52 | qdiscs. To add the MULTIQ qdisc to your network device, assuming the device | ||
53 | is called eth0, run the following command: | ||
54 | |||
55 | # tc qdisc add dev eth0 root handle 1: multiq | ||
56 | |||
57 | The qdisc will allocate the number of bands to equal the number of queues that | ||
58 | the device reports, and bring the qdisc online. Assuming eth0 has 4 Tx | ||
59 | queues, the band mapping would look like: | ||
60 | |||
61 | band 0 => queue 0 | ||
62 | band 1 => queue 1 | ||
63 | band 2 => queue 2 | ||
64 | band 3 => queue 3 | ||
65 | |||
66 | Traffic will begin flowing through each queue based on either the simple_tx_hash | ||
67 | function or based on netdev->select_queue() if you have it defined. | ||
68 | |||
69 | The behavior of tc filters remains the same. However a new tc action, | ||
70 | skbedit, has been added. Assuming you wanted to route all traffic to a | ||
71 | specific host, for example 192.168.0.3, through a specific queue you could use | ||
72 | this action and establish a filter such as: | ||
73 | |||
74 | tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \ | ||
75 | match ip dst 192.168.0.3 \ | ||
76 | action skbedit queue_mapping 3 | ||
77 | |||
78 | Author: Alexander Duyck <alexander.h.duyck@intel.com> | ||
79 | Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com> | ||
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt new file mode 100644 index 000000000000..0e6e592f4f55 --- /dev/null +++ b/Documentation/networking/phonet.txt | |||
@@ -0,0 +1,175 @@ | |||
1 | Linux Phonet protocol family | ||
2 | ============================ | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | Phonet is a packet protocol used by Nokia cellular modems for both IPC | ||
8 | and RPC. With the Linux Phonet socket family, Linux host processes can | ||
9 | receive and send messages from/to the modem, or any other external | ||
10 | device attached to the modem. The modem takes care of routing. | ||
11 | |||
12 | Phonet packets can be exchanged through various hardware connections | ||
13 | depending on the device, such as: | ||
14 | - USB with the CDC Phonet interface, | ||
15 | - infrared, | ||
16 | - Bluetooth, | ||
17 | - an RS232 serial port (with a dedicated "FBUS" line discipline), | ||
18 | - the SSI bus with some TI OMAP processors. | ||
19 | |||
20 | |||
21 | Packets format | ||
22 | -------------- | ||
23 | |||
24 | Phonet packets have a common header as follows: | ||
25 | |||
26 | struct phonethdr { | ||
27 | uint8_t pn_media; /* Media type (link-layer identifier) */ | ||
28 | uint8_t pn_rdev; /* Receiver device ID */ | ||
29 | uint8_t pn_sdev; /* Sender device ID */ | ||
30 | uint8_t pn_res; /* Resource ID or function */ | ||
31 | uint16_t pn_length; /* Big-endian message byte length (minus 6) */ | ||
32 | uint8_t pn_robj; /* Receiver object ID */ | ||
33 | uint8_t pn_sobj; /* Sender object ID */ | ||
34 | }; | ||
35 | |||
36 | On Linux, the link-layer header includes the pn_media byte (see below). | ||
37 | The next 7 bytes are part of the network-layer header. | ||
38 | |||
39 | The device ID is split: the 6 higher-order bits constitute the device | ||
40 | address, while the 2 lower-order bits are used for multiplexing, as are | ||
41 | the 8-bit object identifiers. As such, Phonet can be considered as a | ||
42 | network layer with 6 bits of address space and 10 bits for transport | ||
43 | protocol (much like port numbers in IP world). | ||
44 | |||
45 | The modem always has address number zero. All other devices have their | ||
46 | own 6-bit address. | ||
47 | |||
48 | |||
49 | Link layer | ||
50 | ---------- | ||
51 | |||
52 | Phonet links are always point-to-point links. The link layer header | ||
53 | consists of a single Phonet media type byte. It uniquely identifies the | ||
54 | link through which the packet is transmitted, from the modem's | ||
55 | perspective. Each Phonet network device shall prepend and set the media | ||
56 | type byte as appropriate. For convenience, a common phonet_header_ops | ||
57 | link-layer header operations structure is provided. It sets the | ||
58 | media type according to the network device hardware address. | ||
59 | |||
60 | Linux Phonet network interfaces support a dedicated link layer packets | ||
61 | type (ETH_P_PHONET) which is out of the Ethernet type range. They can | ||
62 | only send and receive Phonet packets. | ||
63 | |||
64 | The virtual TUN tunnel device driver can also be used for Phonet. This | ||
65 | requires IFF_TUN mode, _without_ the IFF_NO_PI flag. In this case, | ||
66 | there is no link-layer header, so there is no Phonet media type byte. | ||
67 | |||
68 | Note that Phonet interfaces are not allowed to re-order packets, so | ||
69 | only the (default) Linux FIFO qdisc should be used with them. | ||
70 | |||
71 | |||
72 | Network layer | ||
73 | ------------- | ||
74 | |||
75 | The Phonet socket address family maps the Phonet packet header: | ||
76 | |||
77 | struct sockaddr_pn { | ||
78 | sa_family_t spn_family; /* AF_PHONET */ | ||
79 | uint8_t spn_obj; /* Object ID */ | ||
80 | uint8_t spn_dev; /* Device ID */ | ||
81 | uint8_t spn_resource; /* Resource or function */ | ||
82 | uint8_t spn_zero[...]; /* Padding */ | ||
83 | }; | ||
84 | |||
85 | The resource field is only used when sending and receiving; | ||
86 | It is ignored by bind() and getsockname(). | ||
87 | |||
88 | |||
89 | Low-level datagram protocol | ||
90 | --------------------------- | ||
91 | |||
92 | Applications can send Phonet messages using the Phonet datagram socket | ||
93 | protocol from the PF_PHONET family. Each socket is bound to one of the | ||
94 | 2^10 object IDs available, and can send and receive packets with any | ||
95 | other peer. | ||
96 | |||
97 | struct sockaddr_pn addr = { .spn_family = AF_PHONET, }; | ||
98 | ssize_t len; | ||
99 | socklen_t addrlen = sizeof(addr); | ||
100 | int fd; | ||
101 | |||
102 | fd = socket(PF_PHONET, SOCK_DGRAM, 0); | ||
103 | bind(fd, (struct sockaddr *)&addr, sizeof(addr)); | ||
104 | /* ... */ | ||
105 | |||
106 | sendto(fd, msg, msglen, 0, (struct sockaddr *)&addr, sizeof(addr)); | ||
107 | len = recvfrom(fd, buf, sizeof(buf), 0, | ||
108 | (struct sockaddr *)&addr, &addrlen); | ||
109 | |||
110 | This protocol follows the SOCK_DGRAM connection-less semantics. | ||
111 | However, connect() and getpeername() are not supported, as they did | ||
112 | not seem useful with Phonet usages (could be added easily). | ||
113 | |||
114 | |||
115 | Phonet Pipe protocol | ||
116 | -------------------- | ||
117 | |||
118 | The Phonet Pipe protocol is a simple sequenced packets protocol | ||
119 | with end-to-end congestion control. It uses the passive listening | ||
120 | socket paradigm. The listening socket is bound to a unique free object | ||
121 | ID. Each listening socket can handle up to 255 simultaneous | ||
122 | connections, one per accept()'d socket. | ||
123 | |||
124 | int lfd, cfd; | ||
125 | |||
126 | lfd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE); | ||
127 | listen (lfd, INT_MAX); | ||
128 | |||
129 | /* ... */ | ||
130 | cfd = accept(lfd, NULL, NULL); | ||
131 | for (;;) | ||
132 | { | ||
133 | char buf[...]; | ||
134 | ssize_t len = read(cfd, buf, sizeof(buf)); | ||
135 | |||
136 | /* ... */ | ||
137 | |||
138 | write(cfd, msg, msglen); | ||
139 | } | ||
140 | |||
141 | Connections are established between two endpoints by a "third party" | ||
142 | application. This means that both endpoints are passive; so connect() | ||
143 | is not possible. | ||
144 | |||
145 | WARNING: | ||
146 | When polling a connected pipe socket for writability, there is an | ||
147 | intrinsic race condition whereby writability might be lost between the | ||
148 | polling and the writing system calls. In this case, the socket will | ||
149 | block until write becomes possible again, unless non-blocking mode | ||
150 | is enabled. | ||
151 | |||
152 | |||
153 | The pipe protocol provides two socket options at the SOL_PNPIPE level: | ||
154 | |||
155 | PNPIPE_ENCAP accepts one integer value (int) of: | ||
156 | |||
157 | PNPIPE_ENCAP_NONE: The socket operates normally (default). | ||
158 | |||
159 | PNPIPE_ENCAP_IP: The socket is used as a backend for a virtual IP | ||
160 | interface. This requires CAP_NET_ADMIN capability. GPRS data | ||
161 | support on Nokia modems can use this. Note that the socket cannot | ||
162 | be reliably poll()'d or read() from while in this mode. | ||
163 | |||
164 | PNPIPE_IFINDEX is a read-only integer value. It contains the | ||
165 | interface index of the network interface created by PNPIPE_ENCAP, | ||
166 | or zero if encapsulation is off. | ||
167 | |||
168 | |||
169 | Authors | ||
170 | ------- | ||
171 | |||
172 | Linux Phonet was initially written by Sakari Ailus. | ||
173 | Other contributors include Mikä Liljeberg, Andras Domokos, | ||
174 | Carlos Chinea and Rémi Denis-Courmont. | ||
175 | Copyright (C) 2008 Nokia Corporation. | ||
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt new file mode 100644 index 000000000000..a96989a8ff35 --- /dev/null +++ b/Documentation/networking/regulatory.txt | |||
@@ -0,0 +1,194 @@ | |||
1 | Linux wireless regulatory documentation | ||
2 | --------------------------------------- | ||
3 | |||
4 | This document gives a brief overview of how the Linux wireless | ||
5 | regulatory infrastructure works. | ||
6 | |||
7 | More up to date information can be obtained at the project's web page: | ||
8 | |||
9 | http://wireless.kernel.org/en/developers/Regulatory | ||
10 | |||
11 | Keeping regulatory domains in userspace | ||
12 | --------------------------------------- | ||
13 | |||
14 | Due to the dynamic nature of regulatory domains we keep them | ||
15 | in userspace and provide a framework for userspace to upload | ||
16 | to the kernel one regulatory domain to be used as the central | ||
17 | core regulatory domain all wireless devices should adhere to. | ||
18 | |||
19 | How to get regulatory domains to the kernel | ||
20 | ------------------------------------------- | ||
21 | |||
22 | Userspace gets a regulatory domain in the kernel by having | ||
23 | a userspace agent build it and send it via nl80211. Only | ||
24 | expected regulatory domains will be respected by the kernel. | ||
25 | |||
26 | A currently available userspace agent which can accomplish this | ||
27 | is CRDA - central regulatory domain agent. It's documented here: | ||
28 | |||
29 | http://wireless.kernel.org/en/developers/Regulatory/CRDA | ||
30 | |||
31 | Essentially the kernel will send a udev event when it knows | ||
32 | it needs a new regulatory domain. A udev rule can be put in place | ||
33 | to trigger crda to send the respective regulatory domain for a | ||
34 | specific ISO/IEC 3166 alpha2. | ||
35 | |||
36 | Below is an example udev rule which can be used: | ||
37 | |||
38 | # Example file, should be put in /etc/udev/rules.d/regulatory.rules | ||
39 | KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda" | ||
40 | |||
41 | The alpha2 is passed as an environment variable under the variable COUNTRY. | ||
42 | |||
43 | Who asks for regulatory domains? | ||
44 | -------------------------------- | ||
45 | |||
46 | * Users | ||
47 | |||
48 | Users can use iw: | ||
49 | |||
50 | http://wireless.kernel.org/en/users/Documentation/iw | ||
51 | |||
52 | An example: | ||
53 | |||
54 | # set regulatory domain to "Costa Rica" | ||
55 | iw reg set CR | ||
56 | |||
57 | This will request the kernel to set the regulatory domain to | ||
58 | the specified alpha2. The kernel in turn will then ask userspace | ||
59 | to provide a regulatory domain for the alpha2 specified by the user | ||
60 | by sending a uevent. | ||
61 | |||
62 | * Wireless subsystems for Country Information elements | ||
63 | |||
64 | The kernel will send a uevent to inform userspace a new | ||
65 | regulatory domain is required. More on this to be added | ||
66 | as its integration is added. | ||
67 | |||
68 | * Drivers | ||
69 | |||
70 | If drivers determine they need a specific regulatory domain | ||
71 | set they can inform the wireless core using regulatory_hint(). | ||
72 | They have two options -- they either provide an alpha2 so that | ||
73 | crda can provide back a regulatory domain for that country or | ||
74 | they can build their own regulatory domain based on internal | ||
75 | custom knowledge so the wireless core can respect it. | ||
76 | |||
77 | *Most* drivers will rely on the first mechanism of providing a | ||
78 | regulatory hint with an alpha2. For these drivers there is an additional | ||
79 | check that can be used to ensure compliance based on custom EEPROM | ||
80 | regulatory data. This additional check can be used by drivers by | ||
81 | registering on its struct wiphy a reg_notifier() callback. This notifier | ||
82 | is called when the core's regulatory domain has been changed. The driver | ||
83 | can use this to review the changes made and also review who made them | ||
84 | (driver, user, country IE) and determine what to allow based on its | ||
85 | internal EEPROM data. Device drivers wishing to be capable of world | ||
86 | roaming should use this callback. More on world roaming will be | ||
87 | added to this document when its support is enabled. | ||
88 | |||
89 | Device drivers who provide their own built regulatory domain | ||
90 | do not need a callback as the channels registered by them are | ||
91 | the only ones that will be allowed and therefore *additional* | ||
92 | channels cannot be enabled. | ||
93 | |||
94 | Example code - drivers hinting an alpha2: | ||
95 | ------------------------------------------ | ||
96 | |||
97 | This example comes from the zd1211rw device driver. You can start | ||
98 | by having a mapping of your device's EEPROM country/regulatory | ||
99 | domain value to a specific alpha2 as follows: | ||
100 | |||
101 | static struct zd_reg_alpha2_map reg_alpha2_map[] = { | ||
102 | { ZD_REGDOMAIN_FCC, "US" }, | ||
103 | { ZD_REGDOMAIN_IC, "CA" }, | ||
104 | { ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */ | ||
105 | { ZD_REGDOMAIN_JAPAN, "JP" }, | ||
106 | { ZD_REGDOMAIN_JAPAN_ADD, "JP" }, | ||
107 | { ZD_REGDOMAIN_SPAIN, "ES" }, | ||
108 | { ZD_REGDOMAIN_FRANCE, "FR" }, | ||
109 | |||
110 | Then you can define a routine to map your read EEPROM value to an alpha2, | ||
111 | as follows: | ||
112 | |||
113 | static int zd_reg2alpha2(u8 regdomain, char *alpha2) | ||
114 | { | ||
115 | unsigned int i; | ||
116 | struct zd_reg_alpha2_map *reg_map; | ||
117 | for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) { | ||
118 | reg_map = &reg_alpha2_map[i]; | ||
119 | if (regdomain == reg_map->reg) { | ||
120 | alpha2[0] = reg_map->alpha2[0]; | ||
121 | alpha2[1] = reg_map->alpha2[1]; | ||
122 | return 0; | ||
123 | } | ||
124 | } | ||
125 | return 1; | ||
126 | } | ||
127 | |||
128 | Lastly, you can then hint to the core of your discovered alpha2, if a match | ||
129 | was found. You need to do this after you have registered your wiphy. You | ||
130 | are expected to do this during initialization. | ||
131 | |||
132 | r = zd_reg2alpha2(mac->regdomain, alpha2); | ||
133 | if (!r) | ||
134 | regulatory_hint(hw->wiphy, alpha2, NULL); | ||
135 | |||
136 | Example code - drivers providing a built in regulatory domain: | ||
137 | -------------------------------------------------------------- | ||
138 | |||
139 | If you have regulatory information you can obtain from your | ||
140 | driver and you *need* to use this we let you build a regulatory domain | ||
141 | structure and pass it to the wireless core. To do this you should | ||
142 | kmalloc() a structure big enough to hold your regulatory domain | ||
143 | structure and you should then fill it with your data. Finally you simply | ||
144 | call regulatory_hint() with the regulatory domain structure in it. | ||
145 | |||
146 | Below is a simple example, with a regulatory domain cached using the stack. | ||
147 | Your implementation may vary (read EEPROM cache instead, for example). | ||
148 | |||
149 | Example cache of some regulatory domain | ||
150 | |||
151 | struct ieee80211_regdomain mydriver_jp_regdom = { | ||
152 | .n_reg_rules = 3, | ||
153 | .alpha2 = "JP", | ||
154 | //.alpha2 = "99", /* If I have no alpha2 to map it to */ | ||
155 | .reg_rules = { | ||
156 | /* IEEE 802.11b/g, channels 1..14 */ | ||
157 | REG_RULE(2412-20, 2484+20, 40, 6, 20, 0), | ||
158 | /* IEEE 802.11a, channels 34..48 */ | ||
159 | REG_RULE(5170-20, 5240+20, 40, 6, 20, | ||
160 | NL80211_RRF_PASSIVE_SCAN), | ||
161 | /* IEEE 802.11a, channels 52..64 */ | ||
162 | REG_RULE(5260-20, 5320+20, 40, 6, 20, | ||
163 | NL80211_RRF_NO_IBSS | | ||
164 | NL80211_RRF_DFS), | ||
165 | } | ||
166 | }; | ||
167 | |||
168 | Then in some part of your code after your wiphy has been registered: | ||
169 | |||
170 | int r; | ||
171 | struct ieee80211_regdomain *rd; | ||
172 | int size_of_regd; | ||
173 | int num_rules = mydriver_jp_regdom.n_reg_rules; | ||
174 | unsigned int i; | ||
175 | |||
176 | size_of_regd = sizeof(struct ieee80211_regdomain) + | ||
177 | (num_rules * sizeof(struct ieee80211_reg_rule)); | ||
178 | |||
179 | rd = kzalloc(size_of_regd, GFP_KERNEL); | ||
180 | if (!rd) | ||
181 | return -ENOMEM; | ||
182 | |||
183 | memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain)); | ||
184 | |||
185 | for (i=0; i < num_rules; i++) { | ||
186 | memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i], | ||
187 | sizeof(struct ieee80211_reg_rule)); | ||
188 | } | ||
189 | r = regulatory_hint(hw->wiphy, NULL, rd); | ||
190 | if (r) { | ||
191 | kfree(rd); | ||
192 | return r; | ||
193 | } | ||
194 | |||
diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt new file mode 100644 index 000000000000..7b5996d9357e --- /dev/null +++ b/Documentation/networking/tproxy.txt | |||
@@ -0,0 +1,85 @@ | |||
1 | Transparent proxy support | ||
2 | ========================= | ||
3 | |||
4 | This feature adds Linux 2.2-like transparent proxy support to current kernels. | ||
5 | To use it, enable NETFILTER_TPROXY, the socket match and the TPROXY target in | ||
6 | your kernel config. You will need policy routing too, so be sure to enable that | ||
7 | as well. | ||
8 | |||
9 | |||
10 | 1. Making non-local sockets work | ||
11 | ================================ | ||
12 | |||
13 | The idea is that you identify packets with destination address matching a local | ||
14 | socket on your box, set the packet mark to a certain value, and then match on that | ||
15 | value using policy routing to have those packets delivered locally: | ||
16 | |||
17 | # iptables -t mangle -N DIVERT | ||
18 | # iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT | ||
19 | # iptables -t mangle -A DIVERT -j MARK --set-mark 1 | ||
20 | # iptables -t mangle -A DIVERT -j ACCEPT | ||
21 | |||
22 | # ip rule add fwmark 1 lookup 100 | ||
23 | # ip route add local 0.0.0.0/0 dev lo table 100 | ||
24 | |||
25 | Because of certain restrictions in the IPv4 routing output code you'll have to | ||
26 | modify your application to allow it to send datagrams _from_ non-local IP | ||
27 | addresses. All you have to do is enable the (SOL_IP, IP_TRANSPARENT) socket | ||
28 | option before calling bind: | ||
29 | |||
30 | fd = socket(AF_INET, SOCK_STREAM, 0); | ||
31 | /* - 8< -*/ | ||
32 | int value = 1; | ||
33 | setsockopt(fd, SOL_IP, IP_TRANSPARENT, &value, sizeof(value)); | ||
34 | /* - 8< -*/ | ||
35 | name.sin_family = AF_INET; | ||
36 | name.sin_port = htons(0xCAFE); | ||
37 | name.sin_addr.s_addr = htonl(0xDEADBEEF); | ||
38 | bind(fd, &name, sizeof(name)); | ||
39 | |||
40 | A trivial patch for netcat is available here: | ||
41 | http://people.netfilter.org/hidden/tproxy/netcat-ip_transparent-support.patch | ||
42 | |||
43 | |||
44 | 2. Redirecting traffic | ||
45 | ====================== | ||
46 | |||
47 | Transparent proxying often involves "intercepting" traffic on a router. This is | ||
48 | usually done with the iptables REDIRECT target; however, there are serious | ||
49 | limitations of that method. One of the major issues is that it actually | ||
50 | modifies the packets to change the destination address -- which might not be | ||
51 | acceptable in certain situations. (Think of proxying UDP for example: you won't | ||
52 | be able to find out the original destination address. Even in case of TCP | ||
53 | getting the original destination address is racy.) | ||
54 | |||
55 | The 'TPROXY' target provides similar functionality without relying on NAT. Simply | ||
56 | add rules like this to the iptables ruleset above: | ||
57 | |||
58 | # iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \ | ||
59 | --tproxy-mark 0x1/0x1 --on-port 50080 | ||
60 | |||
61 | Note that for this to work you'll have to modify the proxy to enable (SOL_IP, | ||
62 | IP_TRANSPARENT) for the listening socket. | ||
63 | |||
64 | |||
65 | 3. Iptables extensions | ||
66 | ====================== | ||
67 | |||
68 | To use tproxy you'll need to have the 'socket' and 'TPROXY' modules | ||
69 | compiled for iptables. A patched version of iptables is available | ||
70 | here: http://git.balabit.hu/?p=bazsi/iptables-tproxy.git | ||
71 | |||
72 | |||
73 | 4. Application support | ||
74 | ====================== | ||
75 | |||
76 | 4.1. Squid | ||
77 | ---------- | ||
78 | |||
79 | Squid 3.HEAD has support built-in. To use it, pass | ||
80 | '--enable-linux-netfilter' to configure and set the 'tproxy' option on | ||
81 | the HTTP listener you redirect traffic to with the TPROXY iptables | ||
82 | target. | ||
83 | |||
84 | For more information please consult the following page on the Squid | ||
85 | wiki: http://wiki.squid-cache.org/Features/Tproxy4 | ||
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 6fcb3060dec5..b65f0799df48 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt | |||
@@ -341,6 +341,8 @@ key that does nothing by itself, as well as any hot key that is type-specific | |||
341 | 3.1 Guidelines for wireless device drivers | 341 | 3.1 Guidelines for wireless device drivers |
342 | ------------------------------------------ | 342 | ------------------------------------------ |
343 | 343 | ||
344 | (in this text, rfkill->foo means the foo field of struct rfkill). | ||
345 | |||
344 | 1. Each independent transmitter in a wireless device (usually there is only one | 346 | 1. Each independent transmitter in a wireless device (usually there is only one |
345 | transmitter per device) should have a SINGLE rfkill class attached to it. | 347 | transmitter per device) should have a SINGLE rfkill class attached to it. |
346 | 348 | ||
@@ -363,10 +365,32 @@ This rule exists because users of the rfkill subsystem expect to get (and set, | |||
363 | when possible) the overall transmitter rfkill state, not of a particular rfkill | 365 | when possible) the overall transmitter rfkill state, not of a particular rfkill |
364 | line. | 366 | line. |
365 | 367 | ||
366 | 5. During suspend, the rfkill class will attempt to soft-block the radio | 368 | 5. The wireless device driver MUST NOT leave the transmitter enabled during |
367 | through a call to rfkill->toggle_radio, and will try to restore its previous | 369 | suspend and hibernation unless: |
368 | state during resume. After a rfkill class is suspended, it will *not* call | 370 | |
369 | rfkill->toggle_radio until it is resumed. | 371 | 5.1. The transmitter has to be enabled for some sort of functionality |
372 | like wake-on-wireless-packet or autonomous packet forwarding in a mesh | ||
373 | network, and that functionality is enabled for this suspend/hibernation | ||
374 | cycle. | ||
375 | |||
376 | AND | ||
377 | |||
378 | 5.2. The device was not in a user-requested BLOCKED state before | ||
379 | the suspend (i.e. the driver must NOT unblock a device, not even | ||
380 | to support wake-on-wireless-packet or remain in the mesh). | ||
381 | |||
382 | In other words, there is absolutely no allowed scenario where a driver can | ||
383 | automatically take action to unblock a rfkill controller (obviously, this deals | ||
384 | with scenarios where soft-blocking or both soft and hard blocking is happening. | ||
385 | Scenarios where hardware rfkill lines are the only ones blocking the | ||
386 | transmitter are outside of this rule, since the wireless device driver does not | ||
387 | control its input hardware rfkill lines in the first place). | ||
388 | |||
389 | 6. During resume, rfkill will try to restore its previous state. | ||
390 | |||
391 | 7. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio | ||
392 | until it is resumed. | ||
393 | |||
370 | 394 | ||
371 | Example of a WLAN wireless driver connected to the rfkill subsystem: | 395 | Example of a WLAN wireless driver connected to the rfkill subsystem: |
372 | -------------------------------------------------------------------- | 396 | -------------------------------------------------------------------- |
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index bf0baa19ec24..339207d11d95 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO | |||
@@ -70,13 +70,19 @@ Command line parameters | |||
70 | 70 | ||
71 | Note: While already known devices can be added to the list of devices to be | 71 | Note: While already known devices can be added to the list of devices to be |
72 | ignored, there will be no effect on them. However, if such a device | 72 | ignored, there will be no effect on them. However, if such a device |
73 | disappears and then reappears, it will then be ignored. | 73 | disappears and then reappears, it will then be ignored. To make |
74 | known devices go away, you need the "purge" command (see below). | ||
74 | 75 | ||
75 | For example, | 76 | For example, |
76 | "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" | 77 | "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" |
77 | will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored | 78 | will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored |
78 | devices. | 79 | devices. |
79 | 80 | ||
81 | You can remove already known but now ignored devices via | ||
82 | "echo purge > /proc/cio_ignore" | ||
83 | All devices ignored but still registered and not online (= not in use) | ||
84 | will be deregistered and thus removed from the system. | ||
85 | |||
80 | The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward | 86 | The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward |
81 | compatibility, by the device number in hexadecimal (0xabcd or abcd). Device | 87 | compatibility, by the device number in hexadecimal (0xabcd or abcd). Device |
82 | numbers given as 0xabcd will be interpreted as 0.0.abcd. | 88 | numbers given as 0xabcd will be interpreted as 0.0.abcd. |
@@ -98,8 +104,7 @@ debugfs entries | |||
98 | handling). | 104 | handling). |
99 | 105 | ||
100 | - /sys/kernel/debug/s390dbf/cio_msg/sprintf | 106 | - /sys/kernel/debug/s390dbf/cio_msg/sprintf |
101 | Various debug messages from the common I/O-layer, including messages | 107 | Various debug messages from the common I/O-layer. |
102 | printed when cio_msg=yes. | ||
103 | 108 | ||
104 | - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii | 109 | - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii |
105 | Logs the calling of functions in the common I/O-layer and, if applicable, | 110 | Logs the calling of functions in the common I/O-layer and, if applicable, |
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 88bcb8767335..9d8eb553884c 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt | |||
@@ -1,151 +1,242 @@ | |||
1 | ============= | ||
2 | CFS Scheduler | ||
3 | ============= | ||
1 | 4 | ||
2 | This is the CFS scheduler. | ||
3 | |||
4 | 80% of CFS's design can be summed up in a single sentence: CFS basically | ||
5 | models an "ideal, precise multi-tasking CPU" on real hardware. | ||
6 | |||
7 | "Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% | ||
8 | physical power and which can run each task at precise equal speed, in | ||
9 | parallel, each at 1/nr_running speed. For example: if there are 2 tasks | ||
10 | running then it runs each at 50% physical power - totally in parallel. | ||
11 | |||
12 | On real hardware, we can run only a single task at once, so while that | ||
13 | one task runs, the other tasks that are waiting for the CPU are at a | ||
14 | disadvantage - the current task gets an unfair amount of CPU time. In | ||
15 | CFS this fairness imbalance is expressed and tracked via the per-task | ||
16 | p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of | ||
17 | time the task should now run on the CPU for it to become completely fair | ||
18 | and balanced. | ||
19 | |||
20 | ( small detail: on 'ideal' hardware, the p->wait_runtime value would | ||
21 | always be zero - no task would ever get 'out of balance' from the | ||
22 | 'ideal' share of CPU time. ) | ||
23 | |||
24 | CFS's task picking logic is based on this p->wait_runtime value and it | ||
25 | is thus very simple: it always tries to run the task with the largest | ||
26 | p->wait_runtime value. In other words, CFS tries to run the task with | ||
27 | the 'gravest need' for more CPU time. So CFS always tries to split up | ||
28 | CPU time between runnable tasks as close to 'ideal multitasking | ||
29 | hardware' as possible. | ||
30 | |||
31 | Most of the rest of CFS's design just falls out of this really simple | ||
32 | concept, with a few add-on embellishments like nice levels, | ||
33 | multiprocessing and various algorithm variants to recognize sleepers. | ||
34 | |||
35 | In practice it works like this: the system runs a task a bit, and when | ||
36 | the task schedules (or a scheduler tick happens) the task's CPU usage is | ||
37 | 'accounted for': the (small) time it just spent using the physical CPU | ||
38 | is deducted from p->wait_runtime. [minus the 'fair share' it would have | ||
39 | gotten anyway]. Once p->wait_runtime gets low enough so that another | ||
40 | task becomes the 'leftmost task' of the time-ordered rbtree it maintains | ||
41 | (plus a small amount of 'granularity' distance relative to the leftmost | ||
42 | task so that we do not over-schedule tasks and trash the cache) then the | ||
43 | new leftmost task is picked and the current task is preempted. | ||
44 | |||
45 | The rq->fair_clock value tracks the 'CPU time a runnable task would have | ||
46 | fairly gotten, had it been runnable during that time'. So by using | ||
47 | rq->fair_clock values we can accurately timestamp and measure the | ||
48 | 'expected CPU time' a task should have gotten. All runnable tasks are | ||
49 | sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and | ||
50 | CFS picks the 'leftmost' task and sticks to it. As the system progresses | ||
51 | forwards, newly woken tasks are put into the tree more and more to the | ||
52 | right - slowly but surely giving a chance for every task to become the | ||
53 | 'leftmost task' and thus get on the CPU within a deterministic amount of | ||
54 | time. | ||
55 | |||
56 | Some implementation details: | ||
57 | |||
58 | - the introduction of Scheduling Classes: an extensible hierarchy of | ||
59 | scheduler modules. These modules encapsulate scheduling policy | ||
60 | details and are handled by the scheduler core without the core | ||
61 | code assuming about them too much. | ||
62 | |||
63 | - sched_fair.c implements the 'CFS desktop scheduler': it is a | ||
64 | replacement for the vanilla scheduler's SCHED_OTHER interactivity | ||
65 | code. | ||
66 | |||
67 | I'd like to give credit to Con Kolivas for the general approach here: | ||
68 | he has proven via RSDL/SD that 'fair scheduling' is possible and that | ||
69 | it results in better desktop scheduling. Kudos Con! | ||
70 | |||
71 | The CFS patch uses a completely different approach and implementation | ||
72 | from RSDL/SD. My goal was to make CFS's interactivity quality exceed | ||
73 | that of RSDL/SD, which is a high standard to meet :-) Testing | ||
74 | feedback is welcome to decide this one way or another. [ and, in any | ||
75 | case, all of SD's logic could be added via a kernel/sched_sd.c module | ||
76 | as well, if Con is interested in such an approach. ] | ||
77 | |||
78 | CFS's design is quite radical: it does not use runqueues, it uses a | ||
79 | time-ordered rbtree to build a 'timeline' of future task execution, | ||
80 | and thus has no 'array switch' artifacts (by which both the vanilla | ||
81 | scheduler and RSDL/SD are affected). | ||
82 | |||
83 | CFS uses nanosecond granularity accounting and does not rely on any | ||
84 | jiffies or other HZ detail. Thus the CFS scheduler has no notion of | ||
85 | 'timeslices' and has no heuristics whatsoever. There is only one | ||
86 | central tunable (you have to switch on CONFIG_SCHED_DEBUG): | ||
87 | |||
88 | /proc/sys/kernel/sched_granularity_ns | ||
89 | |||
90 | which can be used to tune the scheduler from 'desktop' (low | ||
91 | latencies) to 'server' (good batching) workloads. It defaults to a | ||
92 | setting suitable for desktop workloads. SCHED_BATCH is handled by the | ||
93 | CFS scheduler module too. | ||
94 | |||
95 | Due to its design, the CFS scheduler is not prone to any of the | ||
96 | 'attacks' that exist today against the heuristics of the stock | ||
97 | scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all | ||
98 | work fine and do not impact interactivity and produce the expected | ||
99 | behavior. | ||
100 | |||
101 | the CFS scheduler has a much stronger handling of nice levels and | ||
102 | SCHED_BATCH: both types of workloads should be isolated much more | ||
103 | agressively than under the vanilla scheduler. | ||
104 | |||
105 | ( another detail: due to nanosec accounting and timeline sorting, | ||
106 | sched_yield() support is very simple under CFS, and in fact under | ||
107 | CFS sched_yield() behaves much better than under any other | ||
108 | scheduler i have tested so far. ) | ||
109 | |||
110 | - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler | ||
111 | way than the vanilla scheduler does. It uses 100 runqueues (for all | ||
112 | 100 RT priority levels, instead of 140 in the vanilla scheduler) | ||
113 | and it needs no expired array. | ||
114 | |||
115 | - reworked/sanitized SMP load-balancing: the runqueue-walking | ||
116 | assumptions are gone from the load-balancing code now, and | ||
117 | iterators of the scheduling modules are used. The balancing code got | ||
118 | quite a bit simpler as a result. | ||
119 | |||
120 | |||
121 | Group scheduler extension to CFS | ||
122 | ================================ | ||
123 | |||
124 | Normally the scheduler operates on individual tasks and strives to provide | ||
125 | fair CPU time to each task. Sometimes, it may be desirable to group tasks | ||
126 | and provide fair CPU time to each such task group. For example, it may | ||
127 | be desirable to first provide fair CPU time to each user on the system | ||
128 | and then to each task belonging to a user. | ||
129 | |||
130 | CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets | ||
131 | SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such | ||
132 | groups. At present, there are two (mutually exclusive) mechanisms to group | ||
133 | tasks for CPU bandwidth control purpose: | ||
134 | |||
135 | - Based on user id (CONFIG_FAIR_USER_SCHED) | ||
136 | In this option, tasks are grouped according to their user id. | ||
137 | - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED) | ||
138 | This options lets the administrator create arbitrary groups | ||
139 | of tasks, using the "cgroup" pseudo filesystem. See | ||
140 | Documentation/cgroups.txt for more information about this | ||
141 | filesystem. | ||
142 | 5 | ||
143 | Only one of these options to group tasks can be chosen and not both. | 6 | 1. OVERVIEW |
7 | |||
8 | CFS stands for "Completely Fair Scheduler," and is the new "desktop" process | ||
9 | scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the | ||
10 | replacement for the previous vanilla scheduler's SCHED_OTHER interactivity | ||
11 | code. | ||
12 | |||
13 | 80% of CFS's design can be summed up in a single sentence: CFS basically models | ||
14 | an "ideal, precise multi-tasking CPU" on real hardware. | ||
15 | |||
16 | "Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% physical | ||
17 | power and which can run each task at precise equal speed, in parallel, each at | ||
18 | 1/nr_running speed. For example: if there are 2 tasks running, then it runs | ||
19 | each at 50% physical power --- i.e., actually in parallel. | ||
20 | |||
21 | On real hardware, we can run only a single task at once, so we have to | ||
22 | introduce the concept of "virtual runtime." The virtual runtime of a task | ||
23 | specifies when its next timeslice would start execution on the ideal | ||
24 | multi-tasking CPU described above. In practice, the virtual runtime of a task | ||
25 | is its actual runtime normalized to the total number of running tasks. | ||
26 | |||
27 | |||
28 | |||
29 | 2. FEW IMPLEMENTATION DETAILS | ||
30 | |||
31 | In CFS the virtual runtime is expressed and tracked via the per-task | ||
32 | p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately | ||
33 | timestamp and measure the "expected CPU time" a task should have gotten. | ||
34 | |||
35 | [ small detail: on "ideal" hardware, at any time all tasks would have the same | ||
36 | p->se.vruntime value --- i.e., tasks would execute simultaneously and no task | ||
37 | would ever get "out of balance" from the "ideal" share of CPU time. ] | ||
38 | |||
39 | CFS's task picking logic is based on this p->se.vruntime value and it is thus | ||
40 | very simple: it always tries to run the task with the smallest p->se.vruntime | ||
41 | value (i.e., the task which executed least so far). CFS always tries to split | ||
42 | up CPU time between runnable tasks as close to "ideal multitasking hardware" as | ||
43 | possible. | ||
44 | |||
45 | Most of the rest of CFS's design just falls out of this really simple concept, | ||
46 | with a few add-on embellishments like nice levels, multiprocessing and various | ||
47 | algorithm variants to recognize sleepers. | ||
48 | |||
49 | |||
50 | |||
51 | 3. THE RBTREE | ||
52 | |||
53 | CFS's design is quite radical: it does not use the old data structures for the | ||
54 | runqueues, but it uses a time-ordered rbtree to build a "timeline" of future | ||
55 | task execution, and thus has no "array switch" artifacts (by which both the | ||
56 | previous vanilla scheduler and RSDL/SD are affected). | ||
57 | |||
58 | CFS also maintains the rq->cfs.min_vruntime value, which is a monotonically | ||
59 | increasing value tracking the smallest vruntime among all tasks in the | ||
60 | runqueue. The total amount of work done by the system is tracked using | ||
61 | min_vruntime; that value is used to place newly activated entities on the left | ||
62 | side of the tree as much as possible. | ||
63 | |||
64 | The total number of running tasks in the runqueue is accounted through the | ||
65 | rq->cfs.load value, which is the sum of the weights of the tasks queued on the | ||
66 | runqueue. | ||
67 | |||
68 | CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the | ||
69 | p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to | ||
70 | account for possible wraparounds). CFS picks the "leftmost" task from this | ||
71 | tree and sticks to it. | ||
72 | As the system progresses forwards, the executed tasks are put into the tree | ||
73 | more and more to the right --- slowly but surely giving a chance for every task | ||
74 | to become the "leftmost task" and thus get on the CPU within a deterministic | ||
75 | amount of time. | ||
76 | |||
77 | Summing up, CFS works like this: it runs a task a bit, and when the task | ||
78 | schedules (or a scheduler tick happens) the task's CPU usage is "accounted | ||
79 | for": the (small) time it just spent using the physical CPU is added to | ||
80 | p->se.vruntime. Once p->se.vruntime gets high enough so that another task | ||
81 | becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a | ||
82 | small amount of "granularity" distance relative to the leftmost task so that we | ||
83 | do not over-schedule tasks and trash the cache), then the new leftmost task is | ||
84 | picked and the current task is preempted. | ||
85 | |||
86 | |||
87 | |||
88 | 4. SOME FEATURES OF CFS | ||
89 | |||
90 | CFS uses nanosecond granularity accounting and does not rely on any jiffies or | ||
91 | other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the | ||
92 | way the previous scheduler had, and has no heuristics whatsoever. There is | ||
93 | only one central tunable (you have to switch on CONFIG_SCHED_DEBUG): | ||
94 | |||
95 | /proc/sys/kernel/sched_granularity_ns | ||
96 | |||
97 | which can be used to tune the scheduler from "desktop" (i.e., low latencies) to | ||
98 | "server" (i.e., good batching) workloads. It defaults to a setting suitable | ||
99 | for desktop workloads. SCHED_BATCH is handled by the CFS scheduler module too. | ||
100 | |||
101 | Due to its design, the CFS scheduler is not prone to any of the "attacks" that | ||
102 | exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c, | ||
103 | chew.c, ring-test.c, massive_intr.c all work fine and do not impact | ||
104 | interactivity and produce the expected behavior. | ||
105 | |||
106 | The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH | ||
107 | than the previous vanilla scheduler: both types of workloads are isolated much | ||
108 | more aggressively. | ||
109 | |||
110 | SMP load-balancing has been reworked/sanitized: the runqueue-walking | ||
111 | assumptions are gone from the load-balancing code now, and iterators of the | ||
112 | scheduling modules are used. The balancing code got quite a bit simpler as a | ||
113 | result. | ||
114 | |||
115 | |||
116 | |||
117 | 5. Scheduling policies | ||
118 | |||
119 | CFS implements three scheduling policies: | ||
120 | |||
121 | - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling | ||
122 | policy that is used for regular tasks. | ||
123 | |||
124 | - SCHED_BATCH: Does not preempt nearly as often as regular tasks | ||
125 | would, thereby allowing tasks to run longer and make better use of | ||
126 | caches but at the cost of interactivity. This is well suited for | ||
127 | batch jobs. | ||
128 | |||
129 | - SCHED_IDLE: This is even weaker than nice 19, but it's not a true | ||
130 | idle timer scheduler, in order to avoid getting into priority | ||
131 | inversion problems which would deadlock the machine. | ||
132 | |||
133 | SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by | ||
134 | POSIX. | ||
135 | |||
136 | The command chrt from util-linux-ng 2.13.1.1 can set all of these except | ||
137 | SCHED_IDLE. | ||
144 | 138 | ||
145 | Group scheduler tunables: | ||
146 | 139 | ||
147 | When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for | 140 | |
148 | each new user and a "cpu_share" file is added in that directory. | 141 | 6. SCHEDULING CLASSES |
142 | |||
143 | The new CFS scheduler has been designed in such a way as to introduce "Scheduling | ||
144 | Classes," an extensible hierarchy of scheduler modules. These modules | ||
145 | encapsulate scheduling policy details and are handled by the scheduler core | ||
146 | without the core code assuming too much about them. | ||
147 | |||
148 | sched_fair.c implements the CFS scheduler described above. | ||
149 | |||
150 | sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than | ||
151 | the previous vanilla scheduler did. It uses 100 runqueues (for all 100 RT | ||
152 | priority levels, instead of 140 in the previous scheduler) and it needs no | ||
153 | expired array. | ||
154 | |||
155 | Scheduling classes are implemented through the sched_class structure, which | ||
156 | contains hooks to functions that must be called whenever an interesting event | ||
157 | occurs. | ||
158 | |||
159 | This is the (partial) list of the hooks: | ||
160 | |||
161 | - enqueue_task(...) | ||
162 | |||
163 | Called when a task enters a runnable state. | ||
164 | It puts the scheduling entity (task) into the red-black tree and | ||
165 | increments the nr_running variable. | ||
166 | |||
167 | - dequeue_task(...) | ||
168 | |||
169 | When a task is no longer runnable, this function is called to keep the | ||
170 | corresponding scheduling entity out of the red-black tree. It decrements | ||
171 | the nr_running variable. | ||
172 | |||
173 | - yield_task(...) | ||
174 | |||
175 | This function is basically just a dequeue followed by an enqueue, unless the | ||
176 | compat_yield sysctl is turned on; in that case, it places the scheduling | ||
177 | entity at the right-most end of the red-black tree. | ||
178 | |||
179 | - check_preempt_curr(...) | ||
180 | |||
181 | This function checks if a task that entered the runnable state should | ||
182 | preempt the currently running task. | ||
183 | |||
184 | - pick_next_task(...) | ||
185 | |||
186 | This function chooses the most appropriate task eligible to run next. | ||
187 | |||
188 | - set_curr_task(...) | ||
189 | |||
190 | This function is called when a task changes its scheduling class or changes | ||
191 | its task group. | ||
192 | |||
193 | - task_tick(...) | ||
194 | |||
195 | This function is mostly called from time tick functions; it might lead to a | ||
196 | process switch. This drives the running preemption. | ||
197 | |||
198 | - task_new(...) | ||
199 | |||
200 | The core scheduler gives the scheduling module an opportunity to manage new | ||
201 | task startup. The CFS scheduling module uses it for group scheduling, while | ||
202 | the scheduling module for a real-time task does not use it. | ||
203 | |||
204 | |||
205 | |||
206 | 7. GROUP SCHEDULER EXTENSIONS TO CFS | ||
207 | |||
208 | Normally, the scheduler operates on individual tasks and strives to provide | ||
209 | fair CPU time to each task. Sometimes, it may be desirable to group tasks and | ||
210 | provide fair CPU time to each such task group. For example, it may be | ||
211 | desirable to first provide fair CPU time to each user on the system and then to | ||
212 | each task belonging to a user. | ||
213 | |||
214 | CONFIG_GROUP_SCHED strives to achieve exactly that. It lets tasks be | ||
215 | grouped and divides CPU time fairly among such groups. | ||
216 | |||
217 | CONFIG_RT_GROUP_SCHED permits grouping real-time (i.e., SCHED_FIFO and | ||
218 | SCHED_RR) tasks. | ||
219 | |||
220 | CONFIG_FAIR_GROUP_SCHED permits grouping CFS (i.e., SCHED_NORMAL and | ||
221 | SCHED_BATCH) tasks. | ||
222 | |||
223 | At present, there are two (mutually exclusive) mechanisms to group tasks for | ||
224 | CPU bandwidth control purposes: | ||
225 | |||
226 | - Based on user id (CONFIG_USER_SCHED) | ||
227 | |||
228 | With this option, tasks are grouped according to their user id. | ||
229 | |||
230 | - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED) | ||
231 | |||
232 | This option needs CONFIG_CGROUPS to be defined, and lets the administrator | ||
233 | create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See | ||
234 | Documentation/cgroups.txt for more information about this filesystem. | ||
235 | |||
236 | Only one of these options to group tasks can be chosen and not both. | ||
237 | |||
238 | When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new | ||
239 | user and a "cpu_share" file is added in that directory. | ||
149 | 240 | ||
150 | # cd /sys/kernel/uids | 241 | # cd /sys/kernel/uids |
151 | # cat 512/cpu_share # Display user 512's CPU share | 242 | # cat 512/cpu_share # Display user 512's CPU share |
@@ -155,16 +246,14 @@ each new user and a "cpu_share" file is added in that directory. | |||
155 | 2048 | 246 | 2048 |
156 | # | 247 | # |
157 | 248 | ||
158 | CPU bandwidth between two users are divided in the ratio of their CPU shares. | 249 | CPU bandwidth between two users is divided in the ratio of their CPU shares. |
159 | For ex: if you would like user "root" to get twice the bandwidth of user | 250 | For example: if you would like user "root" to get twice the bandwidth of user |
160 | "guest", then set the cpu_share for both the users such that "root"'s | 251 | "guest," then set the cpu_share for both the users such that "root"'s cpu_share |
161 | cpu_share is twice "guest"'s cpu_share | 252 | is twice "guest"'s cpu_share. |
162 | |||
163 | 253 | ||
164 | When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created | 254 | When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each |
165 | for each group created using the pseudo filesystem. See example steps | 255 | group created using the pseudo filesystem. See example steps below to create |
166 | below to create task groups and modify their CPU share using the "cgroups" | 256 | task groups and modify their CPU share using the "cgroups" pseudo filesystem. |
167 | pseudo filesystem | ||
168 | 257 | ||
169 | # mkdir /dev/cpuctl | 258 | # mkdir /dev/cpuctl |
170 | # mount -t cgroup -ocpu none /dev/cpuctl | 259 | # mount -t cgroup -ocpu none /dev/cpuctl |
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt index 75143f0c23b6..38d324d62b25 100644 --- a/Documentation/scsi/scsi_fc_transport.txt +++ b/Documentation/scsi/scsi_fc_transport.txt | |||
@@ -436,6 +436,42 @@ Other: | |||
436 | was updated to remove all vports for the fc_host as well. | 436 | was updated to remove all vports for the fc_host as well. |
437 | 437 | ||
438 | 438 | ||
439 | Transport supplied functions | ||
440 | ---------------------------- | ||
441 | |||
442 | The following functions are supplied by the FC-transport for use by LLDs. | ||
443 | |||
444 | fc_vport_create - create a vport | ||
445 | fc_vport_terminate - detach and remove a vport | ||
446 | |||
447 | Details: | ||
448 | |||
449 | /** | ||
450 | * fc_vport_create - Admin App or LLDD requests creation of a vport | ||
451 | * @shost: scsi host the virtual port is connected to. | ||
452 | * @ids: The world wide names, FC4 port roles, etc for | ||
453 | * the virtual port. | ||
454 | * | ||
455 | * Notes: | ||
456 | * This routine assumes no locks are held on entry. | ||
457 | */ | ||
458 | struct fc_vport * | ||
459 | fc_vport_create(struct Scsi_Host *shost, struct fc_vport_identifiers *ids) | ||
460 | |||
461 | /** | ||
462 | * fc_vport_terminate - Admin App or LLDD requests termination of a vport | ||
463 | * @vport: fc_vport to be terminated | ||
464 | * | ||
465 | * Calls the LLDD vport_delete() function, then deallocates and removes | ||
466 | * the vport from the shost and object tree. | ||
467 | * | ||
468 | * Notes: | ||
469 | * This routine assumes no locks are held on entry. | ||
470 | */ | ||
471 | int | ||
472 | fc_vport_terminate(struct fc_vport *vport) | ||
473 | |||
474 | |||
439 | Credits | 475 | Credits |
440 | ======= | 476 | ======= |
441 | The following people have contributed to this document: | 477 | The following people have contributed to this document: |
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index b117e42a6166..e0e54a27fc10 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -746,8 +746,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
746 | Module snd-hda-intel | 746 | Module snd-hda-intel |
747 | -------------------- | 747 | -------------------- |
748 | 748 | ||
749 | Module for Intel HD Audio (ICH6, ICH6M, ESB2, ICH7, ICH8), | 749 | Module for Intel HD Audio (ICH6, ICH6M, ESB2, ICH7, ICH8, ICH9, ICH10, |
750 | ATI SB450, SB600, RS600, | 750 | PCH, SCH), |
751 | ATI SB450, SB600, R600, RS600, RS690, RS780, RV610, RV620, | ||
752 | RV630, RV635, RV670, RV770, | ||
751 | VIA VT8251/VT8237A, | 753 | VIA VT8251/VT8237A, |
752 | SIS966, ULI M5461 | 754 | SIS966, ULI M5461 |
753 | 755 | ||
@@ -807,6 +809,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
807 | ALC260 | 809 | ALC260 |
808 | hp HP machines | 810 | hp HP machines |
809 | hp-3013 HP machines (3013-variant) | 811 | hp-3013 HP machines (3013-variant) |
812 | hp-dc7600 HP DC7600 | ||
810 | fujitsu Fujitsu S7020 | 813 | fujitsu Fujitsu S7020 |
811 | acer Acer TravelMate | 814 | acer Acer TravelMate |
812 | will Will laptops (PB V7900) | 815 | will Will laptops (PB V7900) |
@@ -828,8 +831,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
828 | hippo Hippo (ATI) with jack detection, Sony UX-90s | 831 | hippo Hippo (ATI) with jack detection, Sony UX-90s |
829 | hippo_1 Hippo (Benq) with jack detection | 832 | hippo_1 Hippo (Benq) with jack detection |
830 | sony-assamd Sony ASSAMD | 833 | sony-assamd Sony ASSAMD |
834 | toshiba-s06 Toshiba S06 | ||
835 | toshiba-rx1 Toshiba RX1 | ||
831 | ultra Samsung Q1 Ultra Vista model | 836 | ultra Samsung Q1 Ultra Vista model |
832 | lenovo-3000 Lenovo 3000 y410 | 837 | lenovo-3000 Lenovo 3000 y410 |
838 | nec NEC Versa S9100 | ||
833 | basic fixed pin assignment w/o SPDIF | 839 | basic fixed pin assignment w/o SPDIF |
834 | auto auto-config reading BIOS (default) | 840 | auto auto-config reading BIOS (default) |
835 | 841 | ||
@@ -838,6 +844,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
838 | 3stack 3-stack model | 844 | 3stack 3-stack model |
839 | toshiba Toshiba A205 | 845 | toshiba Toshiba A205 |
840 | acer Acer laptops | 846 | acer Acer laptops |
847 | acer-aspire Acer Aspire One | ||
841 | dell Dell OEM laptops (Vostro 1200) | 848 | dell Dell OEM laptops (Vostro 1200) |
842 | zepto Zepto laptops | 849 | zepto Zepto laptops |
843 | test for testing/debugging purpose, almost all controls can | 850 | test for testing/debugging purpose, almost all controls can |
@@ -847,6 +854,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
847 | 854 | ||
848 | ALC269 | 855 | ALC269 |
849 | basic Basic preset | 856 | basic Basic preset |
857 | quanta Quanta FL1 | ||
858 | eeepc-p703 ASUS Eeepc P703 P900A | ||
859 | eeepc-p901 ASUS Eeepc P901 S101 | ||
850 | 860 | ||
851 | ALC662/663 | 861 | ALC662/663 |
852 | 3stack-dig 3-stack (2-channel) with SPDIF | 862 | 3stack-dig 3-stack (2-channel) with SPDIF |
@@ -856,10 +866,17 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
856 | lenovo-101e Lenovo laptop | 866 | lenovo-101e Lenovo laptop |
857 | eeepc-p701 ASUS Eeepc P701 | 867 | eeepc-p701 ASUS Eeepc P701 |
858 | eeepc-ep20 ASUS Eeepc EP20 | 868 | eeepc-ep20 ASUS Eeepc EP20 |
869 | ecs ECS/Foxconn mobo | ||
859 | m51va ASUS M51VA | 870 | m51va ASUS M51VA |
860 | g71v ASUS G71V | 871 | g71v ASUS G71V |
861 | h13 ASUS H13 | 872 | h13 ASUS H13 |
862 | g50v ASUS G50V | 873 | g50v ASUS G50V |
874 | asus-mode1 ASUS | ||
875 | asus-mode2 ASUS | ||
876 | asus-mode3 ASUS | ||
877 | asus-mode4 ASUS | ||
878 | asus-mode5 ASUS | ||
879 | asus-mode6 ASUS | ||
863 | auto auto-config reading BIOS (default) | 880 | auto auto-config reading BIOS (default) |
864 | 881 | ||
865 | ALC882/885 | 882 | ALC882/885 |
@@ -891,12 +908,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
891 | lenovo-101e Lenovo 101E | 908 | lenovo-101e Lenovo 101E |
892 | lenovo-nb0763 Lenovo NB0763 | 909 | lenovo-nb0763 Lenovo NB0763 |
893 | lenovo-ms7195-dig Lenovo MS7195 | 910 | lenovo-ms7195-dig Lenovo MS7195 |
911 | lenovo-sky Lenovo Sky | ||
894 | haier-w66 Haier W66 | 912 | haier-w66 Haier W66 |
895 | 3stack-hp HP machines with 3stack (Lucknow, Samba boards) | 913 | 3stack-hp HP machines with 3stack (Lucknow, Samba boards) |
896 | 6stack-dell Dell machines with 6stack (Inspiron 530) | 914 | 6stack-dell Dell machines with 6stack (Inspiron 530) |
897 | mitac Mitac 8252D | 915 | mitac Mitac 8252D |
898 | clevo-m720 Clevo M720 laptop series | 916 | clevo-m720 Clevo M720 laptop series |
899 | fujitsu-pi2515 Fujitsu AMILO Pi2515 | 917 | fujitsu-pi2515 Fujitsu AMILO Pi2515 |
918 | 3stack-6ch-intel Intel DG33* boards | ||
900 | auto auto-config reading BIOS (default) | 919 | auto auto-config reading BIOS (default) |
901 | 920 | ||
902 | ALC861/660 | 921 | ALC861/660 |
@@ -929,7 +948,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
929 | allout 5-jack in back, 2-jack in front, SPDIF out | 948 | allout 5-jack in back, 2-jack in front, SPDIF out |
930 | auto auto-config reading BIOS (default) | 949 | auto auto-config reading BIOS (default) |
931 | 950 | ||
932 | AD1882 | 951 | AD1882 / AD1882A |
933 | 3stack 3-stack mode (default) | 952 | 3stack 3-stack mode (default) |
934 | 6stack 6-stack mode | 953 | 6stack 6-stack mode |
935 | 954 | ||
@@ -1079,7 +1098,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1079 | register value without FIFO size correction as the current | 1098 | register value without FIFO size correction as the current |
1080 | DMA pointer. position_fix=2 will make the driver to use | 1099 | DMA pointer. position_fix=2 will make the driver to use |
1081 | the position buffer instead of reading SD_LPIB register. | 1100 | the position buffer instead of reading SD_LPIB register. |
1082 | (Usually SD_LPLIB register is more accurate than the | 1101 | (Usually SD_LPIB register is more accurate than the |
1083 | position buffer.) | 1102 | position buffer.) |
1084 | 1103 | ||
1085 | NB: If you get many "azx_get_response timeout" messages at | 1104 | NB: If you get many "azx_get_response timeout" messages at |
@@ -1166,6 +1185,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1166 | * Event Electronics, EZ8 | 1185 | * Event Electronics, EZ8 |
1167 | * Digigram VX442 | 1186 | * Digigram VX442 |
1168 | * Lionstracs, Mediastaton | 1187 | * Lionstracs, Mediastaton |
1188 | * Terrasoniq TS 88 | ||
1169 | 1189 | ||
1170 | model - Use the given board model, one of the following: | 1190 | model - Use the given board model, one of the following: |
1171 | delta1010, dio2496, delta66, delta44, audiophile, delta410, | 1191 | delta1010, dio2496, delta66, delta44, audiophile, delta410, |
@@ -1200,7 +1220,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1200 | * TerraTec Phase 22 | 1220 | * TerraTec Phase 22 |
1201 | * TerraTec Phase 28 | 1221 | * TerraTec Phase 28 |
1202 | * AudioTrak Prodigy 7.1 | 1222 | * AudioTrak Prodigy 7.1 |
1203 | * AudioTrak Prodigy 7.1LT | 1223 | * AudioTrak Prodigy 7.1 LT |
1224 | * AudioTrak Prodigy 7.1 XT | ||
1225 | * AudioTrak Prodigy 7.1 HIFI | ||
1226 | * AudioTrak Prodigy 7.1 HD2 | ||
1204 | * AudioTrak Prodigy 192 | 1227 | * AudioTrak Prodigy 192 |
1205 | * Pontis MS300 | 1228 | * Pontis MS300 |
1206 | * Albatron K8X800 Pro II | 1229 | * Albatron K8X800 Pro II |
@@ -1211,12 +1234,16 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1211 | * Shuttle SN25P | 1234 | * Shuttle SN25P |
1212 | * Onkyo SE-90PCI | 1235 | * Onkyo SE-90PCI |
1213 | * Onkyo SE-200PCI | 1236 | * Onkyo SE-200PCI |
1237 | * ESI Juli@ | ||
1238 | * Hercules Fortissimo IV | ||
1239 | * EGO-SYS WaveTerminal 192M | ||
1214 | 1240 | ||
1215 | model - Use the given board model, one of the following: | 1241 | model - Use the given board model, one of the following: |
1216 | revo51, revo71, amp2000, prodigy71, prodigy71lt, | 1242 | revo51, revo71, amp2000, prodigy71, prodigy71lt, |
1217 | prodigy192, aureon51, aureon71, universe, ap192, | 1243 | prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192, |
1218 | k8x800, phase22, phase28, ms300, av710, se200pci, | 1244 | juli, aureon51, aureon71, universe, ap192, k8x800, |
1219 | se90pci | 1245 | phase22, phase28, ms300, av710, se200pci, se90pci, |
1246 | fortissimo4, sn25p, WT192M | ||
1220 | 1247 | ||
1221 | This module supports multiple cards and autoprobe. | 1248 | This module supports multiple cards and autoprobe. |
1222 | 1249 | ||
@@ -1255,7 +1282,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1255 | 1282 | ||
1256 | Module for AC'97 motherboards from Intel and compatibles. | 1283 | Module for AC'97 motherboards from Intel and compatibles. |
1257 | * Intel i810/810E, i815, i820, i830, i84x, MX440 | 1284 | * Intel i810/810E, i815, i820, i830, i84x, MX440 |
1258 | ICH5, ICH6, ICH7, ESB2 | 1285 | ICH5, ICH6, ICH7, 6300ESB, ESB2 |
1259 | * SiS 7012 (SiS 735) | 1286 | * SiS 7012 (SiS 735) |
1260 | * NVidia NForce, NForce2, NForce3, MCP04, CK804 | 1287 | * NVidia NForce, NForce2, NForce3, MCP04, CK804 |
1261 | CK8, CK8S, MCP501 | 1288 | CK8, CK8S, MCP501 |
@@ -1951,6 +1978,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1951 | * CHIC True Sound 4Dwave | 1978 | * CHIC True Sound 4Dwave |
1952 | * Shark Predator4D-PCI | 1979 | * Shark Predator4D-PCI |
1953 | * Jaton SonicWave 4D | 1980 | * Jaton SonicWave 4D |
1981 | * SiS SI7018 PCI Audio | ||
1982 | * Hoontech SoundTrack Digital 4DWave NX | ||
1954 | 1983 | ||
1955 | pcm_channels - max channels (voices) reserved for PCM | 1984 | pcm_channels - max channels (voices) reserved for PCM |
1956 | wavetable_size - max wavetable size in kB (4-?kb) | 1985 | wavetable_size - max wavetable size in kB (4-?kb) |
@@ -1966,12 +1995,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1966 | 1995 | ||
1967 | vid - Vendor ID for the device (optional) | 1996 | vid - Vendor ID for the device (optional) |
1968 | pid - Product ID for the device (optional) | 1997 | pid - Product ID for the device (optional) |
1998 | nrpacks - Max. number of packets per URB (default: 8) | ||
1999 | async_unlink - Use async unlink mode (default: yes) | ||
1969 | device_setup - Device specific magic number (optional) | 2000 | device_setup - Device specific magic number (optional) |
1970 | - Influence depends on the device | 2001 | - Influence depends on the device |
1971 | - Default: 0x0000 | 2002 | - Default: 0x0000 |
2003 | ignore_ctl_error - Ignore any USB-controller error regarding mixer | ||
2004 | interface (default: no) | ||
1972 | 2005 | ||
1973 | This module supports multiple devices, autoprobe and hotplugging. | 2006 | This module supports multiple devices, autoprobe and hotplugging. |
1974 | 2007 | ||
2008 | NB: nrpacks parameter can be modified dynamically via sysfs. | ||
2009 | Don't put the value over 20. Changing via sysfs has no sanity | ||
2010 | check. | ||
2011 | NB: async_unlink=0 would cause Oops. It remains just for | ||
2012 | debugging purpose (if any). | ||
2013 | NB: ignore_ctl_error=1 may help when you get an error when accessing | ||
2014 | the mixer element, such as URB error -22. This happens on some | ||
2015 | buggy USB devices or controllers. | ||
2016 | |||
1975 | Module snd-usb-caiaq | 2017 | Module snd-usb-caiaq |
1976 | -------------------- | 2018 | -------------------- |
1977 | 2019 | ||
@@ -2078,7 +2120,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
2078 | ------------------- | 2120 | ------------------- |
2079 | 2121 | ||
2080 | Module for sound cards based on the Asus AV100/AV200 chips, | 2122 | Module for sound cards based on the Asus AV100/AV200 chips, |
2081 | i.e., Xonar D1, DX, D2 and D2X. | 2123 | i.e., Xonar D1, DX, D2, D2X and HDAV1.3 (Deluxe). |
2082 | 2124 | ||
2083 | This module supports autoprobe and multiple cards. | 2125 | This module supports autoprobe and multiple cards. |
2084 | 2126 | ||
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index e13c4e67029f..87a7c07ab658 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl | |||
@@ -5073,8 +5073,7 @@ struct _snd_pcm_runtime { | |||
5073 | with <constant>SNDRV_DMA_TYPE_CONTINUOUS</constant> type and the | 5073 | with <constant>SNDRV_DMA_TYPE_CONTINUOUS</constant> type and the |
5074 | <function>snd_dma_continuous_data(GFP_KERNEL)</function> device pointer, | 5074 | <function>snd_dma_continuous_data(GFP_KERNEL)</function> device pointer, |
5075 | where <constant>GFP_KERNEL</constant> is the kernel allocation flag to | 5075 | where <constant>GFP_KERNEL</constant> is the kernel allocation flag to |
5076 | use. For the SBUS, <constant>SNDRV_DMA_TYPE_SBUS</constant> and | 5076 | use. |
5077 | <function>snd_dma_sbus_data(sbus_dev)</function> are used instead. | ||
5078 | For the PCI scatter-gather buffers, use | 5077 | For the PCI scatter-gather buffers, use |
5079 | <constant>SNDRV_DMA_TYPE_DEV_SG</constant> with | 5078 | <constant>SNDRV_DMA_TYPE_DEV_SG</constant> with |
5080 | <function>snd_dma_pci_data(pci)</function> | 5079 | <function>snd_dma_pci_data(pci)</function> |
@@ -6135,44 +6134,58 @@ struct _snd_pcm_runtime { | |||
6135 | </para> | 6134 | </para> |
6136 | </section> | 6135 | </section> |
6137 | 6136 | ||
6138 | <section id="useful-functions-snd-assert"> | 6137 | <section id="useful-functions-snd-bug"> |
6139 | <title><function>snd_assert()</function></title> | 6138 | <title><function>snd_BUG()</function></title> |
6140 | <para> | 6139 | <para> |
6141 | <function>snd_assert()</function> macro is similar with the | 6140 | It shows the <computeroutput>BUG?</computeroutput> message and |
6142 | normal <function>assert()</function> macro. For example, | 6141 | stack trace as well as <function>snd_BUG_ON</function> at the point. |
6142 | It's useful to show that a fatal error happens there. | ||
6143 | </para> | ||
6144 | <para> | ||
6145 | When no debug flag is set, this macro is ignored. | ||
6146 | </para> | ||
6147 | </section> | ||
6148 | |||
6149 | <section id="useful-functions-snd-bug-on"> | ||
6150 | <title><function>snd_BUG_ON()</function></title> | ||
6151 | <para> | ||
6152 | <function>snd_BUG_ON()</function> macro is similar to the | ||
6153 | <function>WARN_ON()</function> macro. For example, | ||
6143 | 6154 | ||
6144 | <informalexample> | 6155 | <informalexample> |
6145 | <programlisting> | 6156 | <programlisting> |
6146 | <![CDATA[ | 6157 | <![CDATA[ |
6147 | snd_assert(pointer != NULL, return -EINVAL); | 6158 | snd_BUG_ON(!pointer); |
6148 | ]]> | 6159 | ]]> |
6149 | </programlisting> | 6160 | </programlisting> |
6150 | </informalexample> | 6161 | </informalexample> |
6151 | </para> | ||
6152 | 6162 | ||
6153 | <para> | 6163 | or it can be used as the condition, |
6154 | The first argument is the expression to evaluate, and the | 6164 | <informalexample> |
6155 | second argument is the action if it fails. When | 6165 | <programlisting> |
6156 | <constant>CONFIG_SND_DEBUG</constant>, is set, it will show an | 6166 | <![CDATA[ |
6157 | error message such as <computeroutput>BUG? (xxx)</computeroutput> | 6167 | if (snd_BUG_ON(non_zero_is_bug)) |
6158 | together with stack trace. | 6168 | return -EINVAL; |
6159 | </para> | 6169 | ]]> |
6160 | <para> | 6170 | </programlisting> |
6161 | When no debug flag is set, this macro is ignored. | 6171 | </informalexample> |
6162 | </para> | ||
6163 | </section> | ||
6164 | 6172 | ||
6165 | <section id="useful-functions-snd-bug"> | ||
6166 | <title><function>snd_BUG()</function></title> | ||
6167 | <para> | ||
6168 | It shows the <computeroutput>BUG?</computeroutput> message and | ||
6169 | stack trace as well as <function>snd_assert</function> at the point. | ||
6170 | It's useful to show that a fatal error happens there. | ||
6171 | </para> | 6173 | </para> |
6174 | |||
6172 | <para> | 6175 | <para> |
6173 | When no debug flag is set, this macro is ignored. | 6176 | The macro takes a conditional expression to evaluate. |
6177 | When <constant>CONFIG_SND_DEBUG</constant> is set, the | ||
6178 | expression is actually evaluated. If it's non-zero, it shows | ||
6179 | the warning message such as | ||
6180 | <computeroutput>BUG? (xxx)</computeroutput> | ||
6181 | normally followed by stack trace. It returns the evaluated | ||
6182 | value. | ||
6183 | When <constant>CONFIG_SND_DEBUG</constant> is not set, this | ||
6184 | macro always returns zero. | ||
6174 | </para> | 6185 | </para> |
6186 | |||
6175 | </section> | 6187 | </section> |
6188 | |||
6176 | </chapter> | 6189 | </chapter> |
6177 | 6190 | ||
6178 | 6191 | ||
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt index b2ed6983f40d..46f9684d0b29 100644 --- a/Documentation/sound/alsa/soc/dapm.txt +++ b/Documentation/sound/alsa/soc/dapm.txt | |||
@@ -135,11 +135,7 @@ when the Mic is inserted:- | |||
135 | 135 | ||
136 | static int spitz_mic_bias(struct snd_soc_dapm_widget* w, int event) | 136 | static int spitz_mic_bias(struct snd_soc_dapm_widget* w, int event) |
137 | { | 137 | { |
138 | if(SND_SOC_DAPM_EVENT_ON(event)) | 138 | gpio_set_value(SPITZ_GPIO_MIC_BIAS, SND_SOC_DAPM_EVENT_ON(event)); |
139 | set_scoop_gpio(&spitzscoop2_device.dev, SPITZ_SCP2_MIC_BIAS); | ||
140 | else | ||
141 | reset_scoop_gpio(&spitzscoop2_device.dev, SPITZ_SCP2_MIC_BIAS); | ||
142 | |||
143 | return 0; | 139 | return 0; |
144 | } | 140 | } |
145 | 141 | ||
@@ -269,11 +265,7 @@ powered only when the spk is in use. | |||
269 | /* turn speaker amplifier on/off depending on use */ | 265 | /* turn speaker amplifier on/off depending on use */ |
270 | static int corgi_amp_event(struct snd_soc_dapm_widget *w, int event) | 266 | static int corgi_amp_event(struct snd_soc_dapm_widget *w, int event) |
271 | { | 267 | { |
272 | if (SND_SOC_DAPM_EVENT_ON(event)) | 268 | gpio_set_value(CORGI_GPIO_APM_ON, SND_SOC_DAPM_EVENT_ON(event)); |
273 | set_scoop_gpio(&corgiscoop_device.dev, CORGI_SCP_APM_ON); | ||
274 | else | ||
275 | reset_scoop_gpio(&corgiscoop_device.dev, CORGI_SCP_APM_ON); | ||
276 | |||
277 | return 0; | 269 | return 0; |
278 | } | 270 | } |
279 | 271 | ||
diff --git a/Documentation/sparc/sbus_drivers.txt b/Documentation/sparc/sbus_drivers.txt deleted file mode 100644 index eb1e28ad8822..000000000000 --- a/Documentation/sparc/sbus_drivers.txt +++ /dev/null | |||
@@ -1,309 +0,0 @@ | |||
1 | |||
2 | Writing SBUS Drivers | ||
3 | |||
4 | David S. Miller (davem@redhat.com) | ||
5 | |||
6 | The SBUS driver interfaces of the Linux kernel have been | ||
7 | revamped completely for 2.4.x for several reasons. Foremost were | ||
8 | performance and complexity concerns. This document details these | ||
9 | new interfaces and how they are used to write an SBUS device driver. | ||
10 | |||
11 | SBUS drivers need to include <asm/sbus.h> to get access | ||
12 | to functions and structures described here. | ||
13 | |||
14 | Probing and Detection | ||
15 | |||
16 | Each SBUS device inside the machine is described by a | ||
17 | structure called "struct sbus_dev". Likewise, each SBUS bus | ||
18 | found in the system is described by a "struct sbus_bus". For | ||
19 | each SBUS bus, the devices underneath are hung in a tree-like | ||
20 | fashion off of the bus structure. | ||
21 | |||
22 | The SBUS device structure contains enough information | ||
23 | for you to implement your device probing algorithm and obtain | ||
24 | the bits necessary to run your device. The most commonly | ||
25 | used members of this structure, and their typical usage, | ||
26 | will be detailed below. | ||
27 | |||
28 | Here is a piece of skeleton code for performing a device | ||
29 | probe in an SBUS driver under Linux: | ||
30 | |||
31 | static int __devinit mydevice_probe_one(struct sbus_dev *sdev) | ||
32 | { | ||
33 | struct mysdevice *mp = kzalloc(sizeof(*mp), GFP_KERNEL); | ||
34 | |||
35 | if (!mp) | ||
36 | return -ENODEV; | ||
37 | |||
38 | ... | ||
39 | dev_set_drvdata(&sdev->ofdev.dev, mp); | ||
40 | return 0; | ||
41 | ... | ||
42 | } | ||
43 | |||
44 | static int __devinit mydevice_probe(struct of_device *dev, | ||
45 | const struct of_device_id *match) | ||
46 | { | ||
47 | struct sbus_dev *sdev = to_sbus_device(&dev->dev); | ||
48 | |||
49 | return mydevice_probe_one(sdev); | ||
50 | } | ||
51 | |||
52 | static int __devexit mydevice_remove(struct of_device *dev) | ||
53 | { | ||
54 | struct sbus_dev *sdev = to_sbus_device(&dev->dev); | ||
55 | struct mydevice *mp = dev_get_drvdata(&dev->dev); | ||
56 | |||
57 | return mydevice_remove_one(sdev, mp); | ||
58 | } | ||
59 | |||
60 | static struct of_device_id mydevice_match[] = { | ||
61 | { | ||
62 | .name = "mydevice", | ||
63 | }, | ||
64 | {}, | ||
65 | }; | ||
66 | |||
67 | MODULE_DEVICE_TABLE(of, mydevice_match); | ||
68 | |||
69 | static struct of_platform_driver mydevice_driver = { | ||
70 | .match_table = mydevice_match, | ||
71 | .probe = mydevice_probe, | ||
72 | .remove = __devexit_p(mydevice_remove), | ||
73 | .driver = { | ||
74 | .name = "mydevice", | ||
75 | }, | ||
76 | }; | ||
77 | |||
78 | static int __init mydevice_init(void) | ||
79 | { | ||
80 | return of_register_driver(&mydevice_driver, &sbus_bus_type); | ||
81 | } | ||
82 | |||
83 | static void __exit mydevice_exit(void) | ||
84 | { | ||
85 | of_unregister_driver(&mydevice_driver); | ||
86 | } | ||
87 | |||
88 | module_init(mydevice_init); | ||
89 | module_exit(mydevice_exit); | ||
90 | |||
91 | The mydevice_match table is a series of entries which | ||
92 | describes what SBUS devices your driver is meant for. In the | ||
93 | simplest case you specify a string for the 'name' field. Every | ||
94 | SBUS device with a 'name' property matching your string will | ||
95 | be passed one-by-one to your .probe method. | ||
96 | |||
97 | You should store away your device private state structure | ||
98 | pointer in the drvdata area so that you can retrieve it later on | ||
99 | in your .remove method. | ||
100 | |||
101 | Any memory allocated, registers mapped, IRQs registered, | ||
102 | etc. must be undone by your .remove method so that all resources | ||
103 | of your device are released by the time it returns. | ||
104 | |||
105 | You should _NOT_ use the for_each_sbus(), for_each_sbusdev(), | ||
106 | and for_all_sbusdev() interfaces. They are deprecated, will be | ||
107 | removed, and no new driver should reference them ever. | ||
108 | |||
109 | Mapping and Accessing I/O Registers | ||
110 | |||
111 | Each SBUS device structure contains an array of descriptors | ||
112 | which describe each register set. We abuse struct resource for that. | ||
113 | They each correspond to the "reg" properties provided by the OBP firmware. | ||
114 | |||
115 | Before you can access your device's registers you must map | ||
116 | them. And later if you wish to shutdown your driver (for module | ||
117 | unload or similar) you must unmap them. You must treat them as | ||
118 | a resource, which you allocate (map) before using and free up | ||
119 | (unmap) when you are done with it. | ||
120 | |||
121 | The mapping information is stored in an opaque value | ||
122 | typed as an "unsigned long". This is the type of the return value | ||
123 | of the mapping interface, and the arguments to the unmapping | ||
124 | interface. Let's say you want to map the first set of registers. | ||
125 | Perhaps part of your driver software state structure looks like: | ||
126 | |||
127 | struct mydevice { | ||
128 | unsigned long control_regs; | ||
129 | ... | ||
130 | struct sbus_dev *sdev; | ||
131 | ... | ||
132 | }; | ||
133 | |||
134 | At initialization time you then use the sbus_ioremap | ||
135 | interface to map in your registers, like so: | ||
136 | |||
137 | static void init_one_mydevice(struct sbus_dev *sdev) | ||
138 | { | ||
139 | struct mydevice *mp; | ||
140 | ... | ||
141 | |||
142 | mp->control_regs = sbus_ioremap(&sdev->resource[0], 0, | ||
143 | CONTROL_REGS_SIZE, "mydevice regs"); | ||
144 | if (!mp->control_regs) { | ||
145 | /* Failure, cleanup and return. */ | ||
146 | } | ||
147 | } | ||
148 | |||
149 | Second argument to sbus_ioremap is an offset for | ||
150 | cranky devices with broken OBP PROM. The sbus_ioremap uses only | ||
151 | a start address and flags from the resource structure. | ||
152 | Therefore it is possible to use the same resource to map | ||
153 | several sets of registers or even to fabricate a resource | ||
154 | structure if driver gets physical address from some private place. | ||
155 | This practice is discouraged though. Use whatever OBP PROM | ||
156 | provided to you. | ||
157 | |||
158 | And here is how you might unmap these registers later at | ||
159 | driver shutdown or module unload time, using the sbus_iounmap | ||
160 | interface: | ||
161 | |||
162 | static void mydevice_unmap_regs(struct mydevice *mp) | ||
163 | { | ||
164 | sbus_iounmap(mp->control_regs, CONTROL_REGS_SIZE); | ||
165 | } | ||
166 | |||
167 | Finally, to actually access your registers there are 6 | ||
168 | interface routines at your disposal. Accesses are byte (8 bit), | ||
169 | word (16 bit), or longword (32 bit) sized. Here they are: | ||
170 | |||
171 | u8 sbus_readb(unsigned long reg) /* read byte */ | ||
172 | u16 sbus_readw(unsigned long reg) /* read word */ | ||
173 | u32 sbus_readl(unsigned long reg) /* read longword */ | ||
174 | void sbus_writeb(u8 value, unsigned long reg) /* write byte */ | ||
175 | void sbus_writew(u16 value, unsigned long reg) /* write word */ | ||
176 | void sbus_writel(u32 value, unsigned long reg) /* write longword */ | ||
177 | |||
178 | So, let's say your device has a control register of some sort | ||
179 | at offset zero. The following might implement resetting your device: | ||
180 | |||
181 | #define CONTROL 0x00UL | ||
182 | |||
183 | #define CONTROL_RESET 0x00000001 /* Reset hardware */ | ||
184 | |||
185 | static void mydevice_reset(struct mydevice *mp) | ||
186 | { | ||
187 | sbus_writel(CONTROL_RESET, mp->regs + CONTROL); | ||
188 | } | ||
189 | |||
190 | Or perhaps there is a data port register at an offset of | ||
191 | 16 bytes which allows you to read bytes from a fifo in the device: | ||
192 | |||
193 | #define DATA 0x10UL | ||
194 | |||
195 | static u8 mydevice_get_byte(struct mydevice *mp) | ||
196 | { | ||
197 | return sbus_readb(mp->regs + DATA); | ||
198 | } | ||
199 | |||
200 | It's pretty straightforward, and clueful readers may have | ||
201 | noticed that these interfaces mimic the PCI interfaces of the | ||
202 | Linux kernel. This was not by accident. | ||
203 | |||
204 | WARNING: | ||
205 | |||
206 | DO NOT try to treat these opaque register mapping | ||
207 | values as a memory mapped pointer to some structure | ||
208 | which you can dereference. | ||
209 | |||
210 | It may be memory mapped, it may not be. In fact it | ||
211 | could be a physical address, or it could be the time | ||
212 | of day xor'd with 0xdeadbeef. :-) | ||
213 | |||
214 | Whatever it is, it's an implementation detail. The | ||
215 | interface was done this way to shield the driver | ||
216 | author from such complexities. | ||
217 | |||
218 | Doing DVMA | ||
219 | |||
220 | SBUS devices can perform DMA transactions in a way similar | ||
221 | to PCI but dissimilar to ISA, e.g. DMA masters supply address. | ||
222 | In contrast to PCI, however, that address (a bus address) is | ||
223 | translated by IOMMU before a memory access is performed and therefore | ||
224 | it is virtual. Sun calls this procedure DVMA. | ||
225 | |||
226 | Linux supports two styles of using SBUS DVMA: "consistent memory" | ||
227 | and "streaming DVMA". CPU view of consistent memory chunk is, well, | ||
228 | consistent with a view of a device. Think of it as an uncached memory. | ||
229 | Typically this way of doing DVMA is not very fast and drivers use it | ||
230 | mostly for control blocks or queues. On some CPUs we cannot flush or | ||
231 | invalidate individual pages or cache lines and doing explicit flushing | ||
232 | over every little byte in every control block would be wasteful. | ||
233 | |||
234 | Streaming DVMA is a preferred way to transfer large amounts of data. | ||
235 | This process works in the following way: | ||
236 | 1. a CPU stops accessing a certain part of memory, | ||
237 | flushes its caches covering that memory; | ||
238 | 2. a device does DVMA accesses, then posts an interrupt; | ||
239 | 3. CPU invalidates its caches and starts to access the memory. | ||
240 | |||
241 | A single streaming DVMA operation can touch several discontiguous | ||
242 | regions of a virtual bus address space. This is called a scatter-gather | ||
243 | DVMA. | ||
244 | |||
245 | [TBD: Why does neither Linux nor Solaris attempt to map disjoint pages | ||
246 | into a single virtual chunk with the help of IOMMU, so that non SG | ||
247 | DVMA masters would do SG? It'd be very helpful for RAID.] | ||
248 | |||
249 | In order to perform a consistent DVMA a driver does something | ||
250 | like the following: | ||
251 | |||
252 | char *mem; /* Address in the CPU space */ | ||
253 | u32 busa; /* Address in the SBus space */ | ||
254 | |||
255 | mem = (char *) sbus_alloc_consistent(sdev, MYMEMSIZE, &busa); | ||
256 | |||
257 | Then mem is used when CPU accesses this memory and busa | ||
258 | is fed to the device so that it can do DVMA. This is typically | ||
259 | done with an sbus_writel() into some device register. | ||
260 | |||
261 | Do not forget to free the DVMA resources once you are done: | ||
262 | |||
263 | sbus_free_consistent(sdev, MYMEMSIZE, mem, busa); | ||
264 | |||
265 | Streaming DVMA is more interesting. First you allocate some | ||
266 | memory suitable for it or pin down some user pages. Then it all works | ||
267 | like this: | ||
268 | |||
269 | char *mem = argumen1; | ||
270 | unsigned int size = argument2; | ||
271 | u32 busa; /* Address in the SBus space */ | ||
272 | |||
273 | *mem = 1; /* CPU can access */ | ||
274 | busa = sbus_map_single(sdev, mem, size); | ||
275 | if (busa == 0) ....... | ||
276 | |||
277 | /* Tell the device to use busa here */ | ||
278 | /* CPU cannot access the memory without sbus_dma_sync_single() */ | ||
279 | |||
280 | sbus_unmap_single(sdev, busa, size); | ||
281 | if (*mem == 0) .... /* CPU can access again */ | ||
282 | |||
283 | It is possible to retain mappings and ask the device to | ||
284 | access data again and again without calling sbus_unmap_single. | ||
285 | However, CPU caches must be invalidated with sbus_dma_sync_single | ||
286 | before such access. | ||
287 | |||
288 | [TBD but what about writeback caches here... do we have any?] | ||
289 | |||
290 | There is an equivalent set of functions doing the same thing | ||
291 | only with several memory segments at once for devices capable of | ||
292 | scatter-gather transfers. Use the Source, Luke. | ||
293 | |||
294 | Examples | ||
295 | |||
296 | drivers/net/sunhme.c | ||
297 | This is a complicated driver which illustrates many concepts | ||
298 | discussed above and plus it handles both PCI and SBUS boards. | ||
299 | |||
300 | drivers/scsi/esp.c | ||
301 | Check it out for scatter-gather DVMA. | ||
302 | |||
303 | drivers/sbus/char/bpp.c | ||
304 | A non-DVMA device. | ||
305 | |||
306 | drivers/net/sunlance.c | ||
307 | Lance driver abuses consistent mappings for data transfer. | ||
308 | It is a nifty trick which we do not particularly recommend... | ||
309 | Just check it out and know that it's legal. | ||
diff --git a/Documentation/timers/00-INDEX b/Documentation/timers/00-INDEX new file mode 100644 index 000000000000..397dc35e1323 --- /dev/null +++ b/Documentation/timers/00-INDEX | |||
@@ -0,0 +1,10 @@ | |||
1 | 00-INDEX | ||
2 | - this file | ||
3 | highres.txt | ||
4 | - High resolution timers and dynamic ticks design notes | ||
5 | hpet.txt | ||
6 | - High Precision Event Timer Driver for Linux | ||
7 | hrtimers.txt | ||
8 | - subsystem for high-resolution kernel timers | ||
9 | timer_stats.txt | ||
10 | - timer usage statistics | ||
diff --git a/Documentation/hpet.txt b/Documentation/timers/hpet.txt index 6ad52d9dad6c..e7c09abcfab4 100644 --- a/Documentation/hpet.txt +++ b/Documentation/timers/hpet.txt | |||
@@ -1,21 +1,32 @@ | |||
1 | High Precision Event Timer Driver for Linux | 1 | High Precision Event Timer Driver for Linux |
2 | 2 | ||
3 | The High Precision Event Timer (HPET) hardware is the future replacement | 3 | The High Precision Event Timer (HPET) hardware follows a specification |
4 | for the 8254 and Real Time Clock (RTC) periodic timer functionality. | 4 | by Intel and Microsoft which can be found at |
5 | Each HPET can have up to 32 timers. It is possible to configure the | 5 | |
6 | first two timers as legacy replacements for 8254 and RTC periodic timers. | 6 | http://www.intel.com/technology/architecture/hpetspec.htm |
7 | A specification done by Intel and Microsoft can be found at | 7 | |
8 | <http://www.intel.com/technology/architecture/hpetspec.htm>. | 8 | Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") |
9 | and up to 32 comparators. Normally three or more comparators are provided, | ||
10 | each of which can generate oneshot interrupts and at least one of which has | ||
11 | additional hardware to support periodic interrupts. The comparators are | ||
12 | also called "timers", which can be misleading since usually timers are | ||
13 | independent of each other ... these share a counter, complicating resets. | ||
14 | |||
15 | HPET devices can support two interrupt routing modes. In one mode, the | ||
16 | comparators are additional interrupt sources with no particular system | ||
17 | role. Many x86 BIOS writers don't route HPET interrupts at all, which | ||
18 | prevents use of that mode. They support the other "legacy replacement" | ||
19 | mode where the first two comparators block interrupts from 8254 timers | ||
20 | and from the RTC. | ||
9 | 21 | ||
10 | The driver supports detection of HPET driver allocation and initialization | 22 | The driver supports detection of HPET driver allocation and initialization |
11 | of the HPET before the driver module_init routine is called. This enables | 23 | of the HPET before the driver module_init routine is called. This enables |
12 | platform code which uses timer 0 or 1 as the main timer to intercept HPET | 24 | platform code which uses timer 0 or 1 as the main timer to intercept HPET |
13 | initialization. An example of this initialization can be found in | 25 | initialization. An example of this initialization can be found in |
14 | arch/i386/kernel/time_hpet.c. | 26 | arch/x86/kernel/hpet.c. |
15 | 27 | ||
16 | The driver provides two APIs which are very similar to the API found in | 28 | The driver provides a userspace API which resembles the API found in the |
17 | the rtc.c driver. There is a user space API and a kernel space API. | 29 | RTC driver framework. An example user space program is provided below. |
18 | An example user space program is provided below. | ||
19 | 30 | ||
20 | #include <stdio.h> | 31 | #include <stdio.h> |
21 | #include <stdlib.h> | 32 | #include <stdlib.h> |
@@ -286,15 +297,3 @@ out: | |||
286 | 297 | ||
287 | return; | 298 | return; |
288 | } | 299 | } |
289 | |||
290 | The kernel API has three interfaces exported from the driver: | ||
291 | |||
292 | hpet_register(struct hpet_task *tp, int periodic) | ||
293 | hpet_unregister(struct hpet_task *tp) | ||
294 | hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg) | ||
295 | |||
296 | The kernel module using this interface fills in the ht_func and ht_data | ||
297 | members of the hpet_task structure before calling hpet_register. | ||
298 | hpet_control simply vectors to the hpet_ioctl routine and has the same | ||
299 | commands and respective arguments as the user API. hpet_unregister | ||
300 | is used to terminate usage of the HPET timer reserved by hpet_register. | ||
diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX new file mode 100644 index 000000000000..dbe3377754af --- /dev/null +++ b/Documentation/x86/00-INDEX | |||
@@ -0,0 +1,4 @@ | |||
1 | 00-INDEX | ||
2 | - this file | ||
3 | mtrr.txt | ||
4 | - how to use x86 Memory Type Range Registers to increase performance | ||
diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/boot.txt index 147bfe511cdd..83c0033ee9e0 100644 --- a/Documentation/x86/i386/boot.txt +++ b/Documentation/x86/boot.txt | |||
@@ -308,7 +308,7 @@ Protocol: 2.00+ | |||
308 | 308 | ||
309 | Field name: start_sys | 309 | Field name: start_sys |
310 | Type: read | 310 | Type: read |
311 | Offset/size: 0x20c/4 | 311 | Offset/size: 0x20c/2 |
312 | Protocol: 2.00+ | 312 | Protocol: 2.00+ |
313 | 313 | ||
314 | The load low segment (0x1000). Obsolete. | 314 | The load low segment (0x1000). Obsolete. |
diff --git a/Documentation/mtrr.txt b/Documentation/x86/mtrr.txt index c39ac395970e..cc071dc333c2 100644 --- a/Documentation/mtrr.txt +++ b/Documentation/x86/mtrr.txt | |||
@@ -18,7 +18,7 @@ Richard Gooch | |||
18 | The AMD K6-2 (stepping 8 and above) and K6-3 processors have two | 18 | The AMD K6-2 (stepping 8 and above) and K6-3 processors have two |
19 | MTRRs. These are supported. The AMD Athlon family provide 8 Intel | 19 | MTRRs. These are supported. The AMD Athlon family provide 8 Intel |
20 | style MTRRs. | 20 | style MTRRs. |
21 | 21 | ||
22 | The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These | 22 | The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These |
23 | are supported. | 23 | are supported. |
24 | 24 | ||
@@ -87,7 +87,7 @@ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1 | |||
87 | reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1 | 87 | reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1 |
88 | reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1 | 88 | reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1 |
89 | 89 | ||
90 | Some cards (especially Voodoo Graphics boards) need this 4 kB area | 90 | Some cards (especially Voodoo Graphics boards) need this 4 kB area |
91 | excluded from the beginning of the region because it is used for | 91 | excluded from the beginning of the region because it is used for |
92 | registers. | 92 | registers. |
93 | 93 | ||
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index 17965f927c15..c93ff5f4c0dd 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt | |||
@@ -14,6 +14,10 @@ PAT allows for different types of memory attributes. The most commonly used | |||
14 | ones that will be supported at this time are Write-back, Uncached, | 14 | ones that will be supported at this time are Write-back, Uncached, |
15 | Write-combined and Uncached Minus. | 15 | Write-combined and Uncached Minus. |
16 | 16 | ||
17 | |||
18 | PAT APIs | ||
19 | -------- | ||
20 | |||
17 | There are many different APIs in the kernel that allow setting of memory | 21 | There are many different APIs in the kernel that allow setting of memory |
18 | attributes at the page level. In order to avoid aliasing, these interfaces | 22 | attributes at the page level. In order to avoid aliasing, these interfaces |
19 | should be used thoughtfully. Below is a table of interfaces available, | 23 | should be used thoughtfully. Below is a table of interfaces available, |
@@ -26,38 +30,38 @@ address range to avoid any aliasing. | |||
26 | API | RAM | ACPI,... | Reserved/Holes | | 30 | API | RAM | ACPI,... | Reserved/Holes | |
27 | -----------------------|----------|------------|------------------| | 31 | -----------------------|----------|------------|------------------| |
28 | | | | | | 32 | | | | | |
29 | ioremap | -- | UC | UC | | 33 | ioremap | -- | UC- | UC- | |
30 | | | | | | 34 | | | | | |
31 | ioremap_cache | -- | WB | WB | | 35 | ioremap_cache | -- | WB | WB | |
32 | | | | | | 36 | | | | | |
33 | ioremap_nocache | -- | UC | UC | | 37 | ioremap_nocache | -- | UC- | UC- | |
34 | | | | | | 38 | | | | | |
35 | ioremap_wc | -- | -- | WC | | 39 | ioremap_wc | -- | -- | WC | |
36 | | | | | | 40 | | | | | |
37 | set_memory_uc | UC | -- | -- | | 41 | set_memory_uc | UC- | -- | -- | |
38 | set_memory_wb | | | | | 42 | set_memory_wb | | | | |
39 | | | | | | 43 | | | | | |
40 | set_memory_wc | WC | -- | -- | | 44 | set_memory_wc | WC | -- | -- | |
41 | set_memory_wb | | | | | 45 | set_memory_wb | | | | |
42 | | | | | | 46 | | | | | |
43 | pci sysfs resource | -- | -- | UC | | 47 | pci sysfs resource | -- | -- | UC- | |
44 | | | | | | 48 | | | | | |
45 | pci sysfs resource_wc | -- | -- | WC | | 49 | pci sysfs resource_wc | -- | -- | WC | |
46 | is IORESOURCE_PREFETCH| | | | | 50 | is IORESOURCE_PREFETCH| | | | |
47 | | | | | | 51 | | | | | |
48 | pci proc | -- | -- | UC | | 52 | pci proc | -- | -- | UC- | |
49 | !PCIIOC_WRITE_COMBINE | | | | | 53 | !PCIIOC_WRITE_COMBINE | | | | |
50 | | | | | | 54 | | | | | |
51 | pci proc | -- | -- | WC | | 55 | pci proc | -- | -- | WC | |
52 | PCIIOC_WRITE_COMBINE | | | | | 56 | PCIIOC_WRITE_COMBINE | | | | |
53 | | | | | | 57 | | | | | |
54 | /dev/mem | -- | UC | UC | | 58 | /dev/mem | -- | WB/WC/UC- | WB/WC/UC- | |
55 | read-write | | | | | 59 | read-write | | | | |
56 | | | | | | 60 | | | | | |
57 | /dev/mem | -- | UC | UC | | 61 | /dev/mem | -- | UC- | UC- | |
58 | mmap SYNC flag | | | | | 62 | mmap SYNC flag | | | | |
59 | | | | | | 63 | | | | | |
60 | /dev/mem | -- | WB/WC/UC | WB/WC/UC | | 64 | /dev/mem | -- | WB/WC/UC- | WB/WC/UC- | |
61 | mmap !SYNC flag | |(from exist-| (from exist- | | 65 | mmap !SYNC flag | |(from exist-| (from exist- | |
62 | and | | ing alias)| ing alias) | | 66 | and | | ing alias)| ing alias) | |
63 | any alias to this area| | | | | 67 | any alias to this area| | | | |
@@ -68,7 +72,7 @@ pci proc | -- | -- | WC | | |||
68 | and | | | | | 72 | and | | | | |
69 | MTRR says WB | | | | | 73 | MTRR says WB | | | | |
70 | | | | | | 74 | | | | | |
71 | /dev/mem | -- | -- | UC_MINUS | | 75 | /dev/mem | -- | -- | UC- | |
72 | mmap !SYNC flag | | | | | 76 | mmap !SYNC flag | | | | |
73 | no alias to this area | | | | | 77 | no alias to this area | | | | |
74 | and | | | | | 78 | and | | | | |
@@ -98,3 +102,35 @@ types. | |||
98 | 102 | ||
99 | Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. | 103 | Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. |
100 | 104 | ||
105 | |||
106 | PAT debugging | ||
107 | ------------- | ||
108 | |||
109 | With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by | ||
110 | |||
111 | # mount -t debugfs debugfs /sys/kernel/debug | ||
112 | # cat /sys/kernel/debug/x86/pat_memtype_list | ||
113 | PAT memtype list: | ||
114 | uncached-minus @ 0x7fadf000-0x7fae0000 | ||
115 | uncached-minus @ 0x7fb19000-0x7fb1a000 | ||
116 | uncached-minus @ 0x7fb1a000-0x7fb1b000 | ||
117 | uncached-minus @ 0x7fb1b000-0x7fb1c000 | ||
118 | uncached-minus @ 0x7fb1c000-0x7fb1d000 | ||
119 | uncached-minus @ 0x7fb1d000-0x7fb1e000 | ||
120 | uncached-minus @ 0x7fb1e000-0x7fb25000 | ||
121 | uncached-minus @ 0x7fb25000-0x7fb26000 | ||
122 | uncached-minus @ 0x7fb26000-0x7fb27000 | ||
123 | uncached-minus @ 0x7fb27000-0x7fb28000 | ||
124 | uncached-minus @ 0x7fb28000-0x7fb2e000 | ||
125 | uncached-minus @ 0x7fb2e000-0x7fb2f000 | ||
126 | uncached-minus @ 0x7fb2f000-0x7fb30000 | ||
127 | uncached-minus @ 0x7fb31000-0x7fb32000 | ||
128 | uncached-minus @ 0x80000000-0x90000000 | ||
129 | |||
130 | This list shows physical address ranges and various PAT settings used to | ||
131 | access those physical address ranges. | ||
132 | |||
133 | Another, more verbose way of getting PAT related debug messages is with | ||
134 | "debugpat" boot parameter. With this parameter, various debug messages are | ||
135 | printed to dmesg log. | ||
136 | |||
diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt index 1894cdfc69d9..1894cdfc69d9 100644 --- a/Documentation/x86/i386/usb-legacy-support.txt +++ b/Documentation/x86/usb-legacy-support.txt | |||
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index b0c7b6c4abda..72ffb5373ec7 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -54,10 +54,6 @@ APICs | |||
54 | apicmaintimer. Useful when your PIT timer is totally | 54 | apicmaintimer. Useful when your PIT timer is totally |
55 | broken. | 55 | broken. |
56 | 56 | ||
57 | disable_8254_timer / enable_8254_timer | ||
58 | Enable interrupt 0 timer routing over the 8254 in addition to over | ||
59 | the IO-APIC. The kernel tries to set a sensible default. | ||
60 | |||
61 | Early Console | 57 | Early Console |
62 | 58 | ||
63 | syntax: earlyprintk=vga | 59 | syntax: earlyprintk=vga |
diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/zero-page.txt index 169ad423a3d1..169ad423a3d1 100644 --- a/Documentation/x86/i386/zero-page.txt +++ b/Documentation/x86/zero-page.txt | |||