diff options
-rw-r--r-- | Documentation/DocBook/kernel-hacking.tmpl | 310 |
1 files changed, 144 insertions, 166 deletions
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index 49a9ef82d575..6367bba32d22 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl | |||
@@ -8,8 +8,7 @@ | |||
8 | 8 | ||
9 | <authorgroup> | 9 | <authorgroup> |
10 | <author> | 10 | <author> |
11 | <firstname>Paul</firstname> | 11 | <firstname>Rusty</firstname> |
12 | <othername>Rusty</othername> | ||
13 | <surname>Russell</surname> | 12 | <surname>Russell</surname> |
14 | <affiliation> | 13 | <affiliation> |
15 | <address> | 14 | <address> |
@@ -20,7 +19,7 @@ | |||
20 | </authorgroup> | 19 | </authorgroup> |
21 | 20 | ||
22 | <copyright> | 21 | <copyright> |
23 | <year>2001</year> | 22 | <year>2005</year> |
24 | <holder>Rusty Russell</holder> | 23 | <holder>Rusty Russell</holder> |
25 | </copyright> | 24 | </copyright> |
26 | 25 | ||
@@ -64,7 +63,7 @@ | |||
64 | <chapter id="introduction"> | 63 | <chapter id="introduction"> |
65 | <title>Introduction</title> | 64 | <title>Introduction</title> |
66 | <para> | 65 | <para> |
67 | Welcome, gentle reader, to Rusty's Unreliable Guide to Linux | 66 | Welcome, gentle reader, to Rusty's Remarkably Unreliable Guide to Linux |
68 | Kernel Hacking. This document describes the common routines and | 67 | Kernel Hacking. This document describes the common routines and |
69 | general requirements for kernel code: its goal is to serve as a | 68 | general requirements for kernel code: its goal is to serve as a |
70 | primer for Linux kernel development for experienced C | 69 | primer for Linux kernel development for experienced C |
@@ -96,13 +95,13 @@ | |||
96 | 95 | ||
97 | <listitem> | 96 | <listitem> |
98 | <para> | 97 | <para> |
99 | not associated with any process, serving a softirq, tasklet or bh; | 98 | not associated with any process, serving a softirq or tasklet; |
100 | </para> | 99 | </para> |
101 | </listitem> | 100 | </listitem> |
102 | 101 | ||
103 | <listitem> | 102 | <listitem> |
104 | <para> | 103 | <para> |
105 | running in kernel space, associated with a process; | 104 | running in kernel space, associated with a process (user context); |
106 | </para> | 105 | </para> |
107 | </listitem> | 106 | </listitem> |
108 | 107 | ||
@@ -114,11 +113,12 @@ | |||
114 | </itemizedlist> | 113 | </itemizedlist> |
115 | 114 | ||
116 | <para> | 115 | <para> |
117 | There is a strict ordering between these: other than the last | 116 | There is an ordering between these. The bottom two can preempt |
118 | category (userspace) each can only be pre-empted by those above. | 117 | each other, but above that is a strict hierarchy: each can only be |
119 | For example, while a softirq is running on a CPU, no other | 118 | preempted by the ones above it. For example, while a softirq is |
120 | softirq will pre-empt it, but a hardware interrupt can. However, | 119 | running on a CPU, no other softirq will preempt it, but a hardware |
121 | any other CPUs in the system execute independently. | 120 | interrupt can. However, any other CPUs in the system execute |
121 | independently. | ||
122 | </para> | 122 | </para> |
123 | 123 | ||
124 | <para> | 124 | <para> |
@@ -130,10 +130,10 @@ | |||
130 | <title>User Context</title> | 130 | <title>User Context</title> |
131 | 131 | ||
132 | <para> | 132 | <para> |
133 | User context is when you are coming in from a system call or | 133 | User context is when you are coming in from a system call or other |
134 | other trap: you can sleep, and you own the CPU (except for | 134 | trap: like userspace, you can be preempted by more important tasks |
135 | interrupts) until you call <function>schedule()</function>. | 135 | and by interrupts. You can sleep, by calling |
136 | In other words, user context (unlike userspace) is not pre-emptable. | 136 | <function>schedule()</function>. |
137 | </para> | 137 | </para> |
138 | 138 | ||
139 | <note> | 139 | <note> |
@@ -153,7 +153,7 @@ | |||
153 | 153 | ||
154 | <caution> | 154 | <caution> |
155 | <para> | 155 | <para> |
156 | Beware that if you have interrupts or bottom halves disabled | 156 | Beware that if you have preemption or softirqs disabled |
157 | (see below), <function>in_interrupt()</function> will return a | 157 | (see below), <function>in_interrupt()</function> will return a |
158 | false positive. | 158 | false positive. |
159 | </para> | 159 | </para> |
@@ -168,10 +168,10 @@ | |||
168 | <hardware>keyboard</hardware> are examples of real | 168 | <hardware>keyboard</hardware> are examples of real |
169 | hardware which produce interrupts at any time. The kernel runs | 169 | hardware which produce interrupts at any time. The kernel runs |
170 | interrupt handlers, which service the hardware. The kernel | 170 | interrupt handlers, which service the hardware. The kernel |
171 | guarantees that this handler is never re-entered: if another | 171 | guarantees that this handler is never re-entered: if the same |
172 | interrupt arrives, it is queued (or dropped). Because it | 172 | interrupt arrives, it is queued (or dropped). Because it |
173 | disables interrupts, this handler has to be fast: frequently it | 173 | disables interrupts, this handler has to be fast: frequently it |
174 | simply acknowledges the interrupt, marks a `software interrupt' | 174 | simply acknowledges the interrupt, marks a 'software interrupt' |
175 | for execution and exits. | 175 | for execution and exits. |
176 | </para> | 176 | </para> |
177 | 177 | ||
@@ -188,60 +188,52 @@ | |||
188 | </sect1> | 188 | </sect1> |
189 | 189 | ||
190 | <sect1 id="basics-softirqs"> | 190 | <sect1 id="basics-softirqs"> |
191 | <title>Software Interrupt Context: Bottom Halves, Tasklets, softirqs</title> | 191 | <title>Software Interrupt Context: Softirqs and Tasklets</title> |
192 | 192 | ||
193 | <para> | 193 | <para> |
194 | Whenever a system call is about to return to userspace, or a | 194 | Whenever a system call is about to return to userspace, or a |
195 | hardware interrupt handler exits, any `software interrupts' | 195 | hardware interrupt handler exits, any 'software interrupts' |
196 | which are marked pending (usually by hardware interrupts) are | 196 | which are marked pending (usually by hardware interrupts) are |
197 | run (<filename>kernel/softirq.c</filename>). | 197 | run (<filename>kernel/softirq.c</filename>). |
198 | </para> | 198 | </para> |
199 | 199 | ||
200 | <para> | 200 | <para> |
201 | Much of the real interrupt handling work is done here. Early in | 201 | Much of the real interrupt handling work is done here. Early in |
202 | the transition to <acronym>SMP</acronym>, there were only `bottom | 202 | the transition to <acronym>SMP</acronym>, there were only 'bottom |
203 | halves' (BHs), which didn't take advantage of multiple CPUs. Shortly | 203 | halves' (BHs), which didn't take advantage of multiple CPUs. Shortly |
204 | after we switched from wind-up computers made of match-sticks and snot, | 204 | after we switched from wind-up computers made of match-sticks and snot, |
205 | we abandoned this limitation. | 205 | we abandoned this limitation and switched to 'softirqs'. |
206 | </para> | 206 | </para> |
207 | 207 | ||
208 | <para> | 208 | <para> |
209 | <filename class="headerfile">include/linux/interrupt.h</filename> lists the | 209 | <filename class="headerfile">include/linux/interrupt.h</filename> lists the |
210 | different BH's. No matter how many CPUs you have, no two BHs will run at | 210 | different softirqs. A very important softirq is the |
211 | the same time. This made the transition to SMP simpler, but sucks hard for | 211 | timer softirq (<filename |
212 | scalable performance. A very important bottom half is the timer | 212 | class="headerfile">include/linux/timer.h</filename>): you can |
213 | BH (<filename class="headerfile">include/linux/timer.h</filename>): you | 213 | register to have it call functions for you in a given length of |
214 | can register to have it call functions for you in a given length of time. | 214 | time. |
215 | </para> | 215 | </para> |
216 | 216 | ||
217 | <para> | 217 | <para> |
218 | 2.3.43 introduced softirqs, and re-implemented the (now | 218 | Softirqs are often a pain to deal with, since the same softirq |
219 | deprecated) BHs underneath them. Softirqs are fully-SMP | 219 | can run simultaneously on more than one CPU. For this reason, |
220 | versions of BHs: they can run on as many CPUs at once as | 220 | tasklets (<filename |
221 | required. This means they need to deal with any races in shared | 221 | class="headerfile">include/linux/interrupt.h</filename>) are more |
222 | data using their own locks. A bitmask is used to keep track of | 222 | often used: they are dynamically-registrable (meaning you can have |
223 | which are enabled, so the 32 available softirqs should not be | 223 | as many as you want), and they also guarantee that any tasklet |
224 | used up lightly. (<emphasis>Yes</emphasis>, people will | 224 | will only run on one CPU at any time, although different tasklets |
225 | notice). | 225 | can run simultaneously. |
226 | </para> | ||
227 | |||
228 | <para> | ||
229 | tasklets (<filename class="headerfile">include/linux/interrupt.h</filename>) | ||
230 | are like softirqs, except they are dynamically-registrable (meaning you | ||
231 | can have as many as you want), and they also guarantee that any tasklet | ||
232 | will only run on one CPU at any time, although different tasklets can | ||
233 | run simultaneously (unlike different BHs). | ||
234 | </para> | 226 | </para> |
235 | <caution> | 227 | <caution> |
236 | <para> | 228 | <para> |
237 | The name `tasklet' is misleading: they have nothing to do with `tasks', | 229 | The name 'tasklet' is misleading: they have nothing to do with 'tasks', |
238 | and probably more to do with some bad vodka Alexey Kuznetsov had at the | 230 | and probably more to do with some bad vodka Alexey Kuznetsov had at the |
239 | time. | 231 | time. |
240 | </para> | 232 | </para> |
241 | </caution> | 233 | </caution> |
242 | 234 | ||
243 | <para> | 235 | <para> |
244 | You can tell you are in a softirq (or bottom half, or tasklet) | 236 | You can tell you are in a softirq (or tasklet) |
245 | using the <function>in_softirq()</function> macro | 237 | using the <function>in_softirq()</function> macro |
246 | (<filename class="headerfile">include/linux/interrupt.h</filename>). | 238 | (<filename class="headerfile">include/linux/interrupt.h</filename>). |
247 | </para> | 239 | </para> |
@@ -288,11 +280,10 @@ | |||
288 | <term>A rigid stack limit</term> | 280 | <term>A rigid stack limit</term> |
289 | <listitem> | 281 | <listitem> |
290 | <para> | 282 | <para> |
291 | The kernel stack is about 6K in 2.2 (for most | 283 | Depending on configuration options the kernel stack is about 3K to 6K for most 32-bit architectures: it's |
292 | architectures: it's about 14K on the Alpha), and shared | 284 | about 14K on most 64-bit archs, and often shared with interrupts |
293 | with interrupts so you can't use it all. Avoid deep | 285 | so you can't use it all. Avoid deep recursion and huge local |
294 | recursion and huge local arrays on the stack (allocate | 286 | arrays on the stack (allocate them dynamically instead). |
295 | them dynamically instead). | ||
296 | </para> | 287 | </para> |
297 | </listitem> | 288 | </listitem> |
298 | </varlistentry> | 289 | </varlistentry> |
@@ -339,7 +330,7 @@ asmlinkage long sys_mycall(int arg) | |||
339 | 330 | ||
340 | <para> | 331 | <para> |
341 | If all your routine does is read or write some parameter, consider | 332 | If all your routine does is read or write some parameter, consider |
342 | implementing a <function>sysctl</function> interface instead. | 333 | implementing a <function>sysfs</function> interface instead. |
343 | </para> | 334 | </para> |
344 | 335 | ||
345 | <para> | 336 | <para> |
@@ -417,7 +408,10 @@ cond_resched(); /* Will sleep */ | |||
417 | </para> | 408 | </para> |
418 | 409 | ||
419 | <para> | 410 | <para> |
420 | You will eventually lock up your box if you break these rules. | 411 | You should always compile your kernel with |
412 | <symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn | ||
413 | you if you break these rules. If you <emphasis>do</emphasis> break | ||
414 | the rules, you will eventually lock up your box. | ||
421 | </para> | 415 | </para> |
422 | 416 | ||
423 | <para> | 417 | <para> |
@@ -515,8 +509,7 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
515 | success). | 509 | success). |
516 | </para> | 510 | </para> |
517 | </caution> | 511 | </caution> |
518 | [Yes, this moronic interface makes me cringe. Please submit a | 512 | [Yes, this moronic interface makes me cringe. The flamewar comes up every year or so. --RR.] |
519 | patch and become my hero --RR.] | ||
520 | </para> | 513 | </para> |
521 | <para> | 514 | <para> |
522 | The functions may sleep implicitly. This should never be called | 515 | The functions may sleep implicitly. This should never be called |
@@ -587,10 +580,11 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
587 | </variablelist> | 580 | </variablelist> |
588 | 581 | ||
589 | <para> | 582 | <para> |
590 | If you see a <errorname>kmem_grow: Called nonatomically from int | 583 | If you see a <errorname>sleeping function called from invalid |
591 | </errorname> warning message you called a memory allocation function | 584 | context</errorname> warning message, then maybe you called a |
592 | from interrupt context without <constant>GFP_ATOMIC</constant>. | 585 | sleeping allocation function from interrupt context without |
593 | You should really fix that. Run, don't walk. | 586 | <constant>GFP_ATOMIC</constant>. You should really fix that. |
587 | Run, don't walk. | ||
594 | </para> | 588 | </para> |
595 | 589 | ||
596 | <para> | 590 | <para> |
@@ -639,16 +633,16 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
639 | </sect1> | 633 | </sect1> |
640 | 634 | ||
641 | <sect1 id="routines-udelay"> | 635 | <sect1 id="routines-udelay"> |
642 | <title><function>udelay()</function>/<function>mdelay()</function> | 636 | <title><function>mdelay()</function>/<function>udelay()</function> |
643 | <filename class="headerfile">include/asm/delay.h</filename> | 637 | <filename class="headerfile">include/asm/delay.h</filename> |
644 | <filename class="headerfile">include/linux/delay.h</filename> | 638 | <filename class="headerfile">include/linux/delay.h</filename> |
645 | </title> | 639 | </title> |
646 | 640 | ||
647 | <para> | 641 | <para> |
648 | The <function>udelay()</function> function can be used for small pauses. | 642 | The <function>udelay()</function> and <function>ndelay()</function> functions can be used for small pauses. |
649 | Do not use large values with <function>udelay()</function> as you risk | 643 | Do not use large values with them as you risk |
650 | overflow - the helper function <function>mdelay()</function> is useful | 644 | overflow - the helper function <function>mdelay()</function> is useful |
651 | here, or even consider <function>schedule_timeout()</function>. | 645 | here, or consider <function>msleep()</function>. |
652 | </para> | 646 | </para> |
653 | </sect1> | 647 | </sect1> |
654 | 648 | ||
@@ -698,8 +692,8 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
698 | These routines disable soft interrupts on the local CPU, and | 692 | These routines disable soft interrupts on the local CPU, and |
699 | restore them. They are reentrant; if soft interrupts were | 693 | restore them. They are reentrant; if soft interrupts were |
700 | disabled before, they will still be disabled after this pair | 694 | disabled before, they will still be disabled after this pair |
701 | of functions has been called. They prevent softirqs, tasklets | 695 | of functions has been called. They prevent softirqs and tasklets |
702 | and bottom halves from running on the current CPU. | 696 | from running on the current CPU. |
703 | </para> | 697 | </para> |
704 | </sect1> | 698 | </sect1> |
705 | 699 | ||
@@ -708,10 +702,16 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
708 | <filename class="headerfile">include/asm/smp.h</filename></title> | 702 | <filename class="headerfile">include/asm/smp.h</filename></title> |
709 | 703 | ||
710 | <para> | 704 | <para> |
711 | <function>smp_processor_id()</function> returns the current | 705 | <function>get_cpu()</function> disables preemption (so you won't |
712 | processor number, between 0 and <symbol>NR_CPUS</symbol> (the | 706 | suddenly get moved to another CPU) and returns the current |
713 | maximum number of CPUs supported by Linux, currently 32). These | 707 | processor number, between 0 and <symbol>NR_CPUS</symbol>. Note |
714 | values are not necessarily continuous. | 708 | that the CPU numbers are not necessarily contiguous. You return |
709 | it again with <function>put_cpu()</function> when you are done. | ||
710 | </para> | ||
711 | <para> | ||
712 | If you know you cannot be preempted by another task (i.e. you are | ||
713 | in interrupt context, or have preemption disabled) you can use | ||
714 | <function>smp_processor_id()</function>. | ||
715 | </para> | 715 | </para> |
716 | </sect1> | 716 | </sect1> |
717 | 717 | ||
@@ -722,19 +722,14 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
722 | <para> | 722 | <para> |
723 | After boot, the kernel frees up a special section; functions | 723 | After boot, the kernel frees up a special section; functions |
724 | marked with <type>__init</type> and data structures marked with | 724 | marked with <type>__init</type> and data structures marked with |
725 | <type>__initdata</type> are dropped after boot is complete (within | 725 | <type>__initdata</type> are dropped after boot is complete: similarly |
726 | modules this directive is currently ignored). <type>__exit</type> | 726 | modules discard this memory after initialization. <type>__exit</type> |
727 | is used to declare a function which is only required on exit: the | 727 | is used to declare a function which is only required on exit: the |
728 | function will be dropped if this file is not compiled as a module. | 728 | function will be dropped if this file is not compiled as a module. |
729 | See the header file for use. Note that it makes no sense for a function | 729 | See the header file for use. Note that it makes no sense for a function |
730 | marked with <type>__init</type> to be exported to modules with | 730 | marked with <type>__init</type> to be exported to modules with |
731 | <function>EXPORT_SYMBOL()</function> - this will break. | 731 | <function>EXPORT_SYMBOL()</function> - this will break. |
732 | </para> | 732 | </para> |
733 | <para> | ||
734 | Static data structures marked as <type>__initdata</type> must be initialised | ||
735 | (as opposed to ordinary static data which is zeroed BSS) and cannot be | ||
736 | <type>const</type>. | ||
737 | </para> | ||
738 | 733 | ||
739 | </sect1> | 734 | </sect1> |
740 | 735 | ||
@@ -762,9 +757,8 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
762 | <para> | 757 | <para> |
763 | The function can return a negative error number to cause | 758 | The function can return a negative error number to cause |
764 | module loading to fail (unfortunately, this has no effect if | 759 | module loading to fail (unfortunately, this has no effect if |
765 | the module is compiled into the kernel). For modules, this is | 760 | the module is compiled into the kernel). This function is |
766 | called in user context, with interrupts enabled, and the | 761 | called in user context with interrupts enabled, so it can sleep. |
767 | kernel lock held, so it can sleep. | ||
768 | </para> | 762 | </para> |
769 | </sect1> | 763 | </sect1> |
770 | 764 | ||
@@ -779,6 +773,34 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
779 | reached zero. This function can also sleep, but cannot fail: | 773 | reached zero. This function can also sleep, but cannot fail: |
780 | everything must be cleaned up by the time it returns. | 774 | everything must be cleaned up by the time it returns. |
781 | </para> | 775 | </para> |
776 | |||
777 | <para> | ||
778 | Note that this macro is optional: if it is not present, your | ||
779 | module will not be removable (except for 'rmmod -f'). | ||
780 | </para> | ||
781 | </sect1> | ||
782 | |||
783 | <sect1 id="routines-module-use-counters"> | ||
784 | <title> <function>try_module_get()</function>/<function>module_put()</function> | ||
785 | <filename class="headerfile">include/linux/module.h</filename></title> | ||
786 | |||
787 | <para> | ||
788 | These manipulate the module usage count, to protect against | ||
789 | removal (a module also can't be removed if another module uses one | ||
790 | of its exported symbols: see below). Before calling into module | ||
791 | code, you should call <function>try_module_get()</function> on | ||
792 | that module: if it fails, then the module is being removed and you | ||
793 | should act as if it wasn't there. Otherwise, you can safely enter | ||
794 | the module, and call <function>module_put()</function> when you're | ||
795 | finished. | ||
796 | </para> | ||
797 | |||
798 | <para> | ||
799 | Most registrable structures have an | ||
800 | <structfield>owner</structfield> field, such as in the | ||
801 | <structname>file_operations</structname> structure. Set this field | ||
802 | to the macro <symbol>THIS_MODULE</symbol>. | ||
803 | </para> | ||
782 | </sect1> | 804 | </sect1> |
783 | 805 | ||
784 | <!-- add info on new-style module refcounting here --> | 806 | <!-- add info on new-style module refcounting here --> |
@@ -821,7 +843,7 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
821 | There is a macro to do this: | 843 | There is a macro to do this: |
822 | <function>wait_event_interruptible()</function> | 844 | <function>wait_event_interruptible()</function> |
823 | 845 | ||
824 | <filename class="headerfile">include/linux/sched.h</filename> The | 846 | <filename class="headerfile">include/linux/wait.h</filename> The |
825 | first argument is the wait queue head, and the second is an | 847 | first argument is the wait queue head, and the second is an |
826 | expression which is evaluated; the macro returns | 848 | expression which is evaluated; the macro returns |
827 | <returnvalue>0</returnvalue> when this expression is true, or | 849 | <returnvalue>0</returnvalue> when this expression is true, or |
@@ -847,10 +869,11 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
847 | <para> | 869 | <para> |
848 | Call <function>wake_up()</function> | 870 | Call <function>wake_up()</function> |
849 | 871 | ||
850 | <filename class="headerfile">include/linux/sched.h</filename>;, | 872 | <filename class="headerfile">include/linux/wait.h</filename>, |
851 | which will wake up every process in the queue. The exception is | 873 | which will wake up every process in the queue. The exception is |
852 | if one has <constant>TASK_EXCLUSIVE</constant> set, in which case | 874 | if one has <constant>TASK_EXCLUSIVE</constant> set, in which case |
853 | the remainder of the queue will not be woken. | 875 | the remainder of the queue will not be woken. There are other variants |
876 | of this basic function available in the same header. | ||
854 | </para> | 877 | </para> |
855 | </sect1> | 878 | </sect1> |
856 | </chapter> | 879 | </chapter> |
@@ -863,7 +886,7 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
863 | first class of operations work on <type>atomic_t</type> | 886 | first class of operations work on <type>atomic_t</type> |
864 | 887 | ||
865 | <filename class="headerfile">include/asm/atomic.h</filename>; this | 888 | <filename class="headerfile">include/asm/atomic.h</filename>; this |
866 | contains a signed integer (at least 24 bits long), and you must use | 889 | contains a signed integer (at least 32 bits long), and you must use |
867 | these functions to manipulate or read atomic_t variables. | 890 | these functions to manipulate or read atomic_t variables. |
868 | <function>atomic_read()</function> and | 891 | <function>atomic_read()</function> and |
869 | <function>atomic_set()</function> get and set the counter, | 892 | <function>atomic_set()</function> get and set the counter, |
@@ -882,13 +905,12 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
882 | 905 | ||
883 | <para> | 906 | <para> |
884 | Note that these functions are slower than normal arithmetic, and | 907 | Note that these functions are slower than normal arithmetic, and |
885 | so should not be used unnecessarily. On some platforms they | 908 | so should not be used unnecessarily. |
886 | are much slower, like 32-bit Sparc where they use a spinlock. | ||
887 | </para> | 909 | </para> |
888 | 910 | ||
889 | <para> | 911 | <para> |
890 | The second class of atomic operations is atomic bit operations on a | 912 | The second class of atomic operations is atomic bit operations on an |
891 | <type>long</type>, defined in | 913 | <type>unsigned long</type>, defined in |
892 | 914 | ||
893 | <filename class="headerfile">include/linux/bitops.h</filename>. These | 915 | <filename class="headerfile">include/linux/bitops.h</filename>. These |
894 | operations generally take a pointer to the bit pattern, and a bit | 916 | operations generally take a pointer to the bit pattern, and a bit |
@@ -899,7 +921,7 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
899 | <function>test_and_clear_bit()</function> and | 921 | <function>test_and_clear_bit()</function> and |
900 | <function>test_and_change_bit()</function> do the same thing, | 922 | <function>test_and_change_bit()</function> do the same thing, |
901 | except return true if the bit was previously set; these are | 923 | except return true if the bit was previously set; these are |
902 | particularly useful for very simple locking. | 924 | particularly useful for atomically setting flags. |
903 | </para> | 925 | </para> |
904 | 926 | ||
905 | <para> | 927 | <para> |
@@ -907,12 +929,6 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
907 | than BITS_PER_LONG. The resulting behavior is strange on big-endian | 929 | than BITS_PER_LONG. The resulting behavior is strange on big-endian |
908 | platforms though so it is a good idea not to do this. | 930 | platforms though so it is a good idea not to do this. |
909 | </para> | 931 | </para> |
910 | |||
911 | <para> | ||
912 | Note that the order of bits depends on the architecture, and in | ||
913 | particular, the bitfield passed to these operations must be at | ||
914 | least as large as a <type>long</type>. | ||
915 | </para> | ||
916 | </chapter> | 932 | </chapter> |
917 | 933 | ||
918 | <chapter id="symbols"> | 934 | <chapter id="symbols"> |
@@ -932,11 +948,8 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
932 | <filename class="headerfile">include/linux/module.h</filename></title> | 948 | <filename class="headerfile">include/linux/module.h</filename></title> |
933 | 949 | ||
934 | <para> | 950 | <para> |
935 | This is the classic method of exporting a symbol, and it works | 951 | This is the classic method of exporting a symbol: dynamically |
936 | for both modules and non-modules. In the kernel all these | 952 | loaded modules will be able to use the symbol as normal. |
937 | declarations are often bundled into a single file to help | ||
938 | genksyms (which searches source files for these declarations). | ||
939 | See the comment on genksyms and Makefiles below. | ||
940 | </para> | 953 | </para> |
941 | </sect1> | 954 | </sect1> |
942 | 955 | ||
@@ -949,7 +962,8 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
949 | symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can | 962 | symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can |
950 | only be seen by modules with a | 963 | only be seen by modules with a |
951 | <function>MODULE_LICENSE()</function> that specifies a GPL | 964 | <function>MODULE_LICENSE()</function> that specifies a GPL |
952 | compatible license. | 965 | compatible license. It implies that the function is considered |
966 | an internal implementation issue, and not really an interface. | ||
953 | </para> | 967 | </para> |
954 | </sect1> | 968 | </sect1> |
955 | </chapter> | 969 | </chapter> |
@@ -962,12 +976,13 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
962 | <filename class="headerfile">include/linux/list.h</filename></title> | 976 | <filename class="headerfile">include/linux/list.h</filename></title> |
963 | 977 | ||
964 | <para> | 978 | <para> |
965 | There are three sets of linked-list routines in the kernel | 979 | There used to be three sets of linked-list routines in the kernel |
966 | headers, but this one seems to be winning out (and Linus has | 980 | headers, but this one is the winner. If you don't have some |
967 | used it). If you don't have some particular pressing need for | 981 | particular pressing need for a single list, it's a good choice. |
968 | a single list, it's a good choice. In fact, I don't care | 982 | </para> |
969 | whether it's a good choice or not, just use it so we can get | 983 | |
970 | rid of the others. | 984 | <para> |
985 | In particular, <function>list_for_each_entry</function> is useful. | ||
971 | </para> | 986 | </para> |
972 | </sect1> | 987 | </sect1> |
973 | 988 | ||
@@ -979,14 +994,13 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | |||
979 | convention, and return <returnvalue>0</returnvalue> for success, | 994 | convention, and return <returnvalue>0</returnvalue> for success, |
980 | and a negative error number | 995 | and a negative error number |
981 | (eg. <returnvalue>-EFAULT</returnvalue>) for failure. This can be | 996 | (eg. <returnvalue>-EFAULT</returnvalue>) for failure. This can be |
982 | unintuitive at first, but it's fairly widespread in the networking | 997 | unintuitive at first, but it's fairly widespread in the kernel. |
983 | code, for example. | ||
984 | </para> | 998 | </para> |
985 | 999 | ||
986 | <para> | 1000 | <para> |
987 | The filesystem code uses <function>ERR_PTR()</function> | 1001 | Using <function>ERR_PTR()</function> |
988 | 1002 | ||
989 | <filename class="headerfile">include/linux/fs.h</filename>; to | 1003 | <filename class="headerfile">include/linux/err.h</filename>; to |
990 | encode a negative error number into a pointer, and | 1004 | encode a negative error number into a pointer, and |
991 | <function>IS_ERR()</function> and <function>PTR_ERR()</function> | 1005 | <function>IS_ERR()</function> and <function>PTR_ERR()</function> |
992 | to get it back out again: avoids a separate pointer parameter for | 1006 | to get it back out again: avoids a separate pointer parameter for |
@@ -1040,7 +1054,7 @@ static struct block_device_operations opt_fops = { | |||
1040 | supported, due to lack of general use, but the following are | 1054 | supported, due to lack of general use, but the following are |
1041 | considered standard (see the GCC info page section "C | 1055 | considered standard (see the GCC info page section "C |
1042 | Extensions" for more details - Yes, really the info page, the | 1056 | Extensions" for more details - Yes, really the info page, the |
1043 | man page is only a short summary of the stuff in info): | 1057 | man page is only a short summary of the stuff in info). |
1044 | </para> | 1058 | </para> |
1045 | <itemizedlist> | 1059 | <itemizedlist> |
1046 | <listitem> | 1060 | <listitem> |
@@ -1091,7 +1105,7 @@ static struct block_device_operations opt_fops = { | |||
1091 | </listitem> | 1105 | </listitem> |
1092 | <listitem> | 1106 | <listitem> |
1093 | <para> | 1107 | <para> |
1094 | Function names as strings (__FUNCTION__) | 1108 | Function names as strings (__func__). |
1095 | </para> | 1109 | </para> |
1096 | </listitem> | 1110 | </listitem> |
1097 | <listitem> | 1111 | <listitem> |
@@ -1164,63 +1178,35 @@ static struct block_device_operations opt_fops = { | |||
1164 | <listitem> | 1178 | <listitem> |
1165 | <para> | 1179 | <para> |
1166 | Usually you want a configuration option for your kernel hack. | 1180 | Usually you want a configuration option for your kernel hack. |
1167 | Edit <filename>Config.in</filename> in the appropriate directory | 1181 | Edit <filename>Kconfig</filename> in the appropriate directory. |
1168 | (but under <filename>arch/</filename> it's called | 1182 | The Config language is simple to use by cut and paste, and there's |
1169 | <filename>config.in</filename>). The Config Language used is not | 1183 | complete documentation in |
1170 | bash, even though it looks like bash; the safe way is to use only | 1184 | <filename>Documentation/kbuild/kconfig-language.txt</filename>. |
1171 | the constructs that you already see in | ||
1172 | <filename>Config.in</filename> files (see | ||
1173 | <filename>Documentation/kbuild/kconfig-language.txt</filename>). | ||
1174 | It's good to run "make xconfig" at least once to test (because | ||
1175 | it's the only one with a static parser). | ||
1176 | </para> | ||
1177 | |||
1178 | <para> | ||
1179 | Variables which can be Y or N use <type>bool</type> followed by a | ||
1180 | tagline and the config define name (which must start with | ||
1181 | CONFIG_). The <type>tristate</type> function is the same, but | ||
1182 | allows the answer M (which defines | ||
1183 | <symbol>CONFIG_foo_MODULE</symbol> in your source, instead of | ||
1184 | <symbol>CONFIG_FOO</symbol>) if <symbol>CONFIG_MODULES</symbol> | ||
1185 | is enabled. | ||
1186 | </para> | 1185 | </para> |
1187 | 1186 | ||
1188 | <para> | 1187 | <para> |
1189 | You may well want to make your CONFIG option only visible if | 1188 | You may well want to make your CONFIG option only visible if |
1190 | <symbol>CONFIG_EXPERIMENTAL</symbol> is enabled: this serves as a | 1189 | <symbol>CONFIG_EXPERIMENTAL</symbol> is enabled: this serves as a |
1191 | warning to users. There are many other fancy things you can do: see | 1190 | warning to users. There are many other fancy things you can do: see |
1192 | the various <filename>Config.in</filename> files for ideas. | 1191 | the various <filename>Kconfig</filename> files for ideas. |
1193 | </para> | 1192 | </para> |
1194 | </listitem> | ||
1195 | 1193 | ||
1196 | <listitem> | ||
1197 | <para> | 1194 | <para> |
1198 | Edit the <filename>Makefile</filename>: the CONFIG variables are | 1195 | In your description of the option, make sure you address both the |
1199 | exported here so you can conditionalize compilation with `ifeq'. | 1196 | expert user and the user who knows nothing about your feature. Mention |
1200 | If your file exports symbols then add the names to | 1197 | incompatibilities and issues here. <emphasis> Definitely |
1201 | <varname>export-objs</varname> so that genksyms will find them. | 1198 | </emphasis> end your description with <quote> if in doubt, say N |
1202 | <caution> | 1199 | </quote> (or, occasionally, `Y'); this is for people who have no |
1203 | <para> | 1200 | idea what you are talking about. |
1204 | There is a restriction on the kernel build system that objects | ||
1205 | which export symbols must have globally unique names. | ||
1206 | If your object does not have a globally unique name then the | ||
1207 | standard fix is to move the | ||
1208 | <function>EXPORT_SYMBOL()</function> statements to their own | ||
1209 | object with a unique name. | ||
1210 | This is why several systems have separate exporting objects, | ||
1211 | usually suffixed with ksyms. | ||
1212 | </para> | ||
1213 | </caution> | ||
1214 | </para> | 1201 | </para> |
1215 | </listitem> | 1202 | </listitem> |
1216 | 1203 | ||
1217 | <listitem> | 1204 | <listitem> |
1218 | <para> | 1205 | <para> |
1219 | Document your option in Documentation/Configure.help. Mention | 1206 | Edit the <filename>Makefile</filename>: the CONFIG variables are |
1220 | incompatibilities and issues here. <emphasis> Definitely | 1207 | exported here so you can usually just add a "obj-$(CONFIG_xxx) += |
1221 | </emphasis> end your description with <quote> if in doubt, say N | 1208 | xxx.o" line. The syntax is documented in |
1222 | </quote> (or, occasionally, `Y'); this is for people who have no | 1209 | <filename>Documentation/kbuild/makefiles.txt</filename>. |
1223 | idea what you are talking about. | ||
1224 | </para> | 1210 | </para> |
1225 | </listitem> | 1211 | </listitem> |
1226 | 1212 | ||
@@ -1253,20 +1239,12 @@ static struct block_device_operations opt_fops = { | |||
1253 | </para> | 1239 | </para> |
1254 | 1240 | ||
1255 | <para> | 1241 | <para> |
1256 | <filename>include/linux/brlock.h:</filename> | 1242 | <filename>include/asm-i386/delay.h:</filename> |
1257 | </para> | 1243 | </para> |
1258 | <programlisting> | 1244 | <programlisting> |
1259 | extern inline void br_read_lock (enum brlock_indices idx) | 1245 | #define ndelay(n) (__builtin_constant_p(n) ? \ |
1260 | { | 1246 | ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ |
1261 | /* | 1247 | __ndelay(n)) |
1262 | * This causes a link-time bug message if an | ||
1263 | * invalid index is used: | ||
1264 | */ | ||
1265 | if (idx >= __BR_END) | ||
1266 | __br_lock_usage_bug(); | ||
1267 | |||
1268 | read_lock(&__brlock_array[smp_processor_id()][idx]); | ||
1269 | } | ||
1270 | </programlisting> | 1248 | </programlisting> |
1271 | 1249 | ||
1272 | <para> | 1250 | <para> |