-rw-r--r--  Documentation/CodingStyle | 20
-rw-r--r--  Documentation/DocBook/kernel-api.tmpl | 4
-rw-r--r--  Documentation/DocBook/uio-howto.tmpl | 611
-rw-r--r--  Documentation/HOWTO | 3
-rw-r--r--  Documentation/driver-model/devres.txt | 2
-rw-r--r--  Documentation/feature-removal-schedule.txt | 14
-rw-r--r--  Documentation/ja_JP/HOWTO | 650
-rw-r--r--  Documentation/ja_JP/stable_api_nonsense.txt | 263
-rw-r--r--  Documentation/zh_CN/HOWTO | 536
-rw-r--r--  Documentation/zh_CN/stable_api_nonsense.txt | 157
-rw-r--r--  drivers/Kconfig | 1
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/base/core.c | 168
-rw-r--r--  drivers/base/power/Makefile | 2
-rw-r--r--  drivers/base/power/power.h | 5
-rw-r--r--  drivers/base/power/runtime.c | 85
-rw-r--r--  drivers/base/power/sysfs.c | 66
-rw-r--r--  drivers/firewire/fw-ohci.c | 3
-rw-r--r--  drivers/firewire/fw-sbp2.c | 16
-rw-r--r--  drivers/firewire/fw-transaction.c | 9
-rw-r--r--  drivers/firewire/fw-transaction.h | 4
-rw-r--r--  drivers/infiniband/core/cm.c | 2
-rw-r--r--  drivers/infiniband/core/cma.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_av.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 54
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 156
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c | 3
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 28
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c | 56
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_iverbs.h | 7
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 50
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c | 1087
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.h | 21
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qes.h | 22
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c | 39
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c | 15
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_tools.h | 31
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_uverbs.c | 10
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.c | 8
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_phyp.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/hipz_fns_core.h | 4
-rw-r--r--  drivers/infiniband/hw/ehca/hipz_hw.h | 24
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.h | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_eeprom.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_pages.c | 26
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 115
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 22
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 221
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 28
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_wqe.h | 15
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 5
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 4
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 47
-rw-r--r--  drivers/net/mlx4/catas.c | 106
-rw-r--r--  drivers/net/mlx4/eq.c | 56
-rw-r--r--  drivers/net/mlx4/intf.c | 2
-rw-r--r--  drivers/net/mlx4/main.c | 26
-rw-r--r--  drivers/net/mlx4/mlx4.h | 13
-rw-r--r--  drivers/pcmcia/ds.c | 40
-rw-r--r--  drivers/uio/Kconfig | 29
-rw-r--r--  drivers/uio/Makefile | 2
-rw-r--r--  drivers/uio/uio.c | 701
-rw-r--r--  drivers/uio/uio_cif.c | 156
-rw-r--r--  drivers/usb/core/driver.c | 7
-rw-r--r--  fs/debugfs/inode.c | 5
-rw-r--r--  fs/gfs2/ops_file.c | 24
-rw-r--r--  fs/locks.c | 112
-rw-r--r--  fs/nfs/file.c | 16
-rw-r--r--  fs/nfsd/nfs4state.c | 10
-rw-r--r--  fs/sysfs/dir.c | 25
-rw-r--r--  fs/sysfs/file.c | 9
-rw-r--r--  fs/sysfs/inode.c | 2
-rw-r--r--  fs/sysfs/mount.c | 10
-rw-r--r--  fs/sysfs/symlink.c | 12
-rw-r--r--  fs/sysfs/sysfs.h | 1
-rw-r--r--  include/linux/device.h | 10
-rw-r--r--  include/linux/fs.h | 4
-rw-r--r--  include/linux/kobject.h | 25
-rw-r--r--  include/linux/pm.h | 11
-rw-r--r--  include/linux/uio_driver.h | 91
-rw-r--r--  kernel/power/Kconfig | 12
-rw-r--r--  lib/kobject_uevent.c | 30
91 files changed, 4630 insertions(+), 1670 deletions(-)
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index a667eb1fc26e..7f1730f1a1ae 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -633,12 +633,27 @@ covers RTL which is used frequently with assembly language in the kernel.
 
 Kernel developers like to be seen as literate. Do mind the spelling
 of kernel messages to make a good impression. Do not use crippled
-words like "dont" and use "do not" or "don't" instead.
+words like "dont"; use "do not" or "don't" instead. Make the messages
+concise, clear, and unambiguous.
 
 Kernel messages do not have to be terminated with a period.
 
 Printing numbers in parentheses (%d) adds no value and should be avoided.
 
+There are a number of driver model diagnostic macros in <linux/device.h>
+which you should use to make sure messages are matched to the right device
+and driver, and are tagged with the right level: dev_err(), dev_warn(),
+dev_info(), and so forth. For messages that aren't associated with a
+particular device, <linux/kernel.h> defines pr_debug() and pr_info().
+
+Coming up with good debugging messages can be quite a challenge; and once
+you have them, they can be a huge help for remote troubleshooting. Such
+messages should be compiled out when the DEBUG symbol is not defined (that
+is, by default they are not included). When you use dev_dbg() or pr_debug(),
+that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG.
+A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the
+ones already enabled by DEBUG.
+
 
 		Chapter 14: Allocating memory
 
@@ -790,4 +805,5 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002:
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
 
 --
-Last updated on 2006-December-06.
+Last updated on 2007-July-13.
+
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index fd2ef4d29b6d..a0af560ed740 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -408,6 +408,10 @@ X!Edrivers/pnp/system.c
 !Edrivers/pnp/manager.c
 !Edrivers/pnp/support.c
      </sect1>
+     <sect1><title>Userspace IO devices</title>
+!Edrivers/uio/uio.c
+!Iinclude/linux/uio_driver.h
+     </sect1>
 </chapter>
 
 <chapter id="blkdev">
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
new file mode 100644
index 000000000000..e3bb29a8d8dd
--- /dev/null
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -0,0 +1,611 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
3"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []>
4
5<book id="index">
6<bookinfo>
7<title>The Userspace I/O HOWTO</title>
8
9<author>
10 <firstname>Hans-Jürgen</firstname>
11 <surname>Koch</surname>
12 <authorblurb><para>Linux developer, Linutronix</para></authorblurb>
13 <affiliation>
14 <orgname>
15 <ulink url="http://www.linutronix.de">Linutronix</ulink>
16 </orgname>
17
18 <address>
19 <email>hjk@linutronix.de</email>
20 </address>
21 </affiliation>
22</author>
23
24<pubdate>2006-12-11</pubdate>
25
26<abstract>
27 <para>This HOWTO describes the concept and usage of the Linux
28 kernel's Userspace I/O system.</para>
29</abstract>
30
31<revhistory>
32 <revision>
33 <revnumber>0.3</revnumber>
34 <date>2007-04-29</date>
35 <authorinitials>hjk</authorinitials>
36 <revremark>Added section about userspace drivers.</revremark>
37 </revision>
38 <revision>
39 <revnumber>0.2</revnumber>
40 <date>2007-02-13</date>
41 <authorinitials>hjk</authorinitials>
42 <revremark>Update after multiple mappings were added.</revremark>
43 </revision>
44 <revision>
45 <revnumber>0.1</revnumber>
46 <date>2006-12-11</date>
47 <authorinitials>hjk</authorinitials>
48 <revremark>First draft.</revremark>
49 </revision>
50</revhistory>
51</bookinfo>
52
53<chapter id="aboutthisdoc">
54<?dbhtml filename="about.html"?>
55<title>About this document</title>
56
57<sect1 id="copyright">
58<?dbhtml filename="copyright.html"?>
59<title>Copyright and License</title>
60<para>
61 Copyright (c) 2006 by Hans-Jürgen Koch.</para>
62<para>
63This documentation is Free Software licensed under the terms of the
64GPL version 2.
65</para>
66</sect1>
67
68<sect1 id="translations">
69<?dbhtml filename="translations.html"?>
70<title>Translations</title>
71
72<para>If you know of any translations for this document, or you are
73interested in translating it, please email me
74<email>hjk@linutronix.de</email>.
75</para>
76</sect1>
77
78<sect1 id="preface">
79<title>Preface</title>
80 <para>
81 For many types of devices, creating a Linux kernel driver is
82 overkill. All that is really needed is some way to handle an
83 interrupt and provide access to the memory space of the
84 device. The logic of controlling the device does not
85 necessarily have to be within the kernel, as the device does
86 not need to take advantage of any of the other resources that the
87 kernel provides. One common class of such devices is
88 industrial I/O cards.
89 </para>
90 <para>
91 To address this situation, the userspace I/O system (UIO) was
92 designed. For typical industrial I/O cards, only a very small
93 kernel module is needed. The main part of the driver will run in
94 user space. This simplifies development and reduces the risk of
95 serious bugs within a kernel module.
96 </para>
97</sect1>
98
99<sect1 id="thanks">
100<title>Acknowledgments</title>
101 <para>I'd like to thank Thomas Gleixner and Benedikt Spranger of
102 Linutronix, who have not only written most of the UIO code, but also
103 helped greatly in writing this HOWTO by giving me all kinds of background
104 information.</para>
105</sect1>
106
107<sect1 id="feedback">
108<title>Feedback</title>
109 <para>Find something wrong with this document? (Or perhaps something
110 right?) I would love to hear from you. Please email me at
111 <email>hjk@linutronix.de</email>.</para>
112</sect1>
113</chapter>
114
115<chapter id="about">
116<?dbhtml filename="about.html"?>
117<title>About UIO</title>
118
119<para>If you use UIO for your card's driver, here's what you get:</para>
120
121<itemizedlist>
122<listitem>
123 <para>only one small kernel module to write and maintain.</para>
124</listitem>
125<listitem>
126 <para>develop the main part of your driver in user space,
127 with all the tools and libraries you're used to.</para>
128</listitem>
129<listitem>
130 <para>bugs in your driver won't crash the kernel.</para>
131</listitem>
132<listitem>
133 <para>updates of your driver can take place without recompiling
134 the kernel.</para>
135</listitem>
136<listitem>
137 <para>if you need to keep some parts of your driver closed source,
138 you can do so without violating the GPL license on the kernel.</para>
139</listitem>
140</itemizedlist>
141
142<sect1 id="how_uio_works">
143<title>How UIO works</title>
144 <para>
145 Each UIO device is accessed through a device file and several
146 sysfs attribute files. The device file will be called
147 <filename>/dev/uio0</filename> for the first device, and
148 <filename>/dev/uio1</filename>, <filename>/dev/uio2</filename>
149 and so on for subsequent devices.
150 </para>
151
152 <para><filename>/dev/uioX</filename> is used to access the
153 address space of the card. Just use
154 <function>mmap()</function> to access registers or RAM
155 locations of your card.
156 </para>
157
158 <para>
159 Interrupts are handled by reading from
160 <filename>/dev/uioX</filename>. A blocking
161 <function>read()</function> from
162 <filename>/dev/uioX</filename> will return as soon as an
163 interrupt occurs. You can also use
164 <function>select()</function> on
165 <filename>/dev/uioX</filename> to wait for an interrupt. The
166 integer value read from <filename>/dev/uioX</filename>
167 represents the total interrupt count. You can use this number
168 to figure out if you missed some interrupts.
169 </para>
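<para>
	As a minimal sketch of this (assuming your first UIO device shows
	up as <filename>/dev/uio0</filename>), a blocking wait for one
	interrupt could look like this:
</para>
<programlisting format="linespecific">
#include &lt;stdint.h&gt;
#include &lt;stdio.h&gt;
#include &lt;fcntl.h&gt;
#include &lt;unistd.h&gt;

int main(void)
{
	int32_t icount;
	int fd = open("/dev/uio0", O_RDONLY);

	if (fd &lt; 0) {
		perror("open");
		return 1;
	}
	/* read() blocks until the device signals an interrupt */
	if (read(fd, &amp;icount, 4) == 4)
		printf("total interrupt count: %d\n", (int)icount);
	close(fd);
	return 0;
}
</programlisting>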
170
171 <para>
172 To handle interrupts properly, your custom kernel module can
173 provide its own interrupt handler. It will automatically be
174 called by the built-in handler.
175 </para>
176
177 <para>
178 For cards that don't generate interrupts but need to be
179 polled, there is the possibility to set up a timer that
180 triggers the interrupt handler at configurable time intervals.
181 See <filename>drivers/uio/uio_dummy.c</filename> for an
182 example of this technique.
183 </para>
184
185 <para>
186 Each driver provides attributes that are used to read or write
187 variables. These attributes are accessible through sysfs
188 files. A custom kernel driver module can add its own
189 attributes to the device owned by the UIO driver, but cannot add
190 them to the UIO device itself at this time. This might change in
191 the future if it turns out to be useful.
192 </para>
193
194 <para>
195 The following standard attributes are provided by the UIO
196 framework:
197 </para>
198<itemizedlist>
199<listitem>
200 <para>
201 <filename>name</filename>: The name of your device. It is
202 recommended to use the name of your kernel module for this.
203 </para>
204</listitem>
205<listitem>
206 <para>
207 <filename>version</filename>: A version string defined by your
208 driver. This allows the user space part of your driver to deal
209 with different versions of the kernel module.
210 </para>
211</listitem>
212<listitem>
213 <para>
214 <filename>event</filename>: The total number of interrupts
215 handled by the driver since the last time the device node was
216 read.
217 </para>
218</listitem>
219</itemizedlist>
220<para>
221 These attributes appear under the
222 <filename>/sys/class/uio/uioX</filename> directory. Please
223 note that this directory might be a symlink, and not a real
224 directory. Any userspace code that accesses it must be able
225 to handle this.
226</para>
227<para>
228 Each UIO device can make one or more memory regions available for
229 memory mapping. This is necessary because some industrial I/O cards
230 require access to more than one PCI memory region in a driver.
231</para>
232<para>
233 Each mapping has its own directory in sysfs, the first mapping
234 appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>.
235 Subsequent mappings create directories <filename>map1/</filename>,
236 <filename>map2/</filename>, and so on. These directories will only
237 appear if the size of the mapping is not 0.
238</para>
239<para>
240 Each <filename>mapX/</filename> directory contains two read-only files
241 that show start address and size of the memory:
242</para>
243<itemizedlist>
244<listitem>
245 <para>
246 <filename>addr</filename>: The address of memory that can be mapped.
247 </para>
248</listitem>
249<listitem>
250 <para>
251 <filename>size</filename>: The size, in bytes, of the memory
252 pointed to by addr.
253 </para>
254</listitem>
255</itemizedlist>
256
257<para>
258 From userspace, the different mappings are distinguished by adjusting
259 the <varname>offset</varname> parameter of the
260 <function>mmap()</function> call. To map the memory of mapping N, you
261 have to use N times the page size as your offset:
262</para>
263<programlisting format="linespecific">
264offset = N * getpagesize();
265</programlisting>
266
267</sect1>
268</chapter>
269
270<chapter id="using-uio_dummy" xreflabel="Using uio_dummy">
271<?dbhtml filename="using-uio_dummy.html"?>
272<title>Using uio_dummy</title>
273 <para>
274 Well, there is no real use for uio_dummy. Its only purpose is
275 to test most parts of the UIO system (everything except
276 hardware interrupts), and to serve as an example for the
277 kernel module that you will have to write yourself.
278 </para>
279
280<sect1 id="what_uio_dummy_does">
281<title>What uio_dummy does</title>
282 <para>
283 The kernel module <filename>uio_dummy.ko</filename> creates a
284 device that uses a timer to generate periodic interrupts. The
285 interrupt handler does nothing but increment a counter. The
286 driver adds two custom attributes, <varname>count</varname>
287 and <varname>freq</varname>, that appear under
288 <filename>/sys/devices/platform/uio_dummy/</filename>.
289 </para>
290
291 <para>
292 The attribute <varname>count</varname> can be read and
293 written. The associated file
294 <filename>/sys/devices/platform/uio_dummy/count</filename>
295 appears as a normal text file and contains the total number of
296 timer interrupts. If you look at it (e.g. using
297 <function>cat</function>), you'll notice it is slowly counting
298 up.
299 </para>
300
301 <para>
302 The attribute <varname>freq</varname> can be read and written.
303 The content of
304 <filename>/sys/devices/platform/uio_dummy/freq</filename>
305 represents the number of system timer ticks between two timer
306 interrupts. The default value of <varname>freq</varname> is
307 the value of the kernel variable <varname>HZ</varname>, which
308 gives you an interval of one second. Lower values will
309 increase the frequency. Try the following:
310 </para>
311<programlisting format="linespecific">
312cd /sys/devices/platform/uio_dummy/
313echo 100 > freq
314</programlisting>
315 <para>
316 Use <function>cat count</function> to see how the interrupt
317 frequency changes.
318 </para>
319</sect1>
320</chapter>
321
322<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module">
323<?dbhtml filename="custom_kernel_module.html"?>
324<title>Writing your own kernel module</title>
325 <para>
326 Please have a look at <filename>uio_dummy.c</filename> as an
327 example. The following paragraphs explain the different
328 sections of this file.
329 </para>
330
331<sect1 id="uio_info">
332<title>struct uio_info</title>
333 <para>
334 This structure tells the framework the details of your driver.
335 Some of the members are required, others are optional. A minimal initialization sketch follows at the end of this section.
336 </para>
337
338<itemizedlist>
339<listitem><para>
340<varname>char *name</varname>: Required. The name of your driver as
341it will appear in sysfs. I recommend using the name of your module for this.
342</para></listitem>
343
344<listitem><para>
345<varname>char *version</varname>: Required. This string appears in
346<filename>/sys/class/uio/uioX/version</filename>.
347</para></listitem>
348
349<listitem><para>
350<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you
351have memory that can be mapped with <function>mmap()</function>. For each
352mapping you need to fill one of the <varname>uio_mem</varname> structures.
353See the description below for details.
354</para></listitem>
355
356<listitem><para>
357<varname>long irq</varname>: Required. If your hardware generates an
358interrupt, it is your module's task to determine the IRQ number during
359initialization. If you don't have a hardware generated interrupt but
360want to trigger the interrupt handler in some other way, set
361<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>. The
362uio_dummy module does this as it triggers the event mechanism in a timer
363routine. If you had no interrupt at all, you could set
364<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this
365rarely makes sense.
366</para></listitem>
367
368<listitem><para>
369<varname>unsigned long irq_flags</varname>: Required if you've set
370<varname>irq</varname> to a hardware interrupt number. The flags given
371here will be used in the call to <function>request_irq()</function>.
372</para></listitem>
373
374<listitem><para>
375<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct
376*vma)</varname>: Optional. If you need a special
377<function>mmap()</function> function, you can set it here. If this
378pointer is not NULL, your <function>mmap()</function> will be called
379instead of the built-in one.
380</para></listitem>
381
382<listitem><para>
383<varname>int (*open)(struct uio_info *info, struct inode *inode)
384</varname>: Optional. You might want to have your own
385<function>open()</function>, e.g. to enable interrupts only when your
386device is actually used.
387</para></listitem>
388
389<listitem><para>
390<varname>int (*release)(struct uio_info *info, struct inode *inode)
391</varname>: Optional. If you define your own
392<function>open()</function>, you will probably also want a custom
393<function>release()</function> function.
394</para></listitem>
395</itemizedlist>
396
397<para>
398Usually, your device will have one or more memory regions that can be mapped
399to user space. For each region, you have to set up a
400<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array.
401Here's a description of the fields of <varname>struct uio_mem</varname>:
402</para>
403
404<itemizedlist>
405<listitem><para>
406<varname>int memtype</varname>: Required if the mapping is used. Set this to
407<varname>UIO_MEM_PHYS</varname> if you have physical memory on your
408card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical
409memory (e.g. allocated with <function>kmalloc()</function>). There's also
410<varname>UIO_MEM_VIRTUAL</varname> for virtual memory.
411</para></listitem>
412
413<listitem><para>
414<varname>unsigned long addr</varname>: Required if the mapping is used.
415Fill in the address of your memory block. This address is the one that
416appears in sysfs.
417</para></listitem>
418
419<listitem><para>
420<varname>unsigned long size</varname>: Fill in the size of the
421memory block that <varname>addr</varname> points to. If <varname>size</varname>
422is zero, the mapping is considered unused. Note that you
423<emphasis>must</emphasis> initialize <varname>size</varname> with zero for
424all unused mappings.
425</para></listitem>
426
427<listitem><para>
428<varname>void *internal_addr</varname>: If you have to access this memory
429region from within your kernel module, you will want to map it internally by
430using something like <function>ioremap()</function>. Addresses
431returned by this function cannot be mapped to user space, so you must not
432store them in <varname>addr</varname>. Use <varname>internal_addr</varname>
433instead to remember such an address.
434</para></listitem>
435</itemizedlist>
436
437<para>
438Please do not touch the <varname>kobj</varname> element of
439<varname>struct uio_mem</varname>! It is used by the UIO framework
440to set up sysfs files for this mapping. Simply leave it alone.
441</para>
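<para>
	The following is a minimal sketch of how these structures might be
	filled in, not a complete driver: the platform device, the base
	address and the size are hypothetical values for an imaginary card,
	and it assumes the framework's registration function is
	<function>uio_register_device()</function>.
</para>
<programlisting format="linespecific">
#include &lt;linux/io.h&gt;
#include &lt;linux/platform_device.h&gt;
#include &lt;linux/uio_driver.h&gt;

static struct uio_info my_uio_info = {
	.name    = "my_uio",        /* hypothetical module name */
	.version = "0.1",
	.irq     = UIO_IRQ_CUSTOM,  /* no hardware interrupt in this sketch */
};

static int my_probe(struct platform_device *pdev)
{
	/* one physical memory region; all other mem[] entries keep size 0 */
	my_uio_info.mem[0].memtype = UIO_MEM_PHYS;
	my_uio_info.mem[0].addr = 0xd0000000;  /* hypothetical base address */
	my_uio_info.mem[0].size = 0x1000;      /* hypothetical region size */
	my_uio_info.mem[0].internal_addr =
		ioremap(my_uio_info.mem[0].addr, my_uio_info.mem[0].size);

	return uio_register_device(&amp;pdev->dev, &amp;my_uio_info);
}
</programlisting>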
442</sect1>
443
444<sect1 id="adding_irq_handler">
445<title>Adding an interrupt handler</title>
446 <para>
447 What you need to do in your interrupt handler depends on your
448 hardware and on how you want to handle it. You should try to
449 keep the amount of code in your kernel interrupt handler low.
450 If your hardware requires no action that you
451 <emphasis>have</emphasis> to perform after each interrupt,
452 then your handler can be empty.</para> <para>If, on the other
453 hand, your hardware <emphasis>needs</emphasis> some action to
454 be performed after each interrupt, then you
455 <emphasis>must</emphasis> do it in your kernel module. Note
456 that you cannot rely on the userspace part of your driver. Your
457 userspace program can terminate at any time, possibly leaving
458 your hardware in a state where proper interrupt handling is
459 still required.
460 </para>
461
462 <para>
463 There might also be applications where you want to read data
464 from your hardware at each interrupt and buffer it in a piece
465 of kernel memory you've allocated for that purpose. With this
466 technique you could avoid loss of data if your userspace
467 program misses an interrupt.
468 </para>
469
470 <para>
471 A note on shared interrupts: Your driver should support
472 interrupt sharing whenever this is possible. It is possible if
473 and only if your driver can detect whether your hardware has
474 triggered the interrupt or not. This is usually done by looking
475 at an interrupt status register. If your driver sees that the
476 IRQ bit is actually set, it will perform its actions, and the
477 handler returns IRQ_HANDLED. If the driver detects that it was
478 not your hardware that caused the interrupt, it will do nothing
479 and return IRQ_NONE, allowing the kernel to call the next
480 possible interrupt handler.
481 </para>
482
483 <para>
484 If you decide not to support shared interrupts, your card
485 won't work in computers with no free interrupts. As this
486 frequently happens on the PC platform, you can save yourself a
487 lot of trouble by supporting interrupt sharing.
488 </para>
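<para>
	As an illustration, a handler for a shared interrupt might look like
	the sketch below. The register offset and the interrupt pending bit
	are assumptions for a hypothetical card, and the handler signature
	shown here (taking the <varname>uio_info</varname> pointer) is an
	assumption as well; check
	<filename>include/linux/uio_driver.h</filename> for the exact
	prototype the framework expects.
</para>
<programlisting format="linespecific">
#define MY_IRQ_STATUS 0x04  /* hypothetical status register offset */
#define MY_IRQ_BIT    0x01  /* hypothetical "interrupt pending" bit */

static irqreturn_t my_handler(int irq, struct uio_info *info)
{
	void __iomem *regs = info->mem[0].internal_addr;
	u32 status = ioread32(regs + MY_IRQ_STATUS);

	if (!(status &amp; MY_IRQ_BIT))
		return IRQ_NONE;  /* not our card, let others handle it */

	/* acknowledge the interrupt so the card deasserts the line */
	iowrite32(MY_IRQ_BIT, regs + MY_IRQ_STATUS);
	return IRQ_HANDLED;
}
</programlisting>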
489</sect1>
490
491</chapter>
492
493<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
494<?dbhtml filename="userspace_driver.html"?>
495<title>Writing a driver in userspace</title>
496 <para>
497 Once you have a working kernel module for your hardware, you can
498 write the userspace part of your driver. You don't need any special
499 libraries; your driver can be written in any reasonable language;
500 you can use floating point numbers, and so on. In short, you can
501 use all the tools and libraries you'd normally use for writing a
502 userspace application.
503 </para>
504
505<sect1 id="getting_uio_information">
506<title>Getting information about your UIO device</title>
507 <para>
508 Information about all UIO devices is available in sysfs. The
509 first thing you should do in your driver is check
510 <varname>name</varname> and <varname>version</varname> to
511 make sure you're talking to the right device and that its kernel
512 driver has the version you expect.
513 </para>
514 <para>
515 You should also make sure that the memory mapping you need
516 exists and has the size you expect.
517 </para>
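<para>
	For example, checking the <filename>name</filename> attribute from C
	could look like this sketch; "my_uio" is a placeholder for whatever
	name your kernel module registered, and the device is assumed to be
	<filename>uio0</filename>:
</para>
<programlisting format="linespecific">
#include &lt;stdio.h&gt;
#include &lt;string.h&gt;

/* returns 0 if /dev/uio0 belongs to the driver we expect */
static int check_uio_name(const char *expected)
{
	char name[64];
	FILE *f = fopen("/sys/class/uio/uio0/name", "r");

	if (!f || !fgets(name, sizeof(name), f)) {
		if (f)
			fclose(f);
		return -1;
	}
	fclose(f);
	name[strcspn(name, "\n")] = '\0';
	return strcmp(name, expected) ? -1 : 0;
}
</programlisting>
<para>
	The same approach works for <filename>version</filename> and for the
	<filename>addr</filename> and <filename>size</filename> files of the
	individual mappings.
</para>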
518 <para>
519 There is a tool called <varname>lsuio</varname> that lists
520 UIO devices and their attributes. It is available here:
521 </para>
522 <para>
523 <ulink url="http://www.osadl.org/projects/downloads/UIO/user/">
524 http://www.osadl.org/projects/downloads/UIO/user/</ulink>
525 </para>
526 <para>
527 With <varname>lsuio</varname> you can quickly check if your
528 kernel module is loaded and which attributes it exports.
529 Have a look at the manpage for details.
530 </para>
531 <para>
532 The source code of <varname>lsuio</varname> can serve as an
533 example for getting information about a UIO device.
534 The file <filename>uio_helper.c</filename> contains a lot of
535 functions you could use in your userspace driver code.
536 </para>
537</sect1>
538
539<sect1 id="mmap_device_memory">
540<title>mmap() device memory</title>
541 <para>
542 After you have made sure you've got the right device with the
543 memory mappings you need, all you have to do is to call
544 <function>mmap()</function> to map the device's memory
545 to userspace.
546 </para>
547 <para>
548 The parameter <varname>offset</varname> of the
549 <function>mmap()</function> call has a special meaning
550 for UIO devices: It is used to select which mapping of
551 your device you want to map. To map the memory of
552 mapping N, you have to use N times the page size as
553 your offset:
554 </para>
555<programlisting format="linespecific">
556 offset = N * getpagesize();
557</programlisting>
558 <para>
559 N starts from zero, so if you've got only one memory
560 range to map, set <varname>offset = 0</varname>.
561 A drawback of this technique is that memory is always
562 mapped beginning with its start address.
563 </para>
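<para>
	Putting this together, mapping the first memory region (N = 0) might
	look like the sketch below. The mapping size is assumed to be one
	page here; a real driver should read it from the
	<filename>size</filename> attribute in sysfs.
</para>
<programlisting format="linespecific">
#include &lt;fcntl.h&gt;
#include &lt;stdio.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/mman.h&gt;

int main(void)
{
	void *map;
	int fd = open("/dev/uio0", O_RDWR);

	if (fd &lt; 0) {
		perror("open");
		return 1;
	}
	/* the offset selects the mapping: N * getpagesize(), here N = 0 */
	map = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, 0 * getpagesize());
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* registers or RAM of mapping 0 are now accessible through map */
	munmap(map, getpagesize());
	close(fd);
	return 0;
}
</programlisting>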
564</sect1>
565
566<sect1 id="wait_for_interrupts">
567<title>Waiting for interrupts</title>
568 <para>
569 After you have successfully mapped your device's memory, you
570 can access it like an ordinary array. Usually, you will
571 perform some initialization. After that, your hardware
572 starts working and will generate an interrupt as soon
573 as it's finished, has some data available, or needs your
574 attention because an error occurred.
575 </para>
576 <para>
577 <filename>/dev/uioX</filename> is a read-only file. A
578 <function>read()</function> will always block until an
579 interrupt occurs. There is only one legal value for the
580 <varname>count</varname> parameter of
581 <function>read()</function>, and that is the size of a
582 signed 32 bit integer (4). Any other value for
583 <varname>count</varname> causes <function>read()</function>
584 to fail. The signed 32 bit integer read is the interrupt
585 count of your device. If the value is one more than the value
586 you read the last time, everything is OK. If the difference
587 is greater than one, you missed interrupts.
588 </para>
589 <para>
590 You can also use <function>select()</function> on
591 <filename>/dev/uioX</filename>.
592 </para>
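<para>
	A sketch of an event loop that combines <function>select()</function>
	with the interrupt counter semantics described above (again assuming
	<filename>/dev/uio0</filename> is already open as
	<varname>fd</varname>):
</para>
<programlisting format="linespecific">
#include &lt;stdint.h&gt;
#include &lt;stdio.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/select.h&gt;

static void wait_loop(int fd)
{
	int32_t icount, last = 0;
	fd_set readfds;

	for (;;) {
		FD_ZERO(&amp;readfds);
		FD_SET(fd, &amp;readfds);
		/* wait until the next interrupt makes fd readable */
		if (select(fd + 1, &amp;readfds, NULL, NULL, NULL) &lt;= 0)
			break;
		if (read(fd, &amp;icount, 4) != 4)
			break;
		if (last &amp;&amp; icount - last > 1)
			fprintf(stderr, "missed %d interrupts\n",
			        (int)(icount - last - 1));
		last = icount;
		/* process the interrupt here */
	}
}
</programlisting>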
593</sect1>
594
595</chapter>
596
597<appendix id="app1">
598<title>Further information</title>
599<itemizedlist>
600 <listitem><para>
601 <ulink url="http://www.osadl.org">
602 OSADL homepage.</ulink>
603 </para></listitem>
604 <listitem><para>
605 <ulink url="http://www.linutronix.de">
606 Linutronix homepage.</ulink>
607 </para></listitem>
608</itemizedlist>
609</appendix>
610
611</book>
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 98e2701c746f..f8cc3f8ed152 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -249,6 +249,9 @@ process is as follows:
     release a new -rc kernel every week.
   - Process continues until the kernel is considered "ready", the
     process should last around 6 weeks.
+  - A list of known regressions present in each -rc release is
+    tracked at the following URI:
+    http://kernelnewbies.org/known_regressions
 
 It is worth mentioning what Andrew Morton wrote on the linux-kernel
 mailing list about kernel releases:
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 6c8d8f27db34..8569072fa387 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -207,7 +207,7 @@ responsibility. This is usually non-issue because bus ops and
 resource allocations already do the job.
 
 For an example of single-instance devres type, read pcim_iomap_table()
-in lib/iomap.c.
+in lib/devres.c.
 
 All devres interface functions can be called without context if the
 right gfp mask is given.
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d05e6243b4df..66c8b4b165c1 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -26,9 +26,7 @@ Who: Hans Verkuil <hverkuil@xs4all.nl> and
 
 ---------------------------
 
-What:	/sys/devices/.../power/state
-	dev->power.power_state
-	dpm_runtime_{suspend,resume)()
+What:	dev->power.power_state
 When:	July 2007
 Why:	Broken design for runtime control over driver power states, confusing
 	driver-internal runtime power management with: mechanisms to support
@@ -310,3 +308,13 @@ Why: The arch/powerpc tree is the merged architecture for ppc32 and ppc64
 Who:	linuxppc-dev@ozlabs.org
 
 ---------------------------
+
+What:	mthca driver's MSI support
+When:	January 2008
+Files:	drivers/infiniband/hw/mthca/*.[ch]
+Why:	All mthca hardware also supports MSI-X, which provides
+	strictly more functionality than MSI. So there is no point in
+	having both MSI-X and MSI support in the driver.
+Who:	Roland Dreier <rolandd@cisco.com>
+
+---------------------------
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
new file mode 100644
index 000000000000..b2446a090870
--- /dev/null
+++ b/Documentation/ja_JP/HOWTO
@@ -0,0 +1,650 @@
1NOTE:
2This is a Japanese translation of "Documentation/HOWTO".
3It is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
4and the JF Project team <www.linux.or.jp/JF>.
5If you find any differences from the original file or any problems with
6the translation, please contact the maintainer of this file or the JF project.
7
8Please also note that the purpose of this file is to be easier to read
9for non-native English readers; it is not intended as a fork. So, if you
10have any comments or updates for this file, please try to update the
11original (English) file first.
12
13Last Updated: 2007/06/04
14==================================
15これは、
16linux-2.6.21/Documentation/HOWTO
17の和訳です。
18
19翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
20翻訳日: 2007/06/04
21翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
22校正者: 松倉さん <nbh--mats at nifty dot com>
23 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
24 武井伸光さん、<takei at webmasters dot gr dot jp>
25 かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
26 野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
27 河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com>
28 岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp>
29==================================
30
31Linux カーネル開発のやり方
32-------------------------------
33
34これは上のトピック( Linux カーネル開発のやり方)の重要な事柄を網羅した
35ドキュメントです。ここには Linux カーネル開発者になるための方法と
36Linux カーネル開発コミュニティと共に活動するやり方を学ぶ方法が含まれて
37います。カーネルプログラミングに関する技術的な項目に関することは何も含
38めないようにしていますが、カーネル開発者となるための正しい方向に向かう
39手助けになります。
40
41もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン
42トの最後にリストしたメンテナーにパッチを送ってください。
43
44はじめに
45---------
46
47あなたは Linux カーネルの開発者になる方法を学びたいのでしょうか? そ
48れともあなたは上司から「このデバイスの Linux ドライバを書くように」と
49言われているのでしょうか? 
50この文書の目的は、あなたが踏むべき手順と、コミュニティと一緒にうまく働
51くヒントを書き下すことで、あなたが知るべき全てのことを教えることです。
52また、このコミュニティがなぜ今うまくまわっているのかという理由の一部も
53説明しようと試みています。
54
55カーネルは 少量のアーキテクチャ依存部分がアセンブリ言語で書かれている
56以外は大部分は C 言語で書かれています。C言語をよく理解していることはカー
57ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの
58でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり
59ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの
60ではありませんが、少なくともリファレンスとしてはいい本です。
61 - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
62 -『プログラミング言語C第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版]
63 - "Practical C Programming" by Steve Oualline [O'Reilly]
64 - 『C実践プログラミング第3版』(Steve Oualline著 望月康司監訳 谷口功訳) [オライリージャパン]
65 - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
66 - 『新・詳説 C 言語 H&S リファレンス』
67 (サミュエル P ハービソン/ガイ L スティール共著 斉藤 信男監訳)[ソフトバンク]
68
69カーネルは GNU C と GNU ツールチェインを使って書かれています。カーネル
70は ISO C89 仕様に準拠して書く一方で、標準には無い言語拡張を多く使って
71います。カーネルは標準 C ライブラリとは関係がないといった、C 言語フリー
72スタンディング環境です。そのため、C の標準で使えないものもあります。任
73意の long long の除算や浮動小数点は使えません。
74ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう
75なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ
76レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を
77みてください。
78
79あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ
80とに留意してください。そのコミュニティは、コーディング、スタイル、
81開発手順について高度な標準を持つ、多様な人の集まりです。
82地理的に分散した大規模なチームに対してもっともうまくいくとわかったこと
83をベースにしながら、これらの標準は長い時間をかけて築かれてきました。
84これらはきちんと文書化されていますから、事前にこれらの標準についてでき
85るだけたくさん学んでください。また皆があなたやあなたの会社のやり方に合わ
86せてくれると思わないでください。
87
88法的問題
89------------
90
91Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま
92す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在
93する、COPYING のファイルをみてください。もしライセンスについてさらに質
94問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ
95法律家に相談してください。メーリングリストの人達は法律家ではなく、法的
96問題については彼らの声明はあてにするべきではありません。
97
98GPL に関する共通の質問や回答については、以下を参照してください。
99 http://www.gnu.org/licenses/gpl-faq.html
100
101ドキュメント
102------------
103
104Linux カーネルソースツリーは幅広い範囲のドキュメントを含んでおり、それ
105らはカーネルコミュニティと会話する方法を学ぶのに非常に貴重なものです。
106新しい機能がカーネルに追加される場合、その機能の使い方について説明した
107新しいドキュメントファイルも追加することを勧めます。
108カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
109変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
110をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めます。
111
112以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
113す-
114
115 README
116 このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注
117 configure )し、生成(訳注 build )するために必要なことは何かが書かれ
118 ています。カーネルに関して初めての人はここからスタートするとよいで
119 しょう。
120
121 Documentation/Changes
122 このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最
123 小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい
124 ます。
125
126 Documentation/CodingStyle
127 これは Linux カーネルのコーディングスタイルと背景にある理由を記述
128 しています。全ての新しいコードはこのドキュメントにあるガイドライン
129 に従っていることを期待されています。大部分のメンテナーはこれらのルー
130 ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
131 だけをレビューします。
132
133 Documentation/SubmittingPatches
134 Documentation/SubmittingDrivers
135 これらのファイルには、どうやってうまくパッチを作って投稿するかに
136 ついて非常に詳しく書かれており、以下を含みます(これだけに限らない
137 けれども)
138 - Email に含むこと
139 - Email の形式
140 - だれに送るか
141 これらのルールに従えばうまくいくことを保証することではありません
142 が (すべてのパッチは内容とスタイルについて精査を受けるので)、
143 ルールに従わなければ間違いなくうまくいかないでしょう。
144 この他にパッチを作る方法についてのよくできた記述は-
145
146 "The Perfect Patch"
147 http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
148 "Linux kernel patch submission format"
149 http://linux.yyz.us/patch-format.html
150
151 Documentation/stable_api_nonsense.txt
152 このファイルはカーネルの中に不変のAPIを持たないことにした意識的な
153 決断の背景にある理由について書かれています。以下のようなことを含
154 んでいます-
155 - サブシステムとの間に層を作ること(コンパチビリティのため?)
156 - オペレーティングシステム間のドライバの移植性
157 - カーネルソースツリーの素早い変更を遅らせる(もしくは素早い変更
158 を妨げる)
159 このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
160 そして、他のOSでの開発者が Linux に移る時にとても重要です。
161
162 Documentation/SecurityBugs
163 もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
164 のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
165 支援してください。
166
167 Documentation/ManagementStyle
168 このドキュメントは Linux カーネルのメンテナー達がどう行動するか、
169 彼らの手法の背景にある共有されている精神について記述しています。こ
170 れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも)
171 重要です。なぜならこのドキュメントは、カーネルメンテナー達の独特な
172 行動についての多くの誤解や混乱を解消するからです。
173
174 Documentation/stable_kernel_rules.txt
175 このファイルはどのように stable カーネルのリリースが行われるかのルー
176 ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
177 り入れてもらいたい場合に何をすればいいかが示されています。
178
179 Documentation/kernel-docs.txt
180  カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
181 探しているものがカーネル内のドキュメントでみつからなかった場合、
182 このリストをあたってみてください。
183
184 Documentation/applying-patches.txt
185 パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに
186 適用するのかについて正確に記述した良い入門書です。
187
188カーネルはソースコードから自動的に生成可能な多数のドキュメントを自分自
189身でもっています。これにはカーネル内 API のすべての記述や、どう正しく
190ロックをかけるかの規則が含まれます。このドキュメントは
191Documentation/DocBook/ ディレクトリに作られ、以下のように
192 make pdfdocs
193 make psdocs
194 make htmldocs
195 make mandocs
196コマンドを実行するとメインカーネルのソースディレクトリから
197それぞれ、PDF, Postscript, HTML, man page の形式で生成されます。
198
199カーネル開発者になるには
200---------------------------
201
202もしあなたが、Linux カーネル開発について何も知らないならば、
203KernelNewbies プロジェクトを見るべきです
204 http://kernelnewbies.org
205
206このサイトには役に立つメーリングリストがあり、基本的なカーネル開発に関
207するほとんどどんな種類の質問もできます (既に回答されているようなことを
208聞く前にまずはアーカイブを調べてください)。
209またここには、リアルタイムで質問を聞くことができる IRC チャネルや、Linux
210カーネルの開発に関して学ぶのに便利なたくさんの役に立つドキュメントがあ
211ります。
212
213web サイトには、コードの構成、サブシステム、現在存在するプロジェクト(ツ
214リーにあるもの無いものの両方)の基本的な管理情報があります。
215ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接
216的な基本情報も記述されています。
217
218あなたがどこからスタートしてよいかわからないが、Linux カーネル開発コミュ
219ニティに参加して何かすることをさがしている場合には、Linux kernel
220Janitor's プロジェクトにいけばよいでしょう -
221 http://janitor.kernelnewbies.org/
222ここはそのようなスタートをするのにうってつけの場所です。ここには、
223Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな
224らない、単純な問題のリストが記述されています。このプロジェクトに関わる
225開発者と一緒に作業することで、あなたのパッチを Linuxカーネルツリーに入
226れるための基礎を学ぶことができ、そしてもしあなたがまだアイディアを持っ
227ていない場合には、次にやる仕事の方向性が見えてくるかもしれません。
228
229もしあなたが、すでにひとまとまりコードを書いていて、カーネルツリーに入
230れたいと思っていたり、それに関する適切な支援を求めたい場合、カーネル
231メンターズプロジェクトはそのような皆さんを助けるためにできました。
232ここにはメーリングリストがあり、以下から参照できます
233 http://selenic.com/mailman/listinfo/kernel-mentors
234
235実際に Linux カーネルのコードについて修正を加える前に、どうやってその
236コードが動作するのかを理解することが必要です。そのためには、特別なツー
237ルの助けを借りてでも、それを直接よく読むことが最良の方法です(ほとんど
238のトリッキーな部分は十分にコメントしてありますから)。そういうツールで
239特におすすめなのは、Linux クロスリファレンスプロジェクトです。これは、
240自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
241できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
242ます-
243 http://sosdg.org/~coywolf/lxr/
244
245開発プロセス
246-----------------------
247
248Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
249チ」と多数のサブシステム毎のカーネルブランチから構成されます。
250これらのブランチとは-
251 - メインの 2.6.x カーネルツリー
252 - 2.6.x.y -stable カーネルツリー
253 - 2.6.x -git カーネルパッチ
254 - 2.6.x -mm カーネルパッチ
255 - サブシステム毎のカーネルツリーとパッチ
256
2572.6.x カーネルツリー
258-----------------
259
2602.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
261の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
262以下のとおり-
263
264 - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
265 この期間中に、メンテナー達は Linus に大きな差分を送ることができま
266 す。このような差分は通常 -mm カーネルに数週間含まれてきたパッチで
267 す。 大きな変更は git(カーネルのソース管理ツール、詳細は
268 http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ
269 チファイルの形式のまま送るのでも十分です。
270
271 - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
272 性に影響をあたえるような新機能は含まない類のパッチしか取り込むこと
273 はできません。新しいドライバ(もしくはファイルシステム)のパッチは
274 -rc1 の後で受け付けられることもあることを覚えておいてください。な
275 ぜなら、変更が独立していて、追加されたコードの外の領域に影響を与え
276 ない限り、退行のリスクは無いからです。-rc1 がリリースされた後、
277 Linus へパッチを送付するのに git を使うこともできますが、パッチは
278 レビューのために、パブリックなメーリングリストへも同時に送る必要が
279 あります。
280
281 - 新しい -rc は Linus が、最新の git ツリーがテスト目的であれば十分
282 に安定した状態にあると判断したときにリリースされます。目標は毎週新
283 しい -rc カーネルをリリースすることです。
284
285 - このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
286 す。このプロセスはだいたい 6週間継続します。
287
288Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ
289て書いたことをここで言っておくことは価値があります-
290 「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現
291 実に認識されたバグの状況によりリリースされるのであり、前もって決めら
292 れた計画によってリリースされるものではないからです。」
293
2942.6.x.y -stable カーネルツリー
295---------------------------
296
297バージョンに4つ目の数字がついたカーネルは -stable カーネルです。これに
298は、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対する比
299較的小さい重要な修正が含まれます。
300
301これは、開発/実験的バージョンのテストに協力することに興味が無く、
302最新の安定したカーネルを使いたいユーザに推奨するブランチです。
303
304もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x
305が最新の安定版カーネルです。
306
3072.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、だ
308いたい隔週でリリースされています。
309
310カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ
311イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
312リースプロセスがどう動くかが記述されています。
313
3142.6.x -git パッチ
315------------------
316
317git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
318ショットがあります。(だから -git という名前がついています)。これらのパッ
319チはおおむね毎日リリースされており、Linus のツリーの現状を表します。こ
320れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的
321に生成されるので、より実験的です。
322
3232.6.x -mm カーネルパッチ
324------------------------
325
326Andrew Morton によってリリースされる実験的なカーネルパッチ群です。
327Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて
328linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま
329とめます。
330このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ
331が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、メ
332インラインへ入れるように Linus にプッシュします。
333
334メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ
335チが -mm ツリーでテストされることが強く推奨されます。
336
337これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ
338りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。
339
340もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、
341どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ
342れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ
343ストにフィードバックを提供してください。
344
345すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で
346メインラインの -git カーネルに含まれる全ての変更も含んでいます。
347
348-mm カーネルは決まったスケジュールではリリースされません、しかし通常幾
349つかの -mm カーネル (1 から 3 が普通)が各-rc カーネルの間にリリースさ
350れます。
351
352サブシステム毎のカーネルツリーとパッチ
353-------------------------------------------
354
355カーネルの様々な領域で何が起きているかを見られるようにするため、多くの
356カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの
357ツリーは説明したように -mm カーネルリリースに入れ込まれます。
358
359以下はさまざまなカーネルツリーの中のいくつかのリスト-
360
361 git ツリー-
362 - Kbuild の開発ツリー、Sam Ravnborg <sam@ravnborg.org>
363 kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
364
365 - ACPI の開発ツリー、 Len Brown <len.brown@intel.com>
366 kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
367
368 - Block の開発ツリー、Jens Axboe <axboe@suse.de>
369 kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
370
371 - DRM の開発ツリー、Dave Airlie <airlied@linux.ie>
372 kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
373
374 - ia64 の開発ツリー、Tony Luck <tony.luck@intel.com>
375 kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
376
377 - ieee1394 の開発ツリー、Jody McIntyre <scjody@modernduck.com>
378 kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git
379
380 - infiniband, Roland Dreier <rolandd@cisco.com>
381 kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
382
383 - libata, Jeff Garzik <jgarzik@pobox.com>
384 kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
385
386 - ネットワークドライバ, Jeff Garzik <jgarzik@pobox.com>
387 kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
388
389 - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
390 kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
391
392 - SCSI, James Bottomley <James.Bottomley@SteelEye.com>
393 kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
394
395 その他の git カーネルツリーは http://kernel.org/git に一覧表がありま
396 す。
397
398 quilt ツリー-
399 - USB, PCI ドライバコアと I2C, Greg Kroah-Hartman <gregkh@suse.de>
400 kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
401
402バグレポート
403-------------
404
405bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを追跡する
406場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。
407どう kernel bugzilla を使うかの詳細は、以下を参照してください-
408 http://test.kernel.org/bugzilla/faq.html
409
410メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ
411ルバグらしいものについてどうレポートするかの良いテンプレートであり、問
412題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳
413細が書かれています。
414
415メーリングリスト
416-------------
417
418上のいくつかのドキュメントで述べていますが、コアカーネル開発者の大部分
419は Linux kernel メーリングリストに参加しています。このリストの登録/脱
420退の方法については以下を参照してください-
421 http://vger.kernel.org/vger-lists.html#linux-kernel
422
423このメーリングリストのアーカイブは web 上の多数の場所に存在します。こ
424れらのアーカイブを探すにはサーチエンジンを使いましょう。例えば-
425 http://dir.gmane.org/gmane.linux.kernel
426
427リストに投稿する前にすでにその話題がアーカイブに存在するかどうかを検索
428することを是非やってください。多数の事がすでに詳細に渡って議論されて
429おり、アーカイブにのみ記録されています。
430
431大部分のカーネルサブシステムも自分の個別の開発を実施するメーリングリス
432トを持っています。個々のグループがどんなリストを持っているかは、
433MAINTAINERS ファイルにリストがありますので参照してください。
434
435多くのリストは kernel.org でホストされています。これらの情報は以下にあ
436ります-
437 http://vger.kernel.org/vger-lists.html
438
439メーリングリストを使う場合、良い行動習慣に従うようにしましょう。
440少し安っぽいが、以下の URL は上のリスト(や他のリスト)で会話する場合の
441シンプルなガイドラインを示しています-
442 http://www.albion.com/netiquette/
443
444もし複数の人があなたのメールに返事をした場合、CC: で受ける人のリストは
445だいぶ多くなるでしょう。良い理由がない場合、CC: リストから誰かを削除を
446しないように、また、メーリングリストのアドレスだけにリプライすることの
447ないようにしましょう。1つは送信者から、もう1つはリストからのように、メー
448ルを2回受けることになってもそれに慣れ、しゃれたメールヘッダーを追加し
449てこの状態を変えようとしないように。人々はそのようなことは好みません。
450
451今までのメールでのやりとりとその間のあなたの発言はそのまま残し、
452"John Kernlehacker wrote ...:" の行をあなたのリプライの先頭行にして、
453メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで
454す。
455
456もしパッチをメールに付ける場合は、Documentation/SubmittingPatches に提
457示されているように、それは プレーンな可読テキストにすることを忘れない
458ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま
459せん-
460彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす
461るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう
462に確認した方がいいです。最初の良いテストとしては、自分にメールを送って
463みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな
464ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ
465きです。
466
467とりわけ、他の登録者に対する尊敬を表すようにすることを覚えておいてくだ
468さい。
469
470コミュニティと共に働くこと
471--------------------------
472
473カーネルコミュニティのゴールは可能なかぎり最高のカーネルを提供すること
474です。あなたがパッチを受け入れてもらうために投稿した場合、それは、技術
475的メリットだけがレビューされます。その際、あなたは何を予想すべきでしょ
476うか?
477 - 批判
478 - コメント
479 - 変更の要求
480 - パッチの正当性の証明要求
481 - 沈黙
482
483思い出してください、ここはあなたのパッチをカーネルに入れる話です。あ
484なたは、あなたのパッチに対する批判とコメントを受け入れるべきで、それら
485を技術的レベルで評価して、パッチを再作成するか、なぜそれらの変更をすべ
486きでないかを明確で簡潔な理由の説明を提供してください。
487もし、あなたのパッチに何も反応がない場合、たまにはメールの山に埋もれて
488見逃され、あなたの投稿が忘れられてしまうこともあるので、数日待って再度
489投稿してください。
490
491あなたがやるべきでないものは?
492 - 質問なしにあなたのパッチが受け入れられると想像すること
493 - 守りに入ること
494 - コメントを無視すること
495 - 要求された変更を何もしないでパッチを出し直すこと
496
497可能な限り最高の技術的解決を求めているコミュニティでは、パッチがどのく
498らい有益なのかについては常に異なる意見があります。あなたは協調的である
499べきですし、また、あなたのアイディアをカーネルに対してうまく合わせるよ
500うにすることが望まれています。もしくは、最低限あなたのアイディアがそれ
501だけの価値があるとすすんで証明するようにしなければなりません。
502正しい解決に向かって進もうという意志がある限り、間違うことがあっても許
503容されることを忘れないでください。
504
505あなたの最初のパッチに単に 1ダースもの修正を求めるリストの返答になるこ
506とも普通のことです。これはあなたのパッチが受け入れられないということで
507は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま
508せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば
509いいのです。
510
511カーネルコミュニティと企業組織のちがい
512-----------------------------------------------------------------
513
514カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で
515動いています。以下は問題を避けるためにできるとよいことののリストです-
516
517 あなたの提案する変更について言うときのうまい言い方:
518
519 - "これは複数の問題を解決します"
520 - "これは2000行のコードを削除します"
521 - "以下のパッチは、私が言おうとしていることを説明するものです"
522 - "私はこれを5つの異なるアーキテクチャでテストしたのですが..."
523 - "以下は一連の小さなパッチ群ですが..."
524 - "これは典型的なマシンでの性能を向上させます.."
525
526 やめた方がいい悪い言い方:
527
528 - このやり方で AIX/ptx/Solaris ではできたので、できるはずだ
529 - 私はこれを20年もの間やってきた、だから
530 - これは、私の会社が金儲けをするために必要だ
531 - これは我々のエンタープライズ向け商品ラインのためである
532 - これは 私が自分のアイディアを記述した、1000ページの設計資料である
533 - 私はこれについて、6ケ月作業している。
534 - 以下は ... に関する5000行のパッチです
535 - 私は現在のぐちゃぐちゃを全部書き直した、それが以下です...
536 - 私は〆切がある、そのためこのパッチは今すぐ適用される必要がある
537
538カーネルコミュニティが大部分の伝統的なソフトウェアエンジニアリングの労
539働環境と異なるもう一つの点は、やりとりに顔を合わせないということです。
540email と irc を第一のコミュニケーションの形とする一つの利点は、性別や
541民族の差別がないことです。Linux カーネルの職場環境は女性や少数民族を受
542容します。なぜなら、email アドレスによってのみあなたが認識されるからで
543す。
544国際的な側面からも活動領域を均等にするようにします。なぜならば、あなた
545は人の名前で性別を想像できないからです。ある男性が アンドレアという名
546前で、女性の名前は パット かもしれません (訳注 Andrea は米国では女性、
547それ以外(欧州など)では男性名として使われることが多い。同様に、Pat は
548Patricia (主に女性名)や Patrick (主に男性名)の略称)。
549Linux カーネルの活動をして、意見を表明したことがある大部分の女性は、前
550向きな経験をもっています。
551
552言葉の壁は英語が得意でない一部の人には問題になります。
553メーリングリストの中できちんとアイディアを交換するには、相当うまく英語
554を操れる必要があることもあります。そのため、あなたは自分のメール
555を送る前に英語で意味が通じているかをチェックすることをお薦めします。
556
557変更を分割する
558---------------------
559
560Linux カーネルコミュニティは、一度に大量のコードの塊を喜んで受容するこ
561とはありません。変更は正確に説明される必要があり、議論され、小さい、個
562別の部分に分割する必要があります。これはこれまで多くの会社がやり慣れて
563きたことと全く正反対のことです。あなたのプロポーザルは、開発プロセスのと
564ても早い段階から紹介されるべきです。そうすれば あなたは自分のやってい
565ることにフィードバックを得られます。これは、コミュニティからみれば、あ
566なたが彼らと一緒にやっているように感じられ、単にあなたの提案する機能の
567ゴミ捨て場として使っているのではない、と感じられるでしょう。
568しかし、一度に 50 もの email をメーリングリストに送りつけるようなことは
569やってはいけません、あなたのパッチ群はいつもどんな時でもそれよりは小さ
570くなければなりません。
571
572パッチを分割する理由は以下です-
573
5741) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー
575 ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか
576 らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。し
577 かし、500行のパッチは、正しいことをレビューするのに数時間かかるかも
578 しれません(時間はパッチのサイズなどにより指数関数に比例してかかりま
579 す)
580 小さいパッチは何かあったときにデバッグもとても簡単になります。パッ
581 チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお
582 かしくなった後で)解剖するのに比べればとても簡単です。
583
5842) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす
585 る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。
586
587以下はカーネル開発者の Al Viro のたとえ話しです:
588
589 "生徒の数学の宿題を採点する先生のことを考えてみてください、先
590 生は生徒が解に到達するまでの試行錯誤をみたいとは思わないでしょ
591 う。先生は簡潔な最高の解をみたいのです。良い生徒はこれを知って
592 おり、そして最終解の前の中間作業を提出することは決してないので
593 す"
594 カーネル開発でもこれは同じです。メンテナー達とレビューア達は、
595 問題を解決する解の背後になる思考プロセスをみたいとは思いません。
596 彼らは単純であざやかな解決方法をみたいのです。
597
598あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を
599議論することのバランスをキープするのは難しいかもしれません。
600ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ
601うにするのもいいですが、変更点を小さい部分に分割して全体ではまだ完成し
602ていない仕事を(部分的に)取り込んでもらえるようにすることもいいことです。
603
604また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め
605てもらうように送っても、それは受け付けられないことを理解してください。
606
607あなたの変更を正当化する
608-------------------
609
610あなたのパッチを分割するのと同時に、なぜその変更を追加しなければならな
611いかを Linux コミュニティに知らせることはとても重要です。新機能は必要
612性と有用性で正当化されなければなりません。
613
614あなたの変更の説明
615--------------------
616
617あなたのパッチを送付する場合には、メールの中のテキストで何を言うかにつ
618いて、特別に注意を払ってください。この情報はパッチの ChangeLog に使わ
619れ、いつも皆がみられるように保管されます。これは次のような項目を含め、
620パッチを完全に記述するべきです-
621
622 - なぜ変更が必要か
623 - パッチ全体の設計アプローチ
624 - 実装の詳細
625 - テスト結果
626
627これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
628ントの ChangeLog セクションをみてください-
629 "The Perfect Patch"
630 http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
631
632これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに
633は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた
634めには多数の忍耐と決意を必要とするものです。でも、諦めないで、これは可
635能なことです。多数の人がすでにできていますし、彼らも皆最初はあなたと同
636じところからスタートしたのですから。
637
638Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process"
639(http://linux.tar.bz/articles/2.6-development_process)セクショ
640ンをこのテキストの原型にすることを許可してくれました。
641Randy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ
642てはいけないことのリストを提供してくれました。
643以下の人々のレビュー、コメント、貢献に感謝。
644Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
645Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
646Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
647David A. Wheeler, Junio Hamano, Michael Kerrisk, と Alex Shepard
648彼らの支援なしでは、このドキュメントはできなかったでしょう。
649
650Maintainer: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt
new file mode 100644
index 000000000000..b3f2b27f0881
--- /dev/null
+++ b/Documentation/ja_JP/stable_api_nonsense.txt
@@ -0,0 +1,263 @@
1NOTE:
2This is a Japanese translation of
3"Documentation/stable_api_nonsense.txt".
4It is maintained by
5IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
6and the JF Project team <http://www.linux.or.jp/JF/>.
7If you find any differences from the original file or any problems
8with the translation, please contact the maintainer of this file or
9the JF project.
10
11Please also note that the purpose of this file is to be easier to
12read for non-native English readers; it is not intended as a fork.
13So, if you have any comments or updates for this file, please try to
14update the original (English) file first.
15==================================
16これは、
17linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
18です。
19翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
20翻訳日 : 2007/06/11
21原著作者: Greg Kroah-Hartman < greg at kroah dot com >
22翻訳者 : 池田 宗広 < m-ikeda at ds dot jp dot nec dot com >
23校正者 : Masanori Kobayashi さん < zap03216 at nifty dot ne dot jp >
24 Seiji Kaneko さん < skaneko at a2 dot mbn dot or dot jp >
25==================================
26
27
28
29Linux カーネルのドライバインターフェース
30(あなたの質問すべてに対する回答とその他諸々)
31
32Greg Kroah-Hartman <greg at kroah dot com>
33
34
35この文書は、なぜ Linux ではバイナリカーネルインターフェースが定義
36されていないのか、またはなぜ不変のカーネルインターフェースを持たな
37いのか、ということを説明するために書かれた。ここでの話題は「カーネ
38ル内部の」インターフェースについてであり、ユーザー空間とのインター
39フェースではないことを理解してほしい。カーネルとユーザー空間とのイ
40ンターフェースとはアプリケーションプログラムが使用するものであり、
41つまりシステムコールのインターフェースがこれに当たる。これは今まで
42長きに渡り、かつ今後も「まさしく」不変である。私は確か 0.9 か何か
43より前のカーネルを使ってビルドした古いプログラムを持っているが、そ
44れは最新の 2.6 カーネルでもきちんと動作する。ユーザー空間とのイン
45ターフェースは、ユーザーとアプリケーションプログラマが不変性を信頼
46してよいものの一つである。
47
48
49要旨
50----
51
52あなたは不変のカーネルインターフェースが必要だと考えているかもしれ
53ないが、実際のところはそうではない。あなたは必要としているものが分
54かっていない。あなたが必要としているものは安定して動作するドライバ
55であり、それはドライバがメインのカーネルツリーに含まれる場合のみ得
56ることができる。ドライバがメインのカーネルツリーに含まれていると、
57他にも多くの良いことがある。それは、Linux をより強固で、安定な、成
58熟したオペレーティングシステムにすることができるということだ。これ
59こそ、そもそもあなたが Linux を使う理由のはずだ。
60
61
62はじめに
63--------
64
65カーネル内部のインターフェース変更を心配しなければならないドライバ
66を書きたいなどというのは、変わり者だけだ。この世界のほとんどの人は、
67そのようなドライバがどんなインターフェースを使っているかなど知らな
68いし、そんなドライバのことなど全く気にもかけていない。
69
70
71まず初めに、クローズソースとか、ソースコードの隠蔽とか、バイナリの
72みが配布される使い物にならない代物[訳注(1)]とか、実体はバイナリ
73コードでそれを読み込むためのラッパー部分のみソースコードが公開され
74ているとか、その他用語は何であれ GPL の下にソースコードがリリース
75されていないカーネルドライバに関する法的な問題について、私は「いか
76なる議論も」行うつもりがない。法的な疑問があるのならば、プログラマ
77である私ではなく、弁護士に相談して欲しい。ここでは単に、技術的な問
78題について述べることにする。(法的な問題を軽視しているわけではない。
79それらは実際に存在するし、あなたはそれをいつも気にかけておく必要が
80ある)
81
82訳注(1)
83「使い物にならない代物」の原文は "blob"
84
85
86さてここでは、バイナリカーネルインターフェースについてと、ソースレ
87ベルでのインターフェースの不変性について、という二つの話題を取り上
88げる。この二つは互いに依存する関係にあるが、まずはバイナリインター
89フェースについて議論を行いやっつけてしまおう。
90
91
92バイナリカーネルインターフェース
93--------------------------------
94
95もしソースレベルでのインターフェースが不変ならば、バイナリインター
96フェースも当然のように不変である、というのは正しいだろうか?正しく
97ない。Linux カーネルに関する以下の事実を考えてみてほしい。
98 - あなたが使用するCコンパイラのバージョンによって、カーネル内部
99 の構造体の配置構造は異なったものになる。また、関数は異なった方
100 法でカーネルに含まれることになるかもしれない(例えばインライン
101 関数として扱われたり、扱われなかったりする)。個々の関数がどの
102 ようにコンパイルされるかはそれほど重要ではないが、構造体のパデ
103 ィングが異なるというのは非常に重要である。
104 - あなたがカーネルのビルドオプションをどのように設定するかによっ
105 て、カーネルには広い範囲で異なった事態が起こり得る。
106 - データ構造は異なるデータフィールドを持つかもしれない
107 - いくつかの関数は全く実装されていない状態になり得る
108 (例:SMP向けではないビルドでは、いくつかのロックは中身が
109 カラにコンパイルされる)
110 - カーネル内のメモリは、異なった方法で配置され得る。これはビ
111 ルドオプションに依存している。
112 - Linux は様々な異なるプロセッサアーキテクチャ上で動作する。
113 あるアーキテクチャ用のバイナリドライバを、他のアーキテクチャで
114 正常に動作させる方法はない。
115
Quite a few of these problems confront you even if you simply compile a
kernel module for one specific kernel configuration, using exactly the
same C compiler that was used to build that kernel. Merely wanting to
supply a module for one specific release of one specific Linux
distribution is enough to run into them. Now multiply that one build by
the number of Linux distributions and the number of releases of each
that you support, and you quickly face the nightmare of build options
that differ from release to release. Also understand that each
distribution release ships several kernels, tuned for different
hardware (processor types and various options), so even for one single
release you will have to prepare several versions of your module.


Trust me: if you try to keep up support this way, you will eventually
lose your mind. I learned long ago, the hard way, just how difficult
it is...


Stable kernel source-level interfaces
-------------------------------------

This is an extremely "volatile" topic when discussed with people who
try to keep supporting a Linux kernel driver that is not in the main
kernel tree.


Linux kernel development is continuous and fast-paced, and it never
slows down. Kernel developers find bugs in the current interfaces and
think up better ways of doing things. When they do, they fix the
current interfaces to work more correctly. In the process, function
names may be changed, structures may grow or shrink, and function
arguments may be rethought. When that happens, every place inside the
kernel that uses those interfaces is fixed at the same time, so that
everything keeps working.


As a concrete example, take the in-kernel USB interfaces. The USB
subsystem has been rewritten at least three times, and its interfaces
changed as a result. The rewrites were done to fix a number of
different problems:
  - Synchronous data streams were changed to be asynchronous. This
    simplified many drivers and raised the throughput of all of them;
    nearly every USB device now runs at the fastest speed possible.
  - The way USB drivers obtain memory for data packets from the USB
    subsystem's core was changed. To avoid a number of documented
    deadlock conditions, every USB driver now has to supply more
    information to the USB core.


These events stand in complete contrast to the many closed-source
operating systems, which have to maintain their old USB interfaces for
long periods. Old interfaces left in place create the possibility that
a new developer accidentally uses one and develops in an incorrect way,
which puts the stability of the whole system at risk.


In both of the examples above, the developers agreed that the changes
were important and necessary, and they were carried out with relatively
little pain. If Linux had to guarantee source-level interface
stability, a new interface would have to be created while the old,
broken one was maintained alongside it, forcing extra work onto the USB
developers. Linux USB developers do their work on their own time, so
telling them to do worthless extra work for no reward is not possible.


Security problems are also very important for Linux. Once a security
problem is found, it is fixed within a very short time. Fixes that
prevent security problems have forced changes to internal kernel
interfaces many times, and each time, every driver using the changed
interfaces was changed as well. This both removes the problem and
guarantees that it cannot accidentally resurface later. If internal
interfaces were not allowed to change, fixing security problems this
way and guaranteeing that they will not recur would be impossible.


Kernel interfaces are cleaned up over time. An interface that nobody
uses is deleted. This keeps the kernel as small as possible and
guarantees that every active interface is tested as well as it can be.
(Testing an unused interface for validity is all but impossible.)



What to do
----------

So if there is a Linux kernel driver that is not in the main kernel
tree, what are you, the developer, supposed to do? Supplying binary
drivers for every kernel version of every distribution is a nightmare,
and chasing an ever-changing kernel interface is grueling work too.


The answer is simple: put the driver into the main kernel tree. (Note
that we are talking about drivers released under the GPL here. If your
code does not fall into that category: goodbye, good luck, you are on
your own. Here are the comments from Andrew and Linus <put a link to
the comments from Andrew and Linus here>.) Once the driver is in the
main tree, if a kernel interface changes, the driver will be fixed by
the developer who made the change. You get a driver that always builds
and works properly, with almost no effort on your part.


Having the driver in the main kernel tree has these very pleasant
effects:
  - The quality of the driver rises, while the maintenance cost (to the
    original developer) falls.
  - Other developers add features to your driver.
  - Someone finds and fixes bugs in your driver.
  - Someone finds improvements to make in your driver.
  - When external interface changes require the driver to be updated,
    someone updates it for you.
  - The driver is automatically included and shipped in every Linux
    distribution, without you having to ask the distros for it.


Linux now supports more devices "out of the box" than any other
operating system, and it supports them on more processor architectures
than any other operating system. The Linux development model is proven,
and there is no doubt it will keep going in the right direction. :)



------

Thanks to Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder,
Robert Love, and Nishanth Aravamudan for their review and comments on
early drafts of this document.

diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
new file mode 100644
index 000000000000..48fc67bfbe3d
--- /dev/null
+++ b/Documentation/zh_CN/HOWTO
@@ -0,0 +1,536 @@
Chinese translated version of Documentation/HOWTO

If you have any comment on or update to the content, please contact the
original document maintainer directly. However, if you have problems
communicating in English you can also ask the Chinese maintainer for
help. Contact the Chinese maintainer if this translation is outdated
or there is a problem with the translation.

Maintainer: Greg Kroah-Hartman <greg@kroah.com>
Chinese maintainer: Li Yang <leoli@freescale.com>
---------------------------------------------------------------------
Chinese translation of Documentation/HOWTO

If you have comments on or updates to this document, please contact the
maintainer of the original document directly. If you have trouble
communicating in English, you can also ask the Chinese maintainer for
help. Please contact the Chinese maintainer if this translation is out
of date or has problems.

English maintainer:   Greg Kroah-Hartman <greg@kroah.com>
Chinese maintainer:   李阳 Li Yang <leoli@freescale.com>
Chinese translator:   李阳 Li Yang <leoli@freescale.com>
Chinese proofreaders: 钟宇 TripleX Chung <xxx.phy@gmail.com>
                      陈琦 Maggie Chen <chenqi@beyondsoft.com>
                      王聪 Wang Cong <xiyou.wangcong@gmail.com>

The translated document follows
---------------------------------------------------------------------

How to participate in Linux kernel development
----------------------------------------------

This is the be-all, end-all document on the subject of taking part in
Linux kernel development. It will guide you on your way to becoming a
Linux kernel developer and teach you how to work with the Linux kernel
development community. It tries not to include any technical details of
kernel programming, but it will point you down the right path for
acquiring that knowledge.

If anything in this document becomes out of date, please send a patch
to the maintainer of this file, listed at the end of the document.


Getting started
---------------

So you want to find out how to become a Linux kernel developer? Or your
boss has told you, "Write a Linux driver for this device." The goal of
this document is to teach you everything you need to know to achieve
that: it describes the process you will go through and gives hints on
how to work with the kernel community. It will also try to explain why
the community works the way it does.

The kernel is written mostly in C, with some architecture-dependent
parts in assembly. Fluency in C is required for kernel development.
Unless you want to do low-level development for some architecture, you
do not need to know (any architecture's) assembly language. The
following books cannot replace a solid C education and years of
development experience, but they make fine references when needed:
 - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
   (Chinese edition: 《C程序设计语言（第2版·新版）》, 机械工业出版社)
 - "Practical C Programming" by Steve Oualline [O'Reilly]
   (Chinese edition: 《实用C语言编程（第三版）》, 中国电力出版社)
 - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
   (Chinese edition: 《C语言参考手册（原书第5版）》, 机械工业出版社)

The kernel is developed with GNU C and the GNU toolchain. While it
adheres to the ISO C89 standard, it also uses a number of extensions
that the standard does not define. The kernel is a freestanding C
environment with no reliance on the standard C library, so some parts
of the C standard are unsupported: for instance, long long division of
large values and floating-point arithmetic are not allowed. It can
sometimes be hard to work out the kernel's assumptions about the
toolchain and which extensions it uses, and unfortunately there is no
definitive reference that explains them. Check the gcc info pages
(run "info gcc") for some information.
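
As a small, compilable illustration of such extensions (this example
was added in editing and is not part of the HOWTO itself), the snippet
below uses typeof, a statement expression, and designated initializers,
all GNU C features that ISO C89 alone does not define:

	/* Illustrative only: a few GNU C extensions the kernel
	 * relies on, shown in ordinary userspace code. */
	#include <stdio.h>

	/* statement expression + typeof: the classic kernel-style min() */
	#define min(x, y) ({                    \
		typeof(x) _x = (x);             \
		typeof(y) _y = (y);             \
		_x < _y ? _x : _y; })

	struct point { int x, y; };

	int main(void)
	{
		struct point p = { .y = 2, .x = 1 }; /* designated initializers */
		printf("min(%d, %d) = %d\n", p.x, p.y, min(p.x, p.y));
		return 0;
	}

It builds with gcc but is rejected by a strict ISO C89 compiler.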

Remember that you are trying to learn how to work with an existing
development community. It is a diverse group of people with high
standards for code, style, and procedure. Those standards were worked
out over long practice as what works best for a large, geographically
dispersed team. They are well documented, so learn as much about them
as you can in advance, and do not expect others to adapt to you or to
your company's way of doing things.


Legal issues
------------

The Linux kernel source code is released under the GPL. For details of
the license, see the file COPYING in the top directory of the source
tree. If you have deeper questions about it, contact a lawyer rather
than asking on the Linux kernel mailing list: the people on the list
are not lawyers, and you should not expect their statements to carry
legal weight.

For common questions and answers about the GPL, see:
	http://www.gnu.org/licenses/gpl-faq.html


Documentation
-------------

The Linux kernel source tree contains a wealth of documents that are
invaluable for learning how to interact with the kernel community. When
a new feature is added to the kernel, it is best to add documentation
explaining how to use it as well. When a kernel change alters an
interface the kernel exposes to userspace, it is best to send the
relevant information, or a patch to the manual pages, to the manpages
maintainer at mtk-manpages@gmx.net, so that the change is explained to
the manpages maintainer.

Here is a list of files in the kernel source tree that are required
reading:
  README
    A brief background on the Linux kernel and a description of how to
    configure and build it. New kernel users should start here.

  Documentation/Changes
    The minimum list of software packages needed to build and run the
    kernel.

  Documentation/CodingStyle
    Describes the Linux kernel coding style and some of the rationale
    behind it. All new code is expected to follow the conventions in
    this document. Most maintainers only accept patches that follow the
    rules, and many people will only review code that is in the proper
    style. (A brief style sketch follows this list.)

  Documentation/SubmittingPatches and Documentation/SubmittingDrivers
    These two documents describe in explicit detail how to create and
    send a patch, including (but not limited to):
       - email contents
       - email format
       - choosing recipients
    Following these rules does not guarantee success (all patches are
    scrutinized closely for content and style), but ignoring them
    almost always means failure.

    Other excellent documents on how to create patches properly
    include:
	"The Perfect Patch"
		http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
	"Linux kernel patch submission format"
		http://linux.yyz.us/patch-format.html

  Documentation/stable_api_nonsense.txt
    Argues why the kernel deliberately has no stable in-kernel API,
    that is, why it does not have things like:
      - subsystem shim layers (for compatibility?)
      - drivers easily portable between operating systems
      - slowing down (or even preventing) rapid change in the kernel
        source
    This document is crucial to understanding the Linux development
    philosophy, and it is also important for people moving to Linux
    from development on other operating systems.

  Documentation/SecurityBugs
    If you believe you have found a security problem in the Linux
    kernel, follow the steps in this document to notify the kernel
    developers and help solve the issue.

  Documentation/ManagementStyle
    Describes how Linux kernel maintainers operate and the shared ethos
    behind their methods. This is important reading for anyone new to
    kernel development (or anyone curious about it), because it
    resolves many common misconceptions about the unique behavior of
    kernel maintainers.

  Documentation/stable_kernel_rules.txt
    Explains the rules by which stable kernel releases happen and what
    to do to get a change into one of them.

  Documentation/kernel-docs.txt
    A list of external documentation relevant to kernel development.
    Consult it if you cannot find what you are looking for in the
    in-kernel documentation.

  Documentation/applying-patches.txt
    A good introduction to what a patch is and how to apply it to the
    different development branches of the kernel.

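As a quick taste of the conventions mentioned under
Documentation/CodingStyle above (an editing addition, not text from the
HOWTO itself): indentation uses tabs, braces open on the same line for
statements but on their own line for functions, and names are lowercase
with underscores. A minimal sketch:

	/* Illustrative only: a tiny function written in the kernel's
	 * coding style (tabs shown here as indentation). */
	#include <stdio.h>

	static int count_set_bits(unsigned long word)
	{
		int bits = 0;

		while (word) {          /* statement braces share the line */
			bits += word & 1;
			word >>= 1;
		}
		return bits;
	}

	int main(void)
	{
		printf("%d\n", count_set_bits(0xf0));
		return 0;
	}

See Documentation/CodingStyle itself for the full set of rules.
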
The kernel also has a large number of documents generated automatically
from the source code itself, including a full description of the
in-kernel API and rules for handling locking properly. The generated
documents are placed in the Documentation/DocBook/ directory. From the
top of the kernel source tree, the following commands generate them in
PDF, Postscript, HTML, and man page formats respectively:
	make pdfdocs
	make psdocs
	make htmldocs
	make mandocs


Becoming a kernel developer
---------------------------
If you know nothing about Linux kernel development, you should look at
the Linux KernelNewbies project:
	http://kernelnewbies.org
It has a mailing list where you can ask almost any kind of basic kernel
development question (be sure to search the archives first, in case
someone has already answered the same question). It also has an IRC
channel for real-time feedback, and plenty of documentation that is
very helpful for learning about Linux kernel development.

The website has basic information about code organization, subsystems,
and current projects (both in-tree and maintained separately). It also
describes basic logistics, such as how to compile a kernel and apply a
patch.

If you want to join the kernel development community and help with some
task but cannot find where to start, go to the Linux Kernel Janitors
project:
	http://janitor.kernelnewbies.org/
It is an excellent starting point. It lists relatively simple problems
in the kernel source that need to be cleaned up or corrected. By
working with the developers in charge of this project, you will learn
the basics of getting your patches into the kernel, and you may be
pointed toward what to work on next if you do not yet have an idea.

If you already have a chunk of code that you want to put into the
kernel but need some help getting it into the proper form, the
kernel-mentors project was created to help you with exactly that. It is
a mailing list, found at:
	http://selenic.com/mailman/listinfo/kernel-mentors

Before actually modifying kernel code, you must understand how the code
in question works. For that, nothing beats reading it directly (most of
the tricky parts are well commented), perhaps with the help of
specialized tools. One especially recommended tool is the Linux
Cross-Reference project, which presents the source code on indexed,
cross-referenced web pages. An up-to-date repository of the kernel code
can be browsed at:
	http://sosdg.org/~coywolf/lxr/


The development process
-----------------------

Linux kernel development currently consists of a few main kernel
"branches" and many subsystem-specific branches. These are:
  - the main 2.6.x kernel tree
  - the 2.6.x.y -stable kernel tree
  - the 2.6.x -git kernel patches
  - the 2.6.x -mm kernel patches
  - subsystem-specific kernel trees and patch sets


2.6.x kernel tree
-----------------
2.6.x kernels are maintained by Linus Torvalds (the creator of Linux)
himself, and can be found on kernel.org in the pub/linux/kernel/v2.6/
directory. The development process runs as follows:
  - As soon as a new kernel is released, a two-week merge window opens.
    During this period maintainers can submit large changes to Linus,
    usually patches that have already sat in the -mm kernel for a few
    weeks. The preferred way to submit large changes is git (the
    kernel's source management tool; more information is available at
    http://git.or.cz/), but plain patches are fine as well.
  - After two weeks an -rc1 kernel is released, and from then on only
    patches containing no new features that could affect the stability
    of the whole kernel can be accepted. Note that a brand-new driver
    (or filesystem) may still be accepted after -rc1, because such a
    change is completely self-contained and touches no other code, so
    it carries no risk of causing a regression. After -rc1, git can
    still be used to send patches to Linus, but each patch also needs
    to be sent to the relevant public mailing list for review.
  - A new -rc is released whenever Linus deems the current git tree to
    be in a reasonably sane state, adequate for testing. The goal is to
    release a new -rc kernel every week.
  - This process continues until the kernel is considered stable
    enough; it lasts roughly six weeks.

Regarding kernel releases, it is worth quoting what Andrew Morton said
on the linux-kernel mailing list:
	"Nobody knows when a kernel will be released, because it's
	released according to perceived bug status, not according to a
	preconceived timeline."


2.6.x.y -stable kernel tree
---------------------------
Kernels with four-digit version numbers are -stable kernels. They
contain relatively small, critical fixes on top of a given 2.6.x
kernel, addressing security problems or serious regressions.

This branch is for users who want the most recent stable kernel and are
not interested in helping test development or experimental versions.

If no 2.6.x.y kernel is available, the highest-numbered 2.6.x kernel is
the current stable kernel.

2.6.x.y kernels are maintained by the "stable" team
<stable@kernel.org> and are usually released every other week.

The file Documentation/stable_kernel_rules.txt in the kernel source
documents what kinds of changes are acceptable for the -stable tree and
how the release process works.


2.6.x -git patches
------------------
These are daily snapshots of Linus's kernel tree, which is managed in a
git repository (hence the name). The patches are usually published
daily and reflect the latest state of Linus's tree. They are more
experimental than -rc kernels, because they are generated automatically
without even a cursory glance at whether they are sane.


2.6.x -mm kernel patches
------------------------
These are experimental kernel patch sets maintained by Andrew Morton.
Andrew takes all the subsystem trees and patches and mushes them
together, along with many patches plucked from the linux-kernel mailing
list. This tree serves as a proving ground for new features and
patches. Once a patch has proven its worth in -mm, Andrew or the
relevant subsystem maintainer pushes it to Linus for inclusion in the
main kernel tree.

Testing new patches in the -mm tree before sending them to Linus for
inclusion in the main kernel tree is strongly encouraged.

These kernels are not appropriate for systems that need to run stably;
running them is riskier than running any other branch.

If you want to help the kernel development process, please test these
kernel releases and report on the linux-kernel mailing list whether you
hit problems or whether everything works.

Besides the extra experimental patches, an -mm kernel usually also
includes whatever changes were in the mainline -git tree at release
time.

The -mm kernels are not released on a fixed schedule, but usually a few
(one to three is common) come out between each pair of -rc kernels.


Subsystem-specific kernel trees and patch sets
----------------------------------------------
Quite a few kernel subsystem developers expose their own development
trees so that others can follow what is happening in the different
areas of the kernel. As described above, these trees are merged into
the -mm kernel releases.

Here is a list of some of the currently available kernel trees:
  git-managed trees:
    - Kbuild development tree, Sam Ravnborg <sam@ravnborg.org>
	git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git

    - ACPI development tree, Len Brown <len.brown@intel.com>
	git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git

    - Block development tree, Jens Axboe <axboe@suse.de>
	git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git

    - DRM development tree, Dave Airlie <airlied@linux.ie>
	git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git

    - ia64 development tree, Tony Luck <tony.luck@intel.com>
	git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git

    - ieee1394 development tree, Jody McIntyre <scjody@modernduck.com>
	git.kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git

    - infiniband development tree, Roland Dreier <rolandd@cisco.com>
	git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git

    - libata development tree, Jeff Garzik <jgarzik@pobox.com>
	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git

    - network drivers development tree, Jeff Garzik <jgarzik@pobox.com>
	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git

    - pcmcia development tree, Dominik Brodowski <linux@dominikbrodowski.net>
	git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git

    - SCSI development tree, James Bottomley <James.Bottomley@SteelEye.com>
	git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git

  quilt-managed patch sets:
    - USB, PCI, driver core and I2C, Greg Kroah-Hartman <gregkh@suse.de>
	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
    - x86-64 and parts of i386, Andi Kleen <ak@suse.de>
	ftp.firstfloor.org:/pub/ak/x86_64/quilt/

  Other kernel trees can be found listed at http://git.kernel.org and
  in the MAINTAINERS file.

Reporting bugs
--------------

bugzilla.kernel.org is where the Linux kernel developers track kernel
bugs. Users are encouraged to report all the bugs they find in this
tool. For details on how to use the kernel bugzilla, see:
	http://test.kernel.org/bugzilla/faq.html

The file REPORTING-BUGS in the main kernel source directory has a good
template. It guides you on how to report a possible kernel bug and what
information kernel developers need to track down the problem.


Managing bug reports
--------------------

One of the best ways to practice your kernel development skills is by
fixing bugs reported by other people. Not only do you help make the
kernel more stable, you also learn to fix real-world problems, improve
your skills, and make other developers aware of your presence. Fixing
bugs is one of the best ways to earn merit among other developers,
because not many people like wasting time fixing other people's bugs.

To work on already-reported bugs, go to http://bugzilla.kernel.org. If
you want to be notified of new bug reports, you can subscribe to the
bugme-new mailing list (only new bug reports are mailed there) or to
the bugme-janitors mailing list (every change in bugzilla is mailed
there):

	http://lists.osdl.org/mailman/listinfo/bugme-new
	http://lists.osdl.org/mailman/listinfo/bugme-janitors


Mailing lists
-------------

As described in the documents above, the majority of core kernel
developers are on the Linux Kernel mailing list. Details on how to
subscribe and unsubscribe can be found at:
	http://vger.kernel.org/vger-lists.html#linux-kernel
The list is archived in many places on the web; use a search engine to
find the archives. For example:
	http://dir.gmane.org/gmane.linux.kernel
Before posting, it is strongly recommended that you search the archives
for the topic you want to raise. Many things that have already been
discussed in detail are recorded only in the mailing list archives.

Most kernel subsystems also run their own mailing lists for their
development work. See the MAINTAINERS file to find the list for a given
topic.

Many of the lists are hosted on kernel.org; information about them is
at:
	http://vger.kernel.org/vger-lists.html

Please remember to follow good behavioral habits on the lists. Though a
bit cheesy, the following URL has some simple rules for interacting
with these (or any) mailing lists:
	http://www.albion.com/netiquette/

When many people reply to your mail, the CC list can grow quite long.
Do not remove anyone from the CC list without a good reason, and do not
reply only to the list address. Get used to receiving each mail twice,
once from the sender and once from the list, rather than trying to tune
that away with fancy mail headers; people will not like it.

Keep the context and the attribution of what you reply to intact: keep
the "So-and-so wrote:" lines at the top of your reply, and put your
comments between the quoted sections instead of at the top of the mail.

If you include patches in your mail, make sure they are plain readable
text, as described in Documentation/SubmittingPatches. Kernel
developers do not want to deal with attachments or compressed patches;
only inline plain text lets them comment on individual lines of your
code. Make sure your mail program does not mangle spaces and tabs. A
good defensive test is to send the mail to yourself first and try to
apply the received patch yourself; if that does not work, fix or
replace your mail program until it does.

Above all, remember to show respect to the other subscribers.

Working with the community
--------------------------

The goal of the kernel community is to provide the best possible
kernel. When you submit a patch hoping to have it accepted, it will be
reviewed on its technical merits and those alone. So what should you
expect?
  - criticism
  - comments
  - requests for change
  - requests for justification
  - silence

Remember, this is the normal path for a patch into the kernel. You must
be able to take the criticism and comments, evaluate them on a
technical level, and then either rework your patch or give clear and
concise reasons why the changes are unnecessary. If your mail gets no
response at all, wait a few days and try again; sometimes things get
lost in the flood.

What you should not do:
  - expect your patch to be accepted without question
  - become defensive
  - ignore the comments
  - resubmit the patch without making any of the requested changes

In a community that seeks the best possible technical solution, there
will always be differing opinions on how beneficial a patch is. You
have to be cooperative and willing to adapt your idea to fit the
kernel's style, or at least willing to prove that your idea is worth
it. Remember, being wrong is acceptable as long as you are willing to
work toward a solution that is right.

It is normal for the response to your first patch to be a pile of
suggested changes. This does not mean your patch will not be accepted,
and it is not directed at you personally. Simply correct all the issues
raised and resend your patch.

Differences between the kernel community and corporate structures
------------------------------------------------------------------

The kernel community works quite differently from most traditional
corporate development teams. The following examples may help you avoid
some potential problems:
  Good things to say when introducing your proposed change:
    - "It solves multiple problems at once."
    - "It deletes 2000 lines of code."
    - "Here is a patch; it explains what I am trying to describe."
    - "I have tested it on 5 different architectures..."
    - "Here is a series of small patches that..."
    - "This increases performance on typical machines..."

  Things you should avoid saying:
    - "We did it this way in AIX/ptx/Solaris, so it must be good..."
    - "I have been doing this for 20 years, so..."
    - "My company needs this to make money."
    - "This is needed for our enterprise product line."
    - "Here is my 1000-page design document describing my idea."
    - "Here is a 5000-line patch that..."
    - "I rewrote the current mess, and here it is..."
    - "I have a deadline, so this patch needs to be applied now."

Another way the kernel community differs from most traditional software
development teams is that there is no face-to-face interaction. One
benefit of using email and IRC as the primary forms of communication is
that discrimination by gender or race is rarer. The Linux kernel work
environment is more accepting of women and minorities, because to
everyone else you are just an email address. The international aspect
also helps level the playing field, because you cannot guess a person's
gender from their name: a man might be named Li Li, and a woman might
be named Wang Gang. Most women who have worked on the Linux kernel and
expressed an opinion have reported positive experiences.

For people not comfortable with English, language can be a barrier that
causes problems. Expressing ideas properly on the mailing lists
requires a good command of the language, so you are advised to check
that your English is correct before sending a mail.


Break up your changes
---------------------

The Linux kernel community does not gladly accept large chunks of code
dropped on it all at once. Changes need to be properly introduced,
discussed, and split into small, independent portions. This is almost
the exact opposite of what companies are used to doing. Your idea
should be made known early in the development process, so that you get
timely feedback on what you are doing. Doing so also lets the community
feel that you are working with them, rather than merely using them as a
dumping ground for your features. In any case, do not send 50 emails to
a mailing list at once; your patch series should almost never need to
be that long.

The reasons for splitting patches are as follows:

1) Small patches are more likely to be accepted, because verifying
   their correctness does not take much time or effort. A 5-line patch
   may be accepted after a single glance from the maintainer, whereas a
   500-line patch can take hours to review for correctness (the time
   needed grows roughly exponentially with patch size).

   Small patches also make debugging very easy when something goes
   wrong. Backing patches out one by one is far easier than dissecting
   one big applied patch that broke something else.

2) It is important not only to send small patches, but also to rework,
   simplify (or merely reorder) them before submitting.

Here is an analogy from kernel developer Al Viro:
	"Think of a teacher grading a student's math homework. The
	teacher does not want to see the student's trials and errors on
	the way to the right solution; he wants to see the cleanest,
	most elegant answer. A good student knows this, and would never
	submit the intermediate work done before the final solution."

	The same goes for kernel development. Maintainers and reviewers
	do not want to see a person's thought process while solving a
	problem. They only want to see the simple, elegant solution.

It can be hard to strike a balance between presenting a first-rate
solution and working with the community on unfinished work. So it is
best to start collecting feedback early so you can improve your work,
but also to keep your changes in many small chunks, so that parts of
them can be accepted even before the whole project is ready for
inclusion.

You must also understand that it is unacceptable to try to submit
unfinished work into the kernel and then find time to fix it up later.
496
497证明修改的必要性
498----------------
499除了将补丁拆成小块,很重要的一点是让Linux社区了解他们为什么需要这样修改。
500你必须证明新功能是有人需要的并且是有用的。
501
502
503记录修改
504--------
505
506当你发送补丁的时候,需要特别留意邮件正文的内容。因为这里的信息将会做为补
507丁的修改记录(ChangeLog),会被一直保留以备大家查阅。它需要完全地描述补丁,
508包括:
509 - 为什么需要这个修改
510 - 补丁的总体设计
511 - 实现细节
512 - 测试结果
513
514想了解它具体应该看起来像什么,请查阅以下文档中的“ChangeLog”章节:
515 “The Perfect Patch”
516 http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
517
518
519这些事情有时候做起来很难。要在任何方面都做到完美可能需要好几年时间。这是
520一个持续提高的过程,它需要大量的耐心和决心。只要不放弃,你一定可以做到。
521很多人已经做到了,而他们都曾经和现在的你站在同样的起点上。
522
523
524---------------
525感谢Paolo Ciarrocchi允许“开发流程”部分基于他所写的文章
526(http://linux.tar.bz/articles/2.6-development_process),感谢Randy
527Dunlap和Gerrit Huizenga完善了应该说和不该说的列表。感谢Pat Mochel, Hanna
528Linder, Randy Dunlap, Kay Sievers, Vojtech Pavlik, Jan Kara, Josh Boyer,
529Kees Cook, Andrew Morton, Andi Kleen, Vadim Lobanov, Jesper Juhl, Adrian
530Bunk, Keri Harris, Frans Pop, David A. Wheeler, Junio Hamano, Michael
531Kerrisk和Alex Shepard的评审、建议和贡献。没有他们的帮助,这篇文档是不可
532能完成的。
533
534
535
536英文版维护者: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/zh_CN/stable_api_nonsense.txt b/Documentation/zh_CN/stable_api_nonsense.txt
new file mode 100644
index 000000000000..c26a27d1ee7d
--- /dev/null
+++ b/Documentation/zh_CN/stable_api_nonsense.txt
@@ -0,0 +1,157 @@
Chinese translated version of Documentation/stable_api_nonsense.txt

If you have any comment on or update to the content, please contact the
original document maintainer directly. However, if you have problems
communicating in English you can also ask the Chinese maintainer for
help. Contact the Chinese maintainer if this translation is outdated or
there is a problem with the translation.

Maintainer: Greg Kroah-Hartman <greg@kroah.com>
Chinese maintainer: TripleX Chung <zhongyu@18mail.cn>
---------------------------------------------------------------------
Chinese translation of Documentation/stable_api_nonsense.txt

If you have comments on or updates to this document, please contact the
maintainer of the original document directly. If you have trouble
communicating in English, you can also ask the Chinese maintainer for
help. Please contact the Chinese maintainer if this translation is out
of date or has problems.

English maintainer:  Greg Kroah-Hartman <greg@kroah.com>
Chinese maintainer:  钟宇 TripleX Chung <zhongyu@18mail.cn>
Chinese translator:  钟宇 TripleX Chung <zhongyu@18mail.cn>
Chinese proofreader: 李阳 Li Yang <leoli@freescale.com>
The translated document follows
---------------------------------------------------------------------

This document was written to explain why Linux has neither a binary
kernel interface nor a stable kernel interface. The kernel interfaces
discussed here are the interfaces inside the kernel, not the interface
between kernel and userspace. The kernel-to-userspace interface is the
system call interface that application programs use; it has remained
essentially unchanged throughout history and will not change in the
future. I have old applications compiled on a 0.9-or-so (or earlier)
kernel that still run fine on Linux distributions with a 2.6 kernel.
Users and application authors can treat that interface as stable.


Executive summary
-----------------

You may think you want a stable kernel interface, but what you actually
need is something else, even if you do not realize it. What you need is
a stable running driver, and you can get that only by putting your
driver into the mainline kernel source tree. Doing so brings many other
benefits as well; they are what has made Linux the strong, stable, and
mature operating system it is, and they are why you chose Linux in the
first place.


Intro
-----

Only the "odd" people who write drivers need to worry about changes to
the kernel interfaces. For the vast majority of users, the kernel
interface is neither visible nor a concern.

First, I am not going to discuss any legal issues around non-GPL kernel
drivers: drivers with closed source, hidden source, binary-only
distribution, source wrappers around binaries, or any other form under
which the source code is not released under the GPL. If you have legal
questions, consult a lawyer; I am only a programmer, so I will only
discuss the technical issues here. (This is not to belittle the legal
issues. They are real, and you need to stay aware of them.)

Having limited ourselves to technical matters, we have the following
two topics: the binary kernel interface and the stable kernel source
interface. The two are interrelated, but let us deal with the binary
interface first and get it out of the way.


Binary kernel interface
-----------------------
Suppose we had a stable kernel source interface; would a stable binary
interface follow naturally? Wrong. Consider these facts about the Linux
kernel:
  - Depending on the version of the C compiler in use, structures in
    kernel data will be aligned differently, and functions may be
    represented differently in the code (whether a function is compiled
    inline depends on compiler behavior). The representation of
    individual functions is not important, but the internal alignment
    of data structures is critical.
  - Depending on the kernel configuration options, many things in the
    kernel can change:
      - the same structure may contain different members
      - some functions may not be implemented at all (for example, with
        SMP support compiled out, some lock functions are defined as
        empty)
      - memory used by the kernel can be aligned in different ways,
        depending on the kernel configuration options.
  - Linux runs on processors of many different architectures. A binary
    driver compiled on one architecture cannot run correctly on
    another. (A sketch of one such portability hazard follows.)

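To illustrate the architecture point above (a sketch added in editing,
not part of the original text), the same four bytes in memory decode to
different integers on little-endian and big-endian processors, which is
one reason a binary driver built for one architecture cannot work on
another:

	/* Illustrative only: byte order changes what the same
	 * in-memory bytes mean. */
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	int main(void)
	{
		const uint8_t raw[4] = { 0x12, 0x34, 0x56, 0x78 };
		uint32_t value;

		memcpy(&value, raw, sizeof(value));
		/* Prints 0x78563412 on little-endian machines (e.g. x86)
		 * and 0x12345678 on big-endian ones. */
		printf("0x%08x\n", value);
		return 0;
	}
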
For one specific kernel these constraints are not hard to satisfy:
compile the driver module with the same C compiler and the same kernel
configuration options that were used for the kernel. That is entirely
adequate for providing a driver for one specific version of one
specific Linux distribution. But if you want to ship one driver for
different versions of different distributions, you need to build it on
every release, against that release's own kernel configuration, which
is a nightmare. Note also that every Linux distribution ships several
different kernels, each optimized for different hardware (many
different processor types, plus different configuration options), so
each driver release requires many different kernel module builds.

Trust me: if you really try to ship drivers this way, you will slowly
go insane. I learned this lesson the hard way, a long time ago...


Stable kernel source interface
------------------------------

This topic becomes endlessly contentious when someone wants to keep a
kernel driver usable with the latest kernels without putting it into
the mainline source tree.

Kernel development is continuous and fast-paced; it never slows down.
Kernel developers find bugs in the current interfaces, or find better
ways to implement things, and once they do, they quickly change the
interfaces. When an interface changes, function names may change,
structures may be extended or trimmed, and function parameters may
change. The places in the kernel that use the interface are fixed at
the same time, guaranteeing that everything keeps working.

As an example, the kernel's USB driver interface has been rewritten at
least three times over the life of the USB subsystem. The rewrites
addressed the following problems:
  - The data stream was changed from a synchronous to an asynchronous
    model. This reduced the complexity of a number of drivers and
    increased the throughput of all USB drivers, so that almost all USB
    devices now run at their maximum possible speed.
  - The way the USB core allocates data packet memory for USB drivers
    was changed. All drivers now have to pass more parameters to the
    USB core, which fixed many documented deadlocks.

This is in sharp contrast to some closed-source operating systems,
which have had to keep maintaining their old USB interfaces. That
creates the possibility that new developers still accidentally use the
old interfaces and write code in improper ways, harming the stability
of the operating system.

In the examples above, all the developers agreed that these were
important changes, and in that situation the cost of making them was
low. If Linux had to keep a stable kernel source interface, a new
interface would have to be created, while the old, broken one was
maintained forever, creating extra work for the Linux USB developers.
Since all Linux USB driver authors work on their own time, asking them
to do pointless extra work for free is not possible.

Security is very important for Linux. When a security problem is found,
it is fixed within a short time. In many cases this has meant rewriting
some internal kernel interfaces to eliminate the problem at its root.
When an interface is rewritten, all drivers using it must be fixed at
the same time, ensuring the hole is closed and cannot reappear in the
future. If internal interfaces were not allowed to change, fixing such
security problems, and guaranteeing that they will not recur, would be
impossible.

Developers also keep cleaning up kernel interfaces. If nobody uses an
interface anymore, it is deleted. This keeps the kernel as small as
possible and ensures that every remaining interface is tested as
thoroughly as possible. (An interface that nobody uses cannot be tested
well.)


What to do
----------

If you have written a Linux kernel driver that is not yet in the Linux
source tree, what should you, as a developer, do? Shipping a binary
driver for every version of every distribution is a nightmare, and
keeping up with an ever-changing kernel interface is grueling work too.
The answer is simple: get your driver into the kernel source tree.
(Remember, we are talking about drivers released under the GPL here; if
your code does not qualify, good luck, you will have to solve this
yourself, you leech <put the link to Andrew's and Linus's definition of
a leech here>.) Once your code is in the mainline kernel source tree,
whenever a kernel interface changes, your driver will be fixed directly
by the person who changed the interface. You get a driver that always
builds and always works, with almost nothing to do on your part.

Putting your driver into the kernel source tree brings many benefits:
  - The driver's quality rises, while the maintenance cost (to the
    original author) falls.
  - Other people add new features to your driver.
  - Other people find and fix bugs in your driver.
  - Other people find optimization opportunities in your driver.
  - Other people fix your driver when changes to external interfaces
    require it.
  - The driver ships automatically with every Linux distribution,
    without you having to contact any distributor.

Compared with other operating systems, Linux provides ready-made
drivers for more different devices, and it supports those devices on
processors of more different architectures. This proven development
model must be doing something right :)

-------------
Thanks to Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder,
Robert Love, and Nishanth Aravamudan for their review of and
suggestions on early versions of this document.

English maintainer: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7916f4b86d23..ae01d86070bb 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -84,4 +84,5 @@ source "drivers/auxdisplay/Kconfig"
84 84
85source "drivers/kvm/Kconfig" 85source "drivers/kvm/Kconfig"
86 86
87source "drivers/uio/Kconfig"
87endmenu 88endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 6d9d7fab77f5..c34c8efff609 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_ATA) += ata/
40obj-$(CONFIG_FUSION) += message/ 40obj-$(CONFIG_FUSION) += message/
41obj-$(CONFIG_FIREWIRE) += firewire/ 41obj-$(CONFIG_FIREWIRE) += firewire/
42obj-$(CONFIG_IEEE1394) += ieee1394/ 42obj-$(CONFIG_IEEE1394) += ieee1394/
43obj-$(CONFIG_UIO) += uio/
43obj-y += cdrom/ 44obj-y += cdrom/
44obj-y += auxdisplay/ 45obj-y += auxdisplay/
45obj-$(CONFIG_MTD) += mtd/ 46obj-$(CONFIG_MTD) += mtd/
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0455aa78fa13..3599ab2506d2 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -24,6 +24,8 @@
24#include "base.h" 24#include "base.h"
25#include "power/power.h" 25#include "power/power.h"
26 26
27extern const char *kobject_actions[];
28
27int (*platform_notify)(struct device * dev) = NULL; 29int (*platform_notify)(struct device * dev) = NULL;
28int (*platform_notify_remove)(struct device * dev) = NULL; 30int (*platform_notify_remove)(struct device * dev) = NULL;
29 31
@@ -303,10 +305,25 @@ out:
303static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, 305static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
304 const char *buf, size_t count) 306 const char *buf, size_t count)
305{ 307{
306 if (memcmp(buf, "add", 3) != 0) 308 size_t len = count;
307 dev_err(dev, "uevent: unsupported action-string; this will " 309 enum kobject_action action;
308 "be ignored in a future kernel version"); 310
311 if (len && buf[len-1] == '\n')
312 len--;
313
314 for (action = 0; action < KOBJ_MAX; action++) {
315 if (strncmp(kobject_actions[action], buf, len) != 0)
316 continue;
317 if (kobject_actions[action][len] != '\0')
318 continue;
319 kobject_uevent(&dev->kobj, action);
320 goto out;
321 }
322
323 dev_err(dev, "uevent: unsupported action-string; this will "
324 "be ignored in a future kernel version\n");
309 kobject_uevent(&dev->kobj, KOBJ_ADD); 325 kobject_uevent(&dev->kobj, KOBJ_ADD);
326out:
310 return count; 327 return count;
311} 328}
312 329
@@ -643,6 +660,82 @@ static int setup_parent(struct device *dev, struct device *parent)
643 return 0; 660 return 0;
644} 661}
645 662
663static int device_add_class_symlinks(struct device *dev)
664{
665 int error;
666
667 if (!dev->class)
668 return 0;
669 error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj,
670 "subsystem");
671 if (error)
672 goto out;
673 /*
674 * If this is not a "fake" compatible device, then create the
675 * symlink from the class to the device.
676 */
677 if (dev->kobj.parent != &dev->class->subsys.kobj) {
678 error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
679 dev->bus_id);
680 if (error)
681 goto out_subsys;
682 }
683 /* only bus-device parents get a "device"-link */
684 if (dev->parent && dev->parent->bus) {
685 error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
686 "device");
687 if (error)
688 goto out_busid;
689#ifdef CONFIG_SYSFS_DEPRECATED
690 {
691 char * class_name = make_class_name(dev->class->name,
692 &dev->kobj);
693 if (class_name)
694 error = sysfs_create_link(&dev->parent->kobj,
695 &dev->kobj, class_name);
696 kfree(class_name);
697 if (error)
698 goto out_device;
699 }
700#endif
701 }
702 return 0;
703
704#ifdef CONFIG_SYSFS_DEPRECATED
705out_device:
706 if (dev->parent)
707 sysfs_remove_link(&dev->kobj, "device");
708#endif
709out_busid:
710 if (dev->kobj.parent != &dev->class->subsys.kobj)
711 sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
712out_subsys:
713 sysfs_remove_link(&dev->kobj, "subsystem");
714out:
715 return error;
716}
717
718static void device_remove_class_symlinks(struct device *dev)
719{
720 if (!dev->class)
721 return;
722 if (dev->parent) {
723#ifdef CONFIG_SYSFS_DEPRECATED
724 char *class_name;
725
726 class_name = make_class_name(dev->class->name, &dev->kobj);
727 if (class_name) {
728 sysfs_remove_link(&dev->parent->kobj, class_name);
729 kfree(class_name);
730 }
731#endif
732 sysfs_remove_link(&dev->kobj, "device");
733 }
734 if (dev->kobj.parent != &dev->class->subsys.kobj)
735 sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
736 sysfs_remove_link(&dev->kobj, "subsystem");
737}
738
646/** 739/**
647 * device_add - add device to device hierarchy. 740 * device_add - add device to device hierarchy.
648 * @dev: device. 741 * @dev: device.
@@ -657,7 +750,6 @@ static int setup_parent(struct device *dev, struct device *parent)
657int device_add(struct device *dev) 750int device_add(struct device *dev)
658{ 751{
659 struct device *parent = NULL; 752 struct device *parent = NULL;
660 char *class_name = NULL;
661 struct class_interface *class_intf; 753 struct class_interface *class_intf;
662 int error = -EINVAL; 754 int error = -EINVAL;
663 755
@@ -697,27 +789,9 @@ int device_add(struct device *dev)
697 goto ueventattrError; 789 goto ueventattrError;
698 } 790 }
699 791
700 if (dev->class) { 792 error = device_add_class_symlinks(dev);
701 sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, 793 if (error)
702 "subsystem"); 794 goto SymlinkError;
703 /* If this is not a "fake" compatible device, then create the
704 * symlink from the class to the device. */
705 if (dev->kobj.parent != &dev->class->subsys.kobj)
706 sysfs_create_link(&dev->class->subsys.kobj,
707 &dev->kobj, dev->bus_id);
708 if (parent) {
709 sysfs_create_link(&dev->kobj, &dev->parent->kobj,
710 "device");
711#ifdef CONFIG_SYSFS_DEPRECATED
712 class_name = make_class_name(dev->class->name,
713 &dev->kobj);
714 if (class_name)
715 sysfs_create_link(&dev->parent->kobj,
716 &dev->kobj, class_name);
717#endif
718 }
719 }
720
721 error = device_add_attrs(dev); 795 error = device_add_attrs(dev);
722 if (error) 796 if (error)
723 goto AttrsError; 797 goto AttrsError;
@@ -744,7 +818,6 @@ int device_add(struct device *dev)
744 up(&dev->class->sem); 818 up(&dev->class->sem);
745 } 819 }
746 Done: 820 Done:
747 kfree(class_name);
748 put_device(dev); 821 put_device(dev);
749 return error; 822 return error;
750 BusError: 823 BusError:
@@ -755,6 +828,8 @@ int device_add(struct device *dev)
755 BUS_NOTIFY_DEL_DEVICE, dev); 828 BUS_NOTIFY_DEL_DEVICE, dev);
756 device_remove_attrs(dev); 829 device_remove_attrs(dev);
757 AttrsError: 830 AttrsError:
831 device_remove_class_symlinks(dev);
832 SymlinkError:
758 if (MAJOR(dev->devt)) 833 if (MAJOR(dev->devt))
759 device_remove_file(dev, &devt_attr); 834 device_remove_file(dev, &devt_attr);
760 835
@@ -1139,7 +1214,7 @@ int device_rename(struct device *dev, char *new_name)
1139{ 1214{
1140 char *old_class_name = NULL; 1215 char *old_class_name = NULL;
1141 char *new_class_name = NULL; 1216 char *new_class_name = NULL;
1142 char *old_symlink_name = NULL; 1217 char *old_device_name = NULL;
1143 int error; 1218 int error;
1144 1219
1145 dev = get_device(dev); 1220 dev = get_device(dev);
@@ -1153,42 +1228,49 @@ int device_rename(struct device *dev, char *new_name)
1153 old_class_name = make_class_name(dev->class->name, &dev->kobj); 1228 old_class_name = make_class_name(dev->class->name, &dev->kobj);
1154#endif 1229#endif
1155 1230
1156 if (dev->class) { 1231 old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
1157 old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); 1232 if (!old_device_name) {
1158 if (!old_symlink_name) { 1233 error = -ENOMEM;
1159 error = -ENOMEM; 1234 goto out;
1160 goto out_free_old_class;
1161 }
1162 strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE);
1163 } 1235 }
1164 1236 strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
1165 strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); 1237 strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
1166 1238
1167 error = kobject_rename(&dev->kobj, new_name); 1239 error = kobject_rename(&dev->kobj, new_name);
1240 if (error) {
1241 strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
1242 goto out;
1243 }
1168 1244
1169#ifdef CONFIG_SYSFS_DEPRECATED 1245#ifdef CONFIG_SYSFS_DEPRECATED
1170 if (old_class_name) { 1246 if (old_class_name) {
1171 new_class_name = make_class_name(dev->class->name, &dev->kobj); 1247 new_class_name = make_class_name(dev->class->name, &dev->kobj);
1172 if (new_class_name) { 1248 if (new_class_name) {
1173 sysfs_create_link(&dev->parent->kobj, &dev->kobj, 1249 error = sysfs_create_link(&dev->parent->kobj,
1174 new_class_name); 1250 &dev->kobj, new_class_name);
1251 if (error)
1252 goto out;
1175 sysfs_remove_link(&dev->parent->kobj, old_class_name); 1253 sysfs_remove_link(&dev->parent->kobj, old_class_name);
1176 } 1254 }
1177 } 1255 }
1178#endif 1256#endif
1179 1257
1180 if (dev->class) { 1258 if (dev->class) {
1181 sysfs_remove_link(&dev->class->subsys.kobj, 1259 sysfs_remove_link(&dev->class->subsys.kobj, old_device_name);
1182 old_symlink_name); 1260 error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
1183 sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, 1261 dev->bus_id);
1184 dev->bus_id); 1262 if (error) {
1263 /* Uh... how to unravel this if restoring can fail? */
1264 dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n",
1265 __FUNCTION__, error);
1266 }
1185 } 1267 }
1268out:
1186 put_device(dev); 1269 put_device(dev);
1187 1270
1188 kfree(new_class_name); 1271 kfree(new_class_name);
1189 kfree(old_symlink_name);
1190 out_free_old_class:
1191 kfree(old_class_name); 1272 kfree(old_class_name);
1273 kfree(old_device_name);
1192 1274
1193 return error; 1275 return error;
1194} 1276}
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 91f230939c1e..fff178007208 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,5 +1,5 @@
1obj-y := shutdown.o 1obj-y := shutdown.o
2obj-$(CONFIG_PM) += main.o suspend.o resume.o runtime.o sysfs.o 2obj-$(CONFIG_PM) += main.o suspend.o resume.o sysfs.o
3obj-$(CONFIG_PM_TRACE) += trace.o 3obj-$(CONFIG_PM_TRACE) += trace.o
4 4
5ifeq ($(CONFIG_DEBUG_DRIVER),y) 5ifeq ($(CONFIG_DEBUG_DRIVER),y)
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 2760f25b3ac5..591a0dd5deee 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -62,11 +62,6 @@ extern int resume_device(struct device *);
62 */ 62 */
63extern int suspend_device(struct device *, pm_message_t); 63extern int suspend_device(struct device *, pm_message_t);
64 64
65
66/*
67 * runtime.c
68 */
69
70#else /* CONFIG_PM */ 65#else /* CONFIG_PM */
71 66
72 67
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
deleted file mode 100644
index df6174d85866..000000000000
--- a/drivers/base/power/runtime.c
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * drivers/base/power/runtime.c - Handling dynamic device power management.
3 *
4 * Copyright (c) 2003 Patrick Mochel
5 * Copyright (c) 2003 Open Source Development Lab
6 *
7 */
8
9#include <linux/device.h>
10#include "power.h"
11
12
13static void runtime_resume(struct device * dev)
14{
15 dev_dbg(dev, "resuming\n");
16 if (!dev->power.power_state.event)
17 return;
18 if (!resume_device(dev))
19 dev->power.power_state = PMSG_ON;
20}
21
22
23/**
24 * dpm_runtime_resume - Power one device back on.
25 * @dev: Device.
26 *
27 * Bring one device back to the on state by first powering it
28 * on, then restoring state. We only operate on devices that aren't
29 * already on.
30 * FIXME: We need to handle devices that are in an unknown state.
31 */
32
33void dpm_runtime_resume(struct device * dev)
34{
35 mutex_lock(&dpm_mtx);
36 runtime_resume(dev);
37 mutex_unlock(&dpm_mtx);
38}
39EXPORT_SYMBOL(dpm_runtime_resume);
40
41
42/**
43 * dpm_runtime_suspend - Put one device in low-power state.
44 * @dev: Device.
45 * @state: State to enter.
46 */
47
48int dpm_runtime_suspend(struct device * dev, pm_message_t state)
49{
50 int error = 0;
51
52 mutex_lock(&dpm_mtx);
53 if (dev->power.power_state.event == state.event)
54 goto Done;
55
56 if (dev->power.power_state.event)
57 runtime_resume(dev);
58
59 if (!(error = suspend_device(dev, state)))
60 dev->power.power_state = state;
61 Done:
62 mutex_unlock(&dpm_mtx);
63 return error;
64}
65EXPORT_SYMBOL(dpm_runtime_suspend);
66
67
68#if 0
69/**
70 * dpm_set_power_state - Update power_state field.
71 * @dev: Device.
72 * @state: Power state device is in.
73 *
74 * This is an update mechanism for drivers to notify the core
75 * what power state a device is in. Device probing code may not
76 * always be able to tell, but we need accurate information to
77 * work reliably.
78 */
79void dpm_set_power_state(struct device * dev, pm_message_t state)
80{
81 mutex_lock(&dpm_mtx);
82 dev->power.power_state = state;
83 mutex_unlock(&dpm_mtx);
84}
85#endif /* 0 */
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 2d47517dbe32..f2ed179cd695 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -7,69 +7,6 @@
7#include "power.h" 7#include "power.h"
8 8
9 9
10#ifdef CONFIG_PM_SYSFS_DEPRECATED
11
12/**
13 * state - Control current power state of device
14 *
15 * show() returns the current power state of the device. '0' indicates
16 * the device is on. Other values (2) indicate the device is in some low
17 * power state.
18 *
19 * store() sets the current power state, which is an integer valued
20 * 0, 2, or 3. Devices with bus.suspend_late(), or bus.resume_early()
21 * methods fail this operation; those methods couldn't be called.
22 * Otherwise,
23 *
24 * - If the recorded dev->power.power_state.event matches the
25 * target value, nothing is done.
26 * - If the recorded event code is nonzero, the device is reactivated
27 * by calling bus.resume() and/or class.resume().
28 * - If the target value is nonzero, the device is suspended by
29 * calling class.suspend() and/or bus.suspend() with event code
30 * PM_EVENT_SUSPEND.
31 *
32 * This mechanism is DEPRECATED and should only be used for testing.
33 */
34
35static ssize_t state_show(struct device * dev, struct device_attribute *attr, char * buf)
36{
37 if (dev->power.power_state.event)
38 return sprintf(buf, "2\n");
39 else
40 return sprintf(buf, "0\n");
41}
42
43static ssize_t state_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n)
44{
45 pm_message_t state;
46 int error = -EINVAL;
47
48 /* disallow incomplete suspend sequences */
49 if (dev->bus && (dev->bus->suspend_late || dev->bus->resume_early))
50 return error;
51
52 state.event = PM_EVENT_SUSPEND;
53 /* Older apps expected to write "3" here - confused with PCI D3 */
54 if ((n == 1) && !strcmp(buf, "3"))
55 error = dpm_runtime_suspend(dev, state);
56
57 if ((n == 1) && !strcmp(buf, "2"))
58 error = dpm_runtime_suspend(dev, state);
59
60 if ((n == 1) && !strcmp(buf, "0")) {
61 dpm_runtime_resume(dev);
62 error = 0;
63 }
64
65 return error ? error : n;
66}
67
68static DEVICE_ATTR(state, 0644, state_show, state_store);
69
70
71#endif /* CONFIG_PM_SYSFS_DEPRECATED */
72
73/* 10/*
74 * wakeup - Report/change current wakeup option for device 11 * wakeup - Report/change current wakeup option for device
75 * 12 *
@@ -143,9 +80,6 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
143 80
144 81
145static struct attribute * power_attrs[] = { 82static struct attribute * power_attrs[] = {
146#ifdef CONFIG_PM_SYSFS_DEPRECATED
147 &dev_attr_state.attr,
148#endif
149 &dev_attr_wakeup.attr, 83 &dev_attr_wakeup.attr,
150 NULL, 84 NULL,
151}; 85};
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 41476abc0693..db703758db98 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -224,6 +224,7 @@ ohci_update_phy_reg(struct fw_card *card, int addr,
224 u32 val, old; 224 u32 val, old;
225 225
226 reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr)); 226 reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr));
227 flush_writes(ohci);
227 msleep(2); 228 msleep(2);
228 val = reg_read(ohci, OHCI1394_PhyControl); 229 val = reg_read(ohci, OHCI1394_PhyControl);
229 if ((val & OHCI1394_PhyControl_ReadDone) == 0) { 230 if ((val & OHCI1394_PhyControl_ReadDone) == 0) {
@@ -586,7 +587,7 @@ static void context_stop(struct context *ctx)
586 break; 587 break;
587 588
588 fw_notify("context_stop: still active (0x%08x)\n", reg); 589 fw_notify("context_stop: still active (0x%08x)\n", reg);
589 msleep(1); 590 mdelay(1);
590 } 591 }
591} 592}
592 593
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 7c53be0387fb..fc984474162c 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -840,7 +840,6 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status)
840 container_of(base_orb, struct sbp2_command_orb, base); 840 container_of(base_orb, struct sbp2_command_orb, base);
841 struct fw_unit *unit = orb->unit; 841 struct fw_unit *unit = orb->unit;
842 struct fw_device *device = fw_device(unit->device.parent); 842 struct fw_device *device = fw_device(unit->device.parent);
843 struct scatterlist *sg;
844 int result; 843 int result;
845 844
846 if (status != NULL) { 845 if (status != NULL) {
@@ -876,11 +875,10 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status)
876 dma_unmap_single(device->card->device, orb->base.request_bus, 875 dma_unmap_single(device->card->device, orb->base.request_bus,
877 sizeof(orb->request), DMA_TO_DEVICE); 876 sizeof(orb->request), DMA_TO_DEVICE);
878 877
879 if (orb->cmd->use_sg > 0) { 878 if (scsi_sg_count(orb->cmd) > 0)
880 sg = (struct scatterlist *)orb->cmd->request_buffer; 879 dma_unmap_sg(device->card->device, scsi_sglist(orb->cmd),
881 dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, 880 scsi_sg_count(orb->cmd),
882 orb->cmd->sc_data_direction); 881 orb->cmd->sc_data_direction);
883 }
884 882
885 if (orb->page_table_bus != 0) 883 if (orb->page_table_bus != 0)
886 dma_unmap_single(device->card->device, orb->page_table_bus, 884 dma_unmap_single(device->card->device, orb->page_table_bus,
@@ -901,8 +899,8 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb)
901 int sg_len, l, i, j, count; 899 int sg_len, l, i, j, count;
902 dma_addr_t sg_addr; 900 dma_addr_t sg_addr;
903 901
904 sg = (struct scatterlist *)orb->cmd->request_buffer; 902 sg = scsi_sglist(orb->cmd);
905 count = dma_map_sg(device->card->device, sg, orb->cmd->use_sg, 903 count = dma_map_sg(device->card->device, sg, scsi_sg_count(orb->cmd),
906 orb->cmd->sc_data_direction); 904 orb->cmd->sc_data_direction);
907 if (count == 0) 905 if (count == 0)
908 goto fail; 906 goto fail;
@@ -971,7 +969,7 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb)
971 return 0; 969 return 0;
972 970
973 fail_page_table: 971 fail_page_table:
974 dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, 972 dma_unmap_sg(device->card->device, sg, scsi_sg_count(orb->cmd),
975 orb->cmd->sc_data_direction); 973 orb->cmd->sc_data_direction);
976 fail: 974 fail:
977 return -ENOMEM; 975 return -ENOMEM;
@@ -1031,7 +1029,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
1031 orb->request.misc |= 1029 orb->request.misc |=
1032 COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA); 1030 COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA);
1033 1031
1034 if (cmd->use_sg && sbp2_command_orb_map_scatterlist(orb) < 0) 1032 if (scsi_sg_count(cmd) && sbp2_command_orb_map_scatterlist(orb) < 0)
1035 goto fail_mapping; 1033 goto fail_mapping;
1036 1034
1037 fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); 1035 fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request));
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index 80d0121463d0..3ce8e2fbe15f 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -605,8 +605,10 @@ fw_send_response(struct fw_card *card, struct fw_request *request, int rcode)
605 * check is sufficient to ensure we don't send response to 605 * check is sufficient to ensure we don't send response to
606 * broadcast packets or posted writes. 606 * broadcast packets or posted writes.
607 */ 607 */
608 if (request->ack != ACK_PENDING) 608 if (request->ack != ACK_PENDING) {
609 kfree(request);
609 return; 610 return;
611 }
610 612
611 if (rcode == RCODE_COMPLETE) 613 if (rcode == RCODE_COMPLETE)
612 fw_fill_response(&request->response, request->request_header, 614 fw_fill_response(&request->response, request->request_header,
@@ -628,11 +630,6 @@ fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
628 unsigned long flags; 630 unsigned long flags;
629 int tcode, destination, source; 631 int tcode, destination, source;
630 632
631 if (p->payload_length > 2048) {
632 /* FIXME: send error response. */
633 return;
634 }
635
636 if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) 633 if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
637 return; 634 return;
638 635
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index 5abed193f4a6..5ceaccd10564 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -123,6 +123,10 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
123 size_t length, 123 size_t length,
124 void *callback_data); 124 void *callback_data);
125 125
126/*
127 * Important note: The callback must guarantee that either fw_send_response()
128 * or kfree() is called on the @request.
129 */
126typedef void (*fw_address_callback_t)(struct fw_card *card, 130typedef void (*fw_address_callback_t)(struct fw_card *card,
127 struct fw_request *request, 131 struct fw_request *request,
128 int tcode, int destination, int source, 132 int tcode, int destination, int source,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 9820c67ba47d..4df269f5d9ac 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3374,7 +3374,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3374} 3374}
3375EXPORT_SYMBOL(ib_cm_init_qp_attr); 3375EXPORT_SYMBOL(ib_cm_init_qp_attr);
3376 3376
3377void cm_get_ack_delay(struct cm_device *cm_dev) 3377static void cm_get_ack_delay(struct cm_device *cm_dev)
3378{ 3378{
3379 struct ib_device_attr attr; 3379 struct ib_device_attr attr;
3380 3380
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 23af7a032a03..9ffb9987450a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -573,7 +573,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
573 break; 573 break;
574 case RDMA_TRANSPORT_IWARP: 574 case RDMA_TRANSPORT_IWARP:
575 if (!id_priv->cm_id.iw) { 575 if (!id_priv->cm_id.iw) {
576 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; 576 qp_attr->qp_access_flags = 0;
577 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 577 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
578 } else 578 } else
579 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 579 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3b41dc0c39dd..5dc68cd5621b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1914,6 +1914,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1914fail3: 1914fail3:
1915 cxgb3_free_stid(ep->com.tdev, ep->stid); 1915 cxgb3_free_stid(ep->com.tdev, ep->stid);
1916fail2: 1916fail2:
1917 cm_id->rem_ref(cm_id);
1917 put_ep(&ep->com); 1918 put_ep(&ep->com);
1918fail1: 1919fail1:
1919out: 1920out:
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 3cd6bf3402d1..e53a97af1260 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
79 av->av.ipd = (ah_mult > 0) ? 79 av->av.ipd = (ah_mult > 0) ?
80 ((ehca_mult - 1) / ah_mult) : 0; 80 ((ehca_mult - 1) / ah_mult) : 0;
81 } else 81 } else
82 av->av.ipd = ehca_static_rate; 82 av->av.ipd = ehca_static_rate;
83 83
84 av->av.lnh = ah_attr->ah_flags; 84 av->av.lnh = ah_attr->ah_flags;
85 av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); 85 av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index daf823ea1ace..043e4fb23fb0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -204,11 +204,11 @@ struct ehca_mr {
204 spinlock_t mrlock; 204 spinlock_t mrlock;
205 205
206 enum ehca_mr_flag flags; 206 enum ehca_mr_flag flags;
207 u32 num_pages; /* number of MR pages */ 207 u32 num_kpages; /* number of kernel pages */
208 u32 num_4k; /* number of 4k "page" portions to form MR */ 208 u32 num_hwpages; /* number of hw pages to form MR */
209 int acl; /* ACL (stored here for usage in reregister) */ 209 int acl; /* ACL (stored here for usage in reregister) */
210 u64 *start; /* virtual start address (stored here for */ 210 u64 *start; /* virtual start address (stored here for */
211 /* usage in reregister) */ 211 /* usage in reregister) */
212 u64 size; /* size (stored here for usage in reregister) */ 212 u64 size; /* size (stored here for usage in reregister) */
213 u32 fmr_page_size; /* page size for FMR */ 213 u32 fmr_page_size; /* page size for FMR */
214 u32 fmr_max_pages; /* max pages for FMR */ 214 u32 fmr_max_pages; /* max pages for FMR */
@@ -217,9 +217,6 @@ struct ehca_mr {
217 /* fw specific data */ 217 /* fw specific data */
218 struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ 218 struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
219 struct h_galpas galpas; 219 struct h_galpas galpas;
220 /* data for userspace bridge */
221 u32 nr_of_pages;
222 void *pagearray;
223}; 220};
224 221
225struct ehca_mw { 222struct ehca_mw {
@@ -241,26 +238,29 @@ enum ehca_mr_pgi_type {
241 238
242struct ehca_mr_pginfo { 239struct ehca_mr_pginfo {
243 enum ehca_mr_pgi_type type; 240 enum ehca_mr_pgi_type type;
244 u64 num_pages; 241 u64 num_kpages;
245 u64 page_cnt; 242 u64 kpage_cnt;
246 u64 num_4k; /* number of 4k "page" portions */ 243 u64 num_hwpages; /* number of hw pages */
247 u64 page_4k_cnt; /* counter for 4k "page" portions */ 244 u64 hwpage_cnt; /* counter for hw pages */
248 u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ 245 u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
249 246
250 /* type EHCA_MR_PGI_PHYS section */ 247 union {
251 int num_phys_buf; 248 struct { /* type EHCA_MR_PGI_PHYS section */
252 struct ib_phys_buf *phys_buf_array; 249 int num_phys_buf;
253 u64 next_buf; 250 struct ib_phys_buf *phys_buf_array;
254 251 u64 next_buf;
255 /* type EHCA_MR_PGI_USER section */ 252 } phy;
256 struct ib_umem *region; 253 struct { /* type EHCA_MR_PGI_USER section */
257 struct ib_umem_chunk *next_chunk; 254 struct ib_umem *region;
258 u64 next_nmap; 255 struct ib_umem_chunk *next_chunk;
259 256 u64 next_nmap;
260 /* type EHCA_MR_PGI_FMR section */ 257 } usr;
261 u64 *page_list; 258 struct { /* type EHCA_MR_PGI_FMR section */
262 u64 next_listelem; 259 u64 fmr_pgsize;
263 /* next_4k also used within EHCA_MR_PGI_FMR */ 260 u64 *page_list;
261 u64 next_listelem;
262 } fmr;
263 } u;
264}; 264};
265 265
266/* output parameters for MR/FMR hipz calls */ 266/* output parameters for MR/FMR hipz calls */
@@ -391,6 +391,6 @@ struct ehca_alloc_qp_parms {
391 391
392int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); 392int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
393int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); 393int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
394struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); 394struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
395 395
396#endif 396#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index fb3df5c271e7..1798e6466bd0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block {
154 u32 reserved_70_127[58]; /* 70 */ 154 u32 reserved_70_127[58]; /* 70 */
155}; 155};
156 156
157#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) 157#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0)
158#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) 158#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2)
159#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) 159#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3)
160#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) 160#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4)
161#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) 161#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31)
162#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) 162#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5)
163#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) 163#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6)
164#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) 164#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31)
165#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) 165#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7)
166#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) 166#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8)
167#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) 167#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9)
168#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) 168#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31)
169#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) 169#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11)
170#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) 170#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12)
171#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) 171#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13)
172#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) 172#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14)
173#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) 173#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15)
174#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) 174#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16)
175#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) 175#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17)
176#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) 176#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18)
177#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) 177#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19)
178#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) 178#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20)
179#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) 179#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31)
180#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) 180#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21)
181#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) 181#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31)
182#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) 182#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22)
183#define MQPCB_DLID EHCA_BMASK_IBM(16,31) 183#define MQPCB_DLID EHCA_BMASK_IBM(16, 31)
184#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) 184#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23)
185#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) 185#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31)
186#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) 186#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24)
187#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) 187#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31)
188#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) 188#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25)
189#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) 189#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31)
190#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) 190#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26)
191#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) 191#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31)
192#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) 192#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27)
193#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) 193#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31)
194#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) 194#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28)
195#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) 195#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31)
196#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) 196#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30)
197#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) 197#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31)
198#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) 198#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31)
199#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) 199#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32)
200#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) 200#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31)
201#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) 201#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33)
202#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) 202#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
203#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) 203#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34)
204#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) 204#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31)
205#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) 205#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35)
206#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) 206#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31)
207#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) 207#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36)
208#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) 208#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31)
209#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) 209#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37)
210#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) 210#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
211#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) 211#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38)
212#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) 212#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31)
213#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) 213#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39)
214#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) 214#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31)
215#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) 215#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40)
216#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) 216#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31)
217#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) 217#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41)
218#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) 218#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31)
219#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) 219#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42)
220#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) 220#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31)
221#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) 221#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44)
222#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) 222#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45)
223#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) 223#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
224#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) 224#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46)
225#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) 225#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
226#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) 226#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47)
227#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) 227#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31)
228#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) 228#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
229#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) 229#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48)
230#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) 230#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31)
231#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) 231#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49)
232#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) 232#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31)
233#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) 233#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50)
234#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) 234#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51)
235 235
236#endif /* __EHCA_CLASSES_PSERIES_H__ */ 236#endif /* __EHCA_CLASSES_PSERIES_H__ */
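
The EHCA_BMASK_IBM(from, to) macros above use IBM's big-endian bit numbering: bit 0 is the most significant bit of the 64-bit word. The whitespace changes in this hunk (e.g. "( 8, 31)") only align single-digit arguments; the generated masks are identical. Below is a minimal sketch of how such a mask-descriptor pair could be defined and used. The real definitions live in ehca_tools.h, which is not part of this hunk, so treat the macro bodies as an illustrative reconstruction, not a quote of the driver:

#include <stdint.h>
#include <stdio.h>

/* Illustrative reconstruction: a field descriptor packs the shift from
 * the LSB into the high 16 bits and the field width into the low 16,
 * with IBM bit 0 = MSB of a 64-bit word. */
#define EHCA_BMASK_IBM(from, to)  (((63 - (to)) << 16) + ((to) + 1 - (from)))
#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
#define EHCA_BMASK_MASK(mask)     (~0ULL >> (64 - ((mask) & 0xffff)))
#define EHCA_BMASK_SET(mask, value) \
	((EHCA_BMASK_MASK(mask) & ((uint64_t)(value))) << EHCA_BMASK_SHIFTPOS(mask))
#define EHCA_BMASK_GET(mask, value) \
	(EHCA_BMASK_MASK(mask) & (((uint64_t)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))

int main(void)
{
	/* MQPCB_PATH_MTU spans IBM bits 24..31, i.e. the low byte of the
	 * most significant 32-bit half of the register. */
	uint64_t reg = EHCA_BMASK_SET(EHCA_BMASK_IBM(24, 31), 0x5a);

	printf("reg=%#llx field=%#llx\n",
	       (unsigned long long)reg,
	       (unsigned long long)EHCA_BMASK_GET(EHCA_BMASK_IBM(24, 31), reg));
	return 0;
}

Under this encoding, EHCA_BMASK_IBM(24, 31) yields shift 32 and width 8, so the example prints reg=0x5a00000000 and field=0x5a.
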
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 01d4a148bd71..9e87883b561a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
97 return ret; 97 return ret;
98} 98}
99 99
100struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) 100struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
101{ 101{
102 struct ehca_qp *ret = NULL; 102 struct ehca_qp *ret = NULL;
103 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); 103 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 4961eb88827c..4825975f88cf 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca,
96 for (i = 0; i < nr_pages; i++) { 96 for (i = 0; i < nr_pages; i++) {
97 u64 rpage; 97 u64 rpage;
98 98
99 if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { 99 vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
100 if (!vpage) {
100 ret = H_RESOURCE; 101 ret = H_RESOURCE;
101 goto create_eq_exit2; 102 goto create_eq_exit2;
102 } 103 }
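
This hunk is pure style: Documentation/CodingStyle discourages side effects inside an if condition, so the assignment is hoisted out before the test. A reduced, stand-alone rendering of the before/after (the allocator here is a placeholder, not driver code):

#include <stdlib.h>

/* Placeholder for ipz_qpageit_get_inc(); any allocator works for the demo. */
static void *get_page_stub(void)
{
	return malloc(4096);
}

static int create_eq_like(void)
{
	void *vpage;

	/* Old form:  if (!(vpage = get_page_stub())) { ... }
	 * New form: the assignment becomes a statement of its own, so the
	 * condition tests a plain value. Behavior is identical. */
	vpage = get_page_stub();
	if (!vpage)
		return -1;	/* the driver returns H_RESOURCE here */

	free(vpage);
	return 0;
}

int main(void)
{
	return create_eq_like();
}
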
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index bbd3c6a5822f..fc19ef9fd963 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -127,6 +127,7 @@ int ehca_query_port(struct ib_device *ibdev,
127 u8 port, struct ib_port_attr *props) 127 u8 port, struct ib_port_attr *props)
128{ 128{
129 int ret = 0; 129 int ret = 0;
130 u64 h_ret;
130 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, 131 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
131 ib_device); 132 ib_device);
132 struct hipz_query_port *rblock; 133 struct hipz_query_port *rblock;
@@ -137,7 +138,8 @@ int ehca_query_port(struct ib_device *ibdev,
137 return -ENOMEM; 138 return -ENOMEM;
138 } 139 }
139 140
140 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { 141 h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
142 if (h_ret != H_SUCCESS) {
141 ehca_err(&shca->ib_device, "Can't query port properties"); 143 ehca_err(&shca->ib_device, "Can't query port properties");
142 ret = -EINVAL; 144 ret = -EINVAL;
143 goto query_port1; 145 goto query_port1;
@@ -197,6 +199,7 @@ int ehca_query_sma_attr(struct ehca_shca *shca,
197 u8 port, struct ehca_sma_attr *attr) 199 u8 port, struct ehca_sma_attr *attr)
198{ 200{
199 int ret = 0; 201 int ret = 0;
202 u64 h_ret;
200 struct hipz_query_port *rblock; 203 struct hipz_query_port *rblock;
201 204
202 rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); 205 rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
@@ -205,7 +208,8 @@ int ehca_query_sma_attr(struct ehca_shca *shca,
205 return -ENOMEM; 208 return -ENOMEM;
206 } 209 }
207 210
208 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { 211 h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
212 if (h_ret != H_SUCCESS) {
209 ehca_err(&shca->ib_device, "Can't query port properties"); 213 ehca_err(&shca->ib_device, "Can't query port properties");
210 ret = -EINVAL; 214 ret = -EINVAL;
211 goto query_sma_attr1; 215 goto query_sma_attr1;
@@ -230,9 +234,11 @@ query_sma_attr1:
230int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 234int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
231{ 235{
232 int ret = 0; 236 int ret = 0;
233 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); 237 u64 h_ret;
238 struct ehca_shca *shca;
234 struct hipz_query_port *rblock; 239 struct hipz_query_port *rblock;
235 240
241 shca = container_of(ibdev, struct ehca_shca, ib_device);
236 if (index > 16) { 242 if (index > 16) {
237 ehca_err(&shca->ib_device, "Invalid index: %x.", index); 243 ehca_err(&shca->ib_device, "Invalid index: %x.", index);
238 return -EINVAL; 244 return -EINVAL;
@@ -244,7 +250,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
244 return -ENOMEM; 250 return -ENOMEM;
245 } 251 }
246 252
247 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { 253 h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
254 if (h_ret != H_SUCCESS) {
248 ehca_err(&shca->ib_device, "Can't query port properties"); 255 ehca_err(&shca->ib_device, "Can't query port properties");
249 ret = -EINVAL; 256 ret = -EINVAL;
250 goto query_pkey1; 257 goto query_pkey1;
@@ -262,6 +269,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
262 int index, union ib_gid *gid) 269 int index, union ib_gid *gid)
263{ 270{
264 int ret = 0; 271 int ret = 0;
272 u64 h_ret;
265 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, 273 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
266 ib_device); 274 ib_device);
267 struct hipz_query_port *rblock; 275 struct hipz_query_port *rblock;
@@ -277,7 +285,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
277 return -ENOMEM; 285 return -ENOMEM;
278 } 286 }
279 287
280 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { 288 h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
289 if (h_ret != H_SUCCESS) {
281 ehca_err(&shca->ib_device, "Can't query port properties"); 290 ehca_err(&shca->ib_device, "Can't query port properties");
282 ret = -EINVAL; 291 ret = -EINVAL;
283 goto query_gid1; 292 goto query_gid1;
@@ -302,11 +311,12 @@ int ehca_modify_port(struct ib_device *ibdev,
302 struct ib_port_modify *props) 311 struct ib_port_modify *props)
303{ 312{
304 int ret = 0; 313 int ret = 0;
305 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); 314 struct ehca_shca *shca;
306 struct hipz_query_port *rblock; 315 struct hipz_query_port *rblock;
307 u32 cap; 316 u32 cap;
308 u64 hret; 317 u64 hret;
309 318
319 shca = container_of(ibdev, struct ehca_shca, ib_device);
310 if ((props->set_port_cap_mask | props->clr_port_cap_mask) 320 if ((props->set_port_cap_mask | props->clr_port_cap_mask)
311 & ~allowed_port_caps) { 321 & ~allowed_port_caps) {
312 ehca_err(&shca->ib_device, "Non-changeable bits set in masks " 322 ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
@@ -325,7 +335,8 @@ int ehca_modify_port(struct ib_device *ibdev,
325 goto modify_port1; 335 goto modify_port1;
326 } 336 }
327 337
328 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { 338 hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
339 if (hret != H_SUCCESS) {
329 ehca_err(&shca->ib_device, "Can't query port properties"); 340 ehca_err(&shca->ib_device, "Can't query port properties");
330 ret = -EINVAL; 341 ret = -EINVAL;
331 goto modify_port2; 342 goto modify_port2;
@@ -337,7 +348,8 @@ int ehca_modify_port(struct ib_device *ibdev,
337 hret = hipz_h_modify_port(shca->ipz_hca_handle, port, 348 hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
338 cap, props->init_type, port_modify_mask); 349 cap, props->init_type, port_modify_mask);
339 if (hret != H_SUCCESS) { 350 if (hret != H_SUCCESS) {
340 ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); 351 ehca_err(&shca->ib_device, "Modify port failed hret=%lx",
352 hret);
341 ret = -EINVAL; 353 ret = -EINVAL;
342 } 354 }
343 355
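
All of the ehca_hca.c changes follow one pattern: the hypervisor call's status is captured in a dedicated u64 (h_ret/hret) before being compared against H_SUCCESS, rather than being tested inline. Besides keeping lines under 80 columns, this leaves the raw hcall status in scope for diagnostics. A reduced sketch of the pattern with stand-in types and constants (the real H_SUCCESS and hipz_h_query_port() come from the hypervisor interface headers):

#include <stdint.h>
#include <stdio.h>

#define H_SUCCESS	0ULL
#define H_HARDWARE	0xfffULL	/* stand-in failure code */

/* Stand-in for hipz_h_query_port(); returns a hypervisor status. */
static uint64_t query_port_stub(int port)
{
	return port == 1 ? H_SUCCESS : H_HARDWARE;
}

static int query_port_like(int port)
{
	uint64_t h_ret;

	h_ret = query_port_stub(port);
	if (h_ret != H_SUCCESS) {
		/* h_ret is still available for the error message */
		fprintf(stderr, "Can't query port properties, h_ret=%llx\n",
			(unsigned long long)h_ret);
		return -22;	/* -EINVAL in the driver */
	}
	return 0;
}

int main(void)
{
	int ok = (query_port_like(1) == 0) && (query_port_like(2) == -22);
	return ok ? 0 : 1;
}
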
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 96eba3830754..4fb01fcb63ae 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -49,26 +49,26 @@
49#include "hipz_fns.h" 49#include "hipz_fns.h"
50#include "ipz_pt_fn.h" 50#include "ipz_pt_fn.h"
51 51
52#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) 52#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
53#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) 53#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
54#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) 54#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7)
55#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) 55#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31)
56#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) 56#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
57#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) 57#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63)
58#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) 58#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63)
59 59
60#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) 60#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
61#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) 61#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7)
62#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) 62#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
63#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) 63#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
64#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) 64#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
65 65
66#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) 66#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
67#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) 67#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
68 68
69static void queue_comp_task(struct ehca_cq *__cq); 69static void queue_comp_task(struct ehca_cq *__cq);
70 70
71static struct ehca_comp_pool* pool; 71static struct ehca_comp_pool *pool;
72#ifdef CONFIG_HOTPLUG_CPU 72#ifdef CONFIG_HOTPLUG_CPU
73static struct notifier_block comp_pool_callback_nb; 73static struct notifier_block comp_pool_callback_nb;
74#endif 74#endif
@@ -85,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq)
85 return; 85 return;
86} 86}
87 87
88static void print_error_data(struct ehca_shca * shca, void* data, 88static void print_error_data(struct ehca_shca *shca, void *data,
89 u64* rblock, int length) 89 u64 *rblock, int length)
90{ 90{
91 u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); 91 u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
92 u64 resource = rblock[1]; 92 u64 resource = rblock[1];
@@ -94,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data,
94 switch (type) { 94 switch (type) {
95 case 0x1: /* Queue Pair */ 95 case 0x1: /* Queue Pair */
96 { 96 {
97 struct ehca_qp *qp = (struct ehca_qp*)data; 97 struct ehca_qp *qp = (struct ehca_qp *)data;
98 98
99 /* only print error data if AER is set */ 99 /* only print error data if AER is set */
100 if (rblock[6] == 0) 100 if (rblock[6] == 0)
@@ -107,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data,
107 } 107 }
108 case 0x4: /* Completion Queue */ 108 case 0x4: /* Completion Queue */
109 { 109 {
110 struct ehca_cq *cq = (struct ehca_cq*)data; 110 struct ehca_cq *cq = (struct ehca_cq *)data;
111 111
112 ehca_err(&shca->ib_device, 112 ehca_err(&shca->ib_device,
113 "CQ 0x%x (resource=%lx) has errors.", 113 "CQ 0x%x (resource=%lx) has errors.",
@@ -572,7 +572,7 @@ void ehca_tasklet_eq(unsigned long data)
572 ehca_process_eq((struct ehca_shca*)data, 1); 572 ehca_process_eq((struct ehca_shca*)data, 1);
573} 573}
574 574
575static inline int find_next_online_cpu(struct ehca_comp_pool* pool) 575static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
576{ 576{
577 int cpu; 577 int cpu;
578 unsigned long flags; 578 unsigned long flags;
@@ -636,7 +636,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
636 __queue_comp_task(__cq, cct); 636 __queue_comp_task(__cq, cct);
637} 637}
638 638
639static void run_comp_task(struct ehca_cpu_comp_task* cct) 639static void run_comp_task(struct ehca_cpu_comp_task *cct)
640{ 640{
641 struct ehca_cq *cq; 641 struct ehca_cq *cq;
642 unsigned long flags; 642 unsigned long flags;
@@ -666,12 +666,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
666 666
667static int comp_task(void *__cct) 667static int comp_task(void *__cct)
668{ 668{
669 struct ehca_cpu_comp_task* cct = __cct; 669 struct ehca_cpu_comp_task *cct = __cct;
670 int cql_empty; 670 int cql_empty;
671 DECLARE_WAITQUEUE(wait, current); 671 DECLARE_WAITQUEUE(wait, current);
672 672
673 set_current_state(TASK_INTERRUPTIBLE); 673 set_current_state(TASK_INTERRUPTIBLE);
674 while(!kthread_should_stop()) { 674 while (!kthread_should_stop()) {
675 add_wait_queue(&cct->wait_queue, &wait); 675 add_wait_queue(&cct->wait_queue, &wait);
676 676
677 spin_lock_irq(&cct->task_lock); 677 spin_lock_irq(&cct->task_lock);
@@ -745,7 +745,7 @@ static void take_over_work(struct ehca_comp_pool *pool,
745 745
746 list_splice_init(&cct->cq_list, &list); 746 list_splice_init(&cct->cq_list, &list);
747 747
748 while(!list_empty(&list)) { 748 while (!list_empty(&list)) {
749 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 749 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
750 750
751 list_del(&cq->entry); 751 list_del(&cq->entry);
@@ -768,7 +768,7 @@ static int comp_pool_callback(struct notifier_block *nfb,
768 case CPU_UP_PREPARE: 768 case CPU_UP_PREPARE:
769 case CPU_UP_PREPARE_FROZEN: 769 case CPU_UP_PREPARE_FROZEN:
770 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); 770 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
771 if(!create_comp_task(pool, cpu)) { 771 if (!create_comp_task(pool, cpu)) {
772 ehca_gen_err("Can't create comp_task for cpu: %x", cpu); 772 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
773 return NOTIFY_BAD; 773 return NOTIFY_BAD;
774 } 774 }
@@ -838,7 +838,7 @@ int ehca_create_comp_pool(void)
838 838
839#ifdef CONFIG_HOTPLUG_CPU 839#ifdef CONFIG_HOTPLUG_CPU
840 comp_pool_callback_nb.notifier_call = comp_pool_callback; 840 comp_pool_callback_nb.notifier_call = comp_pool_callback;
841 comp_pool_callback_nb.priority =0; 841 comp_pool_callback_nb.priority = 0;
842 register_cpu_notifier(&comp_pool_callback_nb); 842 register_cpu_notifier(&comp_pool_callback_nb);
843#endif 843#endif
844 844
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 77aeca6a2c2f..dce503bb7d6b 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -81,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
81 int num_phys_buf, 81 int num_phys_buf,
82 int mr_access_flags, u64 *iova_start); 82 int mr_access_flags, u64 *iova_start);
83 83
84struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, 84struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
85 int mr_access_flags, struct ib_udata *udata); 85 u64 virt, int mr_access_flags,
86 struct ib_udata *udata);
86 87
87int ehca_rereg_phys_mr(struct ib_mr *mr, 88int ehca_rereg_phys_mr(struct ib_mr *mr,
88 int mr_rereg_mask, 89 int mr_rereg_mask,
@@ -192,7 +193,7 @@ void ehca_poll_eqs(unsigned long data);
192void *ehca_alloc_fw_ctrlblock(gfp_t flags); 193void *ehca_alloc_fw_ctrlblock(gfp_t flags);
193void ehca_free_fw_ctrlblock(void *ptr); 194void ehca_free_fw_ctrlblock(void *ptr);
194#else 195#else
195#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) 196#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
196#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) 197#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
197#endif 198#endif
198 199
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 28ba2dd24216..36377c6db3d4 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -107,7 +107,7 @@ static DEFINE_SPINLOCK(shca_list_lock);
107static struct timer_list poll_eqs_timer; 107static struct timer_list poll_eqs_timer;
108 108
109#ifdef CONFIG_PPC_64K_PAGES 109#ifdef CONFIG_PPC_64K_PAGES
110static struct kmem_cache *ctblk_cache = NULL; 110static struct kmem_cache *ctblk_cache;
111 111
112void *ehca_alloc_fw_ctrlblock(gfp_t flags) 112void *ehca_alloc_fw_ctrlblock(gfp_t flags)
113{ 113{
@@ -200,8 +200,8 @@ static void ehca_destroy_slab_caches(void)
200#endif 200#endif
201} 201}
202 202
203#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) 203#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
204#define EHCA_REVID EHCA_BMASK_IBM(40,63) 204#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
205 205
206static struct cap_descr { 206static struct cap_descr {
207 u64 mask; 207 u64 mask;
@@ -263,22 +263,27 @@ int ehca_sense_attributes(struct ehca_shca *shca)
263 263
264 ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); 264 ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
265 265
266 if ((hcaaver == 1) && (revid == 0)) 266 if (hcaaver == 1) {
267 shca->hw_level = 0x11; 267 if (revid <= 3)
268 else if ((hcaaver == 1) && (revid == 1)) 268 shca->hw_level = 0x10 | (revid + 1);
269 shca->hw_level = 0x12; 269 else
270 else if ((hcaaver == 1) && (revid == 2)) 270 shca->hw_level = 0x14;
271 shca->hw_level = 0x13; 271 } else if (hcaaver == 2) {
272 else if ((hcaaver == 2) && (revid == 0)) 272 if (revid == 0)
273 shca->hw_level = 0x21; 273 shca->hw_level = 0x21;
274 else if ((hcaaver == 2) && (revid == 0x10)) 274 else if (revid == 0x10)
275 shca->hw_level = 0x22; 275 shca->hw_level = 0x22;
276 else { 276 else if (revid == 0x20 || revid == 0x21)
277 shca->hw_level = 0x23;
278 }
279
280 if (!shca->hw_level) {
277 ehca_gen_warn("unknown hardware version" 281 ehca_gen_warn("unknown hardware version"
278 " - assuming default level"); 282 " - assuming default level");
279 shca->hw_level = 0x22; 283 shca->hw_level = 0x22;
280 } 284 }
281 } 285 } else
286 shca->hw_level = ehca_hw_level;
282 ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); 287 ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
283 288
284 shca->sport[0].rate = IB_RATE_30_GBPS; 289 shca->sport[0].rate = IB_RATE_30_GBPS;
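
The rewritten ehca_sense_attributes() collapses the per-revision if/else ladder into an encoding: hw_level is 0x1n for hcaaver 1 (n = revid + 1, capped at 0x14) and 0x21/0x22/0x23 for hcaaver 2, with 0x22 as the "unknown hardware" fallback; the trailing else shows that a nonzero ehca_hw_level module parameter overrides autosensing entirely. The mapping in isolation, directly transcribed from the hunk above so it can be checked against the diff (values come from the patch, not from any datasheet):

#include <stdio.h>

/* Mirror of the autosensing logic above: returns the hw_level for a
 * given hcaaver/revid pair, 0x22 when the combination is unknown. */
static unsigned int sense_hw_level(unsigned int hcaaver, unsigned int revid)
{
	unsigned int hw_level = 0;

	if (hcaaver == 1) {
		if (revid <= 3)
			hw_level = 0x10 | (revid + 1);	/* 0x11..0x14 */
		else
			hw_level = 0x14;
	} else if (hcaaver == 2) {
		if (revid == 0)
			hw_level = 0x21;
		else if (revid == 0x10)
			hw_level = 0x22;
		else if (revid == 0x20 || revid == 0x21)
			hw_level = 0x23;
	}

	if (!hw_level)
		hw_level = 0x22;	/* "unknown hardware version" default */
	return hw_level;
}

int main(void)
{
	printf("1/0 -> %#x, 1/2 -> %#x, 2/0x20 -> %#x, 3/0 -> %#x\n",
	       sense_hw_level(1, 0), sense_hw_level(1, 2),
	       sense_hw_level(2, 0x20), sense_hw_level(3, 0));
	return 0;
}
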
@@ -290,7 +295,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
290 if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) 295 if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
291 ehca_gen_dbg(" %s", hca_cap_descr[i].descr); 296 ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
292 297
293 port = (struct hipz_query_port *) rblock; 298 port = (struct hipz_query_port *)rblock;
294 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); 299 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
295 if (h_ret != H_SUCCESS) { 300 if (h_ret != H_SUCCESS) {
296 ehca_gen_err("Cannot query port properties. h_ret=%lx", 301 ehca_gen_err("Cannot query port properties. h_ret=%lx",
@@ -439,7 +444,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
439 return -EPERM; 444 return -EPERM;
440 } 445 }
441 446
442 ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); 447 ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0);
443 if (IS_ERR(ibcq)) { 448 if (IS_ERR(ibcq)) {
444 ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); 449 ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
445 return PTR_ERR(ibcq); 450 return PTR_ERR(ibcq);
@@ -666,7 +671,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
666 } 671 }
667 672
668 /* create internal protection domain */ 673 /* create internal protection domain */
669 ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); 674 ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
670 if (IS_ERR(ibpd)) { 675 if (IS_ERR(ibpd)) {
671 ehca_err(&shca->ib_device, "Cannot create internal PD."); 676 ehca_err(&shca->ib_device, "Cannot create internal PD.");
672 ret = PTR_ERR(ibpd); 677 ret = PTR_ERR(ibpd);
@@ -863,18 +868,21 @@ int __init ehca_module_init(void)
863 printk(KERN_INFO "eHCA Infiniband Device Driver " 868 printk(KERN_INFO "eHCA Infiniband Device Driver "
864 "(Rel.: SVNEHCA_0023)\n"); 869 "(Rel.: SVNEHCA_0023)\n");
865 870
866 if ((ret = ehca_create_comp_pool())) { 871 ret = ehca_create_comp_pool();
872 if (ret) {
867 ehca_gen_err("Cannot create comp pool."); 873 ehca_gen_err("Cannot create comp pool.");
868 return ret; 874 return ret;
869 } 875 }
870 876
871 if ((ret = ehca_create_slab_caches())) { 877 ret = ehca_create_slab_caches();
878 if (ret) {
872 ehca_gen_err("Cannot create SLAB caches"); 879 ehca_gen_err("Cannot create SLAB caches");
873 ret = -ENOMEM; 880 ret = -ENOMEM;
874 goto module_init1; 881 goto module_init1;
875 } 882 }
876 883
877 if ((ret = ibmebus_register_driver(&ehca_driver))) { 884 ret = ibmebus_register_driver(&ehca_driver);
885 if (ret) {
878 ehca_gen_err("Cannot register eHCA device driver"); 886 ehca_gen_err("Cannot register eHCA device driver");
879 ret = -EINVAL; 887 ret = -EINVAL;
880 goto module_init2; 888 goto module_init2;
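
ehca_module_init() keeps its goto-based unwinding (module_init1/module_init2) while hoisting the "if ((ret = f()))" assignments out of the conditions. The shape of that error-unwinding idiom, reduced to stand-alone C with placeholder step names:

#include <stdio.h>

static int step_ok(const char *name)    { printf("init %s\n", name); return 0; }
static void step_undo(const char *name) { printf("undo %s\n", name); }

/* Each successful step gains a matching undo label; a failure jumps to
 * the label that unwinds everything initialized so far, in reverse. */
static int module_init_like(void)
{
	int ret;

	ret = step_ok("comp pool");
	if (ret)
		return ret;

	ret = step_ok("slab caches");
	if (ret)
		goto init1;

	ret = step_ok("ebus driver");
	if (ret)
		goto init2;

	return 0;

init2:
	step_undo("slab caches");
init1:
	step_undo("comp pool");
	return ret;
}

int main(void)
{
	return module_init_like();
}
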
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index add79bd44e39..6262c5462d50 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -48,6 +48,11 @@
48#include "hcp_if.h" 48#include "hcp_if.h"
49#include "hipz_hw.h" 49#include "hipz_hw.h"
50 50
51#define NUM_CHUNKS(length, chunk_size) \
52 (((length) + (chunk_size - 1)) / (chunk_size))
53/* max number of rpages (per hcall register_rpages) */
54#define MAX_RPAGES 512
55
51static struct kmem_cache *mr_cache; 56static struct kmem_cache *mr_cache;
52static struct kmem_cache *mw_cache; 57static struct kmem_cache *mw_cache;
53 58
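
The newly introduced NUM_CHUNKS() is a ceiling division: how many chunk_size-sized pieces are needed to cover length units. It replaces the hand-rolled "(x + PAGE_SIZE - 1) / PAGE_SIZE" expressions further down, and MAX_RPAGES names the 512-page-per-hcall limit that previously appeared as a bare constant. A quick check of the arithmetic, with the MR size chosen purely for illustration:

#include <stdio.h>

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size - 1)) / (chunk_size))
#define MAX_RPAGES 512

int main(void)
{
	unsigned long hwpages = 1500;	/* hypothetical MR size in 4k pages */

	/* 1 byte still needs a whole page; 4096 needs exactly one. */
	printf("NUM_CHUNKS(1, 4096)    = %lu\n", NUM_CHUNKS(1UL, 4096));
	printf("NUM_CHUNKS(4096, 4096) = %lu\n", NUM_CHUNKS(4096UL, 4096));
	printf("NUM_CHUNKS(4097, 4096) = %lu\n", NUM_CHUNKS(4097UL, 4096));

	/* 1500 hw pages need three register_rpages hcalls of <= 512 each */
	printf("hcalls for %lu pages   = %lu\n",
	       hwpages, NUM_CHUNKS(hwpages, MAX_RPAGES));
	return 0;
}
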
@@ -56,9 +61,9 @@ static struct ehca_mr *ehca_mr_new(void)
56 struct ehca_mr *me; 61 struct ehca_mr *me;
57 62
58 me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); 63 me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
59 if (me) { 64 if (me)
60 spin_lock_init(&me->mrlock); 65 spin_lock_init(&me->mrlock);
61 } else 66 else
62 ehca_gen_err("alloc failed"); 67 ehca_gen_err("alloc failed");
63 68
64 return me; 69 return me;
@@ -74,9 +79,9 @@ static struct ehca_mw *ehca_mw_new(void)
74 struct ehca_mw *me; 79 struct ehca_mw *me;
75 80
76 me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); 81 me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
77 if (me) { 82 if (me)
78 spin_lock_init(&me->mwlock); 83 spin_lock_init(&me->mwlock);
79 } else 84 else
80 ehca_gen_err("alloc failed"); 85 ehca_gen_err("alloc failed");
81 86
82 return me; 87 return me;
@@ -106,11 +111,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
106 goto get_dma_mr_exit0; 111 goto get_dma_mr_exit0;
107 } 112 }
108 113
109 ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, 114 ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE,
110 mr_access_flags, e_pd, 115 mr_access_flags, e_pd,
111 &e_maxmr->ib.ib_mr.lkey, 116 &e_maxmr->ib.ib_mr.lkey,
112 &e_maxmr->ib.ib_mr.rkey); 117 &e_maxmr->ib.ib_mr.rkey);
113 if (ret) { 118 if (ret) {
119 ehca_mr_delete(e_maxmr);
114 ib_mr = ERR_PTR(ret); 120 ib_mr = ERR_PTR(ret);
115 goto get_dma_mr_exit0; 121 goto get_dma_mr_exit0;
116 } 122 }
@@ -144,9 +150,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
144 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); 150 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
145 151
146 u64 size; 152 u64 size;
147 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
148 u32 num_pages_mr;
149 u32 num_pages_4k; /* 4k portion "pages" */
150 153
151 if ((num_phys_buf <= 0) || !phys_buf_array) { 154 if ((num_phys_buf <= 0) || !phys_buf_array) {
152 ehca_err(pd->device, "bad input values: num_phys_buf=%x " 155 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
@@ -190,12 +193,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
190 goto reg_phys_mr_exit0; 193 goto reg_phys_mr_exit0;
191 } 194 }
192 195
193 /* determine number of MR pages */
194 num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
195 PAGE_SIZE - 1) / PAGE_SIZE);
196 num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
197 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
198
199 /* register MR on HCA */ 196 /* register MR on HCA */
200 if (ehca_mr_is_maxmr(size, iova_start)) { 197 if (ehca_mr_is_maxmr(size, iova_start)) {
201 e_mr->flags |= EHCA_MR_FLAG_MAXMR; 198 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
@@ -207,13 +204,22 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
207 goto reg_phys_mr_exit1; 204 goto reg_phys_mr_exit1;
208 } 205 }
209 } else { 206 } else {
210 pginfo.type = EHCA_MR_PGI_PHYS; 207 struct ehca_mr_pginfo pginfo;
211 pginfo.num_pages = num_pages_mr; 208 u32 num_kpages;
212 pginfo.num_4k = num_pages_4k; 209 u32 num_hwpages;
213 pginfo.num_phys_buf = num_phys_buf; 210
214 pginfo.phys_buf_array = phys_buf_array; 211 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
215 pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / 212 PAGE_SIZE);
216 EHCA_PAGESIZE); 213 num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) +
214 size, EHCA_PAGESIZE);
215 memset(&pginfo, 0, sizeof(pginfo));
216 pginfo.type = EHCA_MR_PGI_PHYS;
217 pginfo.num_kpages = num_kpages;
218 pginfo.num_hwpages = num_hwpages;
219 pginfo.u.phy.num_phys_buf = num_phys_buf;
220 pginfo.u.phy.phys_buf_array = phys_buf_array;
221 pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) /
222 EHCA_PAGESIZE);
217 223
218 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, 224 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
219 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, 225 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
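
The pginfo initializer change here is the recurring motif of this patch: struct ehca_mr_pginfo used to be filled with a positional brace list ({0,0,0,0,...}) that silently breaks whenever a member is added or reordered; now it is memset() to zero and only the relevant fields set by name, with the per-type members (phys/user/fmr) moved into a union selected by pginfo.type. A sketch of that layout and initialization style, with the struct reduced to the fields visible in this diff (the real struct in ehca_classes.h has more members):

#include <string.h>
#include <stdint.h>

enum pgi_type { PGI_PHYS, PGI_USER, PGI_FMR };

/* Reduced stand-in for struct ehca_mr_pginfo: common counters plus a
 * union of the per-source bookkeeping, as in the patched driver. */
struct pginfo_like {
	enum pgi_type type;
	uint64_t num_kpages;	/* kernel (PAGE_SIZE) pages */
	uint64_t num_hwpages;	/* HCA (EHCA_PAGESIZE) pages */
	uint64_t next_hwpage;
	union {
		struct { int num_phys_buf; void *phys_buf_array; } phy;
		struct { void *region; void *next_chunk; } usr;
		struct { uint64_t *page_list; uint64_t fmr_pgsize; } fmr;
	} u;
};

static void init_phys(struct pginfo_like *pginfo,
		      uint64_t kpages, uint64_t hwpages,
		      int nbuf, void *bufs)
{
	/* Zero everything first, then set fields by name: adding a new
	 * member later cannot silently shift the meaning of a brace list. */
	memset(pginfo, 0, sizeof(*pginfo));
	pginfo->type = PGI_PHYS;
	pginfo->num_kpages = kpages;
	pginfo->num_hwpages = hwpages;
	pginfo->u.phy.num_phys_buf = nbuf;
	pginfo->u.phy.phys_buf_array = bufs;
}

int main(void)
{
	struct pginfo_like pginfo;

	init_phys(&pginfo, 4, 8, 1, 0);
	return pginfo.type == PGI_PHYS ? 0 : 1;
}
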
@@ -240,18 +246,19 @@ reg_phys_mr_exit0:
240 246
241/*----------------------------------------------------------------------*/ 247/*----------------------------------------------------------------------*/
242 248
243struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, 249struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
244 int mr_access_flags, struct ib_udata *udata) 250 u64 virt, int mr_access_flags,
251 struct ib_udata *udata)
245{ 252{
246 struct ib_mr *ib_mr; 253 struct ib_mr *ib_mr;
247 struct ehca_mr *e_mr; 254 struct ehca_mr *e_mr;
248 struct ehca_shca *shca = 255 struct ehca_shca *shca =
249 container_of(pd->device, struct ehca_shca, ib_device); 256 container_of(pd->device, struct ehca_shca, ib_device);
250 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); 257 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
251 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 258 struct ehca_mr_pginfo pginfo;
252 int ret; 259 int ret;
253 u32 num_pages_mr; 260 u32 num_kpages;
254 u32 num_pages_4k; /* 4k portion "pages" */ 261 u32 num_hwpages;
255 262
256 if (!pd) { 263 if (!pd) {
257 ehca_gen_err("bad pd=%p", pd); 264 ehca_gen_err("bad pd=%p", pd);
@@ -289,7 +296,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt
289 e_mr->umem = ib_umem_get(pd->uobject->context, start, length, 296 e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
290 mr_access_flags); 297 mr_access_flags);
291 if (IS_ERR(e_mr->umem)) { 298 if (IS_ERR(e_mr->umem)) {
292 ib_mr = (void *) e_mr->umem; 299 ib_mr = (void *)e_mr->umem;
293 goto reg_user_mr_exit1; 300 goto reg_user_mr_exit1;
294 } 301 }
295 302
@@ -301,23 +308,24 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt
301 } 308 }
302 309
303 /* determine number of MR pages */ 310 /* determine number of MR pages */
304 num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / 311 num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
305 PAGE_SIZE); 312 num_hwpages = NUM_CHUNKS((virt % EHCA_PAGESIZE) + length,
306 num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / 313 EHCA_PAGESIZE);
307 EHCA_PAGESIZE);
308 314
309 /* register MR on HCA */ 315 /* register MR on HCA */
310 pginfo.type = EHCA_MR_PGI_USER; 316 memset(&pginfo, 0, sizeof(pginfo));
311 pginfo.num_pages = num_pages_mr; 317 pginfo.type = EHCA_MR_PGI_USER;
312 pginfo.num_4k = num_pages_4k; 318 pginfo.num_kpages = num_kpages;
313 pginfo.region = e_mr->umem; 319 pginfo.num_hwpages = num_hwpages;
314 pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; 320 pginfo.u.usr.region = e_mr->umem;
315 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, 321 pginfo.next_hwpage = e_mr->umem->offset / EHCA_PAGESIZE;
316 (&e_mr->umem->chunk_list), 322 pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
317 list); 323 (&e_mr->umem->chunk_list),
318 324 list);
319 ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, 325
320 &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); 326 ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
327 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
328 &e_mr->ib.ib_mr.rkey);
321 if (ret) { 329 if (ret) {
322 ib_mr = ERR_PTR(ret); 330 ib_mr = ERR_PTR(ret);
323 goto reg_user_mr_exit2; 331 goto reg_user_mr_exit2;
@@ -360,9 +368,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
360 struct ehca_pd *new_pd; 368 struct ehca_pd *new_pd;
361 u32 tmp_lkey, tmp_rkey; 369 u32 tmp_lkey, tmp_rkey;
362 unsigned long sl_flags; 370 unsigned long sl_flags;
363 u32 num_pages_mr = 0; 371 u32 num_kpages = 0;
364 u32 num_pages_4k = 0; /* 4k portion "pages" */ 372 u32 num_hwpages = 0;
365 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 373 struct ehca_mr_pginfo pginfo;
366 u32 cur_pid = current->tgid; 374 u32 cur_pid = current->tgid;
367 375
368 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && 376 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -414,7 +422,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
414 goto rereg_phys_mr_exit0; 422 goto rereg_phys_mr_exit0;
415 } 423 }
416 if (!phys_buf_array || num_phys_buf <= 0) { 424 if (!phys_buf_array || num_phys_buf <= 0) {
417 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" 425 ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
418 " phys_buf_array=%p num_phys_buf=%x", 426 " phys_buf_array=%p num_phys_buf=%x",
419 mr_rereg_mask, phys_buf_array, num_phys_buf); 427 mr_rereg_mask, phys_buf_array, num_phys_buf);
420 ret = -EINVAL; 428 ret = -EINVAL;
@@ -438,10 +446,10 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
438 446
439 /* set requested values dependent on rereg request */ 447 /* set requested values dependent on rereg request */
440 spin_lock_irqsave(&e_mr->mrlock, sl_flags); 448 spin_lock_irqsave(&e_mr->mrlock, sl_flags);
441 new_start = e_mr->start; /* new == old address */ 449 new_start = e_mr->start;
442 new_size = e_mr->size; /* new == old length */ 450 new_size = e_mr->size;
443 new_acl = e_mr->acl; /* new == old access control */ 451 new_acl = e_mr->acl;
444 new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ 452 new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
445 453
446 if (mr_rereg_mask & IB_MR_REREG_TRANS) { 454 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
447 new_start = iova_start; /* change address */ 455 new_start = iova_start; /* change address */
@@ -458,17 +466,18 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
458 ret = -EINVAL; 466 ret = -EINVAL;
459 goto rereg_phys_mr_exit1; 467 goto rereg_phys_mr_exit1;
460 } 468 }
461 num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + 469 num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
462 PAGE_SIZE - 1) / PAGE_SIZE); 470 new_size, PAGE_SIZE);
463 num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + 471 num_hwpages = NUM_CHUNKS(((u64)new_start % EHCA_PAGESIZE) +
464 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); 472 new_size, EHCA_PAGESIZE);
465 pginfo.type = EHCA_MR_PGI_PHYS; 473 memset(&pginfo, 0, sizeof(pginfo));
466 pginfo.num_pages = num_pages_mr; 474 pginfo.type = EHCA_MR_PGI_PHYS;
467 pginfo.num_4k = num_pages_4k; 475 pginfo.num_kpages = num_kpages;
468 pginfo.num_phys_buf = num_phys_buf; 476 pginfo.num_hwpages = num_hwpages;
469 pginfo.phys_buf_array = phys_buf_array; 477 pginfo.u.phy.num_phys_buf = num_phys_buf;
470 pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / 478 pginfo.u.phy.phys_buf_array = phys_buf_array;
471 EHCA_PAGESIZE); 479 pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) /
480 EHCA_PAGESIZE);
472 } 481 }
473 if (mr_rereg_mask & IB_MR_REREG_ACCESS) 482 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
474 new_acl = mr_access_flags; 483 new_acl = mr_access_flags;
@@ -510,7 +519,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
510 struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); 519 struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
511 u32 cur_pid = current->tgid; 520 u32 cur_pid = current->tgid;
512 unsigned long sl_flags; 521 unsigned long sl_flags;
513 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 522 struct ehca_mr_hipzout_parms hipzout;
514 523
515 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && 524 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
516 (my_pd->ownpid != cur_pid)) { 525 (my_pd->ownpid != cur_pid)) {
@@ -536,14 +545,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
536 "hca_hndl=%lx mr_hndl=%lx lkey=%x", 545 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
537 h_ret, mr, shca->ipz_hca_handle.handle, 546 h_ret, mr, shca->ipz_hca_handle.handle,
538 e_mr->ipz_mr_handle.handle, mr->lkey); 547 e_mr->ipz_mr_handle.handle, mr->lkey);
539 ret = ehca_mrmw_map_hrc_query_mr(h_ret); 548 ret = ehca2ib_return_code(h_ret);
540 goto query_mr_exit1; 549 goto query_mr_exit1;
541 } 550 }
542 mr_attr->pd = mr->pd; 551 mr_attr->pd = mr->pd;
543 mr_attr->device_virt_addr = hipzout.vaddr; 552 mr_attr->device_virt_addr = hipzout.vaddr;
544 mr_attr->size = hipzout.len; 553 mr_attr->size = hipzout.len;
545 mr_attr->lkey = hipzout.lkey; 554 mr_attr->lkey = hipzout.lkey;
546 mr_attr->rkey = hipzout.rkey; 555 mr_attr->rkey = hipzout.rkey;
547 ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); 556 ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
548 557
549query_mr_exit1: 558query_mr_exit1:
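
Another pattern repeated throughout this file: the family of per-call-site mappers (ehca_mrmw_map_hrc_query_mr, _free_mr, _alloc, _rrpg_last, ...) is replaced by a single ehca2ib_return_code() that converts a hypervisor status into a Linux errno. The real mapping lives elsewhere in the driver (ehca_tools.h) and is not shown in this diff, so the case list below is an assumption about its shape, not a quote; the stand-in H_* values are likewise placeholders:

#include <errno.h>
#include <stdint.h>

#define H_SUCCESS	0ULL
#define H_BUSY		1ULL	/* stand-in values, not the real hcall codes */
#define H_NO_MEM	2ULL

/* One mapper instead of a dozen near-identical ones: statuses the
 * caller cannot act on specifically collapse to -EINVAL. */
static int ehca2ib_return_code_sketch(uint64_t h_ret)
{
	switch (h_ret) {
	case H_SUCCESS:
		return 0;
	case H_BUSY:
		return -EBUSY;
	case H_NO_MEM:
		return -ENOMEM;
	default:
		return -EINVAL;
	}
}

int main(void)
{
	return ehca2ib_return_code_sketch(H_SUCCESS);
}

The consolidation also explains why several exit paths in this file shrink: every "ret = ehca_mrmw_map_hrc_*(h_ret)" becomes the same one-line call.
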
@@ -596,7 +605,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
596 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", 605 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
597 h_ret, shca, e_mr, shca->ipz_hca_handle.handle, 606 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
598 e_mr->ipz_mr_handle.handle, mr->lkey); 607 e_mr->ipz_mr_handle.handle, mr->lkey);
599 ret = ehca_mrmw_map_hrc_free_mr(h_ret); 608 ret = ehca2ib_return_code(h_ret);
600 goto dereg_mr_exit0; 609 goto dereg_mr_exit0;
601 } 610 }
602 611
@@ -622,7 +631,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
622 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); 631 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
623 struct ehca_shca *shca = 632 struct ehca_shca *shca =
624 container_of(pd->device, struct ehca_shca, ib_device); 633 container_of(pd->device, struct ehca_shca, ib_device);
625 struct ehca_mw_hipzout_parms hipzout = {{0},0}; 634 struct ehca_mw_hipzout_parms hipzout;
626 635
627 e_mw = ehca_mw_new(); 636 e_mw = ehca_mw_new();
628 if (!e_mw) { 637 if (!e_mw) {
@@ -636,7 +645,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
636 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " 645 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
637 "shca=%p hca_hndl=%lx mw=%p", 646 "shca=%p hca_hndl=%lx mw=%p",
638 h_ret, shca, shca->ipz_hca_handle.handle, e_mw); 647 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
639 ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); 648 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
640 goto alloc_mw_exit1; 649 goto alloc_mw_exit1;
641 } 650 }
642 /* successful MW allocation */ 651 /* successful MW allocation */
@@ -679,7 +688,7 @@ int ehca_dealloc_mw(struct ib_mw *mw)
679 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", 688 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
680 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, 689 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
681 e_mw->ipz_mw_handle.handle); 690 e_mw->ipz_mw_handle.handle);
682 return ehca_mrmw_map_hrc_free_mw(h_ret); 691 return ehca2ib_return_code(h_ret);
683 } 692 }
684 /* successful deallocation */ 693 /* successful deallocation */
685 ehca_mw_delete(e_mw); 694 ehca_mw_delete(e_mw);
@@ -699,7 +708,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
699 struct ehca_mr *e_fmr; 708 struct ehca_mr *e_fmr;
700 int ret; 709 int ret;
701 u32 tmp_lkey, tmp_rkey; 710 u32 tmp_lkey, tmp_rkey;
702 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 711 struct ehca_mr_pginfo pginfo;
703 712
704 /* check other parameters */ 713 /* check other parameters */
705 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && 714 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
@@ -745,6 +754,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
745 e_fmr->flags |= EHCA_MR_FLAG_FMR; 754 e_fmr->flags |= EHCA_MR_FLAG_FMR;
746 755
747 /* register MR on HCA */ 756 /* register MR on HCA */
757 memset(&pginfo, 0, sizeof(pginfo));
748 ret = ehca_reg_mr(shca, e_fmr, NULL, 758 ret = ehca_reg_mr(shca, e_fmr, NULL,
749 fmr_attr->max_pages * (1 << fmr_attr->page_shift), 759 fmr_attr->max_pages * (1 << fmr_attr->page_shift),
750 mr_access_flags, e_pd, &pginfo, 760 mr_access_flags, e_pd, &pginfo,
@@ -783,7 +793,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
783 container_of(fmr->device, struct ehca_shca, ib_device); 793 container_of(fmr->device, struct ehca_shca, ib_device);
784 struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); 794 struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
785 struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); 795 struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
786 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 796 struct ehca_mr_pginfo pginfo;
787 u32 tmp_lkey, tmp_rkey; 797 u32 tmp_lkey, tmp_rkey;
788 798
789 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { 799 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
@@ -809,14 +819,16 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
809 fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); 819 fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
810 } 820 }
811 821
812 pginfo.type = EHCA_MR_PGI_FMR; 822 memset(&pginfo, 0, sizeof(pginfo));
813 pginfo.num_pages = list_len; 823 pginfo.type = EHCA_MR_PGI_FMR;
814 pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); 824 pginfo.num_kpages = list_len;
815 pginfo.page_list = page_list; 825 pginfo.num_hwpages = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
816 pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / 826 pginfo.u.fmr.page_list = page_list;
817 EHCA_PAGESIZE); 827 pginfo.next_hwpage = ((iova & (e_fmr->fmr_page_size-1)) /
828 EHCA_PAGESIZE);
829 pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
818 830
819 ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, 831 ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
820 list_len * e_fmr->fmr_page_size, 832 list_len * e_fmr->fmr_page_size,
821 e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); 833 e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
822 if (ret) 834 if (ret)
@@ -831,8 +843,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
831map_phys_fmr_exit0: 843map_phys_fmr_exit0:
832 if (ret) 844 if (ret)
833 ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " 845 ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
834 "iova=%lx", 846 "iova=%lx", ret, fmr, page_list, list_len, iova);
835 ret, fmr, page_list, list_len, iova);
836 return ret; 847 return ret;
837} /* end ehca_map_phys_fmr() */ 848} /* end ehca_map_phys_fmr() */
838 849
@@ -922,7 +933,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
922 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", 933 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
923 h_ret, e_fmr, shca->ipz_hca_handle.handle, 934 h_ret, e_fmr, shca->ipz_hca_handle.handle,
924 e_fmr->ipz_mr_handle.handle, fmr->lkey); 935 e_fmr->ipz_mr_handle.handle, fmr->lkey);
925 ret = ehca_mrmw_map_hrc_free_mr(h_ret); 936 ret = ehca2ib_return_code(h_ret);
926 goto free_fmr_exit0; 937 goto free_fmr_exit0;
927 } 938 }
928 /* successful deregistration */ 939 /* successful deregistration */
@@ -950,12 +961,12 @@ int ehca_reg_mr(struct ehca_shca *shca,
950 int ret; 961 int ret;
951 u64 h_ret; 962 u64 h_ret;
952 u32 hipz_acl; 963 u32 hipz_acl;
953 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 964 struct ehca_mr_hipzout_parms hipzout;
954 965
955 ehca_mrmw_map_acl(acl, &hipz_acl); 966 ehca_mrmw_map_acl(acl, &hipz_acl);
956 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); 967 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
957 if (ehca_use_hp_mr == 1) 968 if (ehca_use_hp_mr == 1)
958 hipz_acl |= 0x00000001; 969 hipz_acl |= 0x00000001;
959 970
960 h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, 971 h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
961 (u64)iova_start, size, hipz_acl, 972 (u64)iova_start, size, hipz_acl,
@@ -963,7 +974,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
963 if (h_ret != H_SUCCESS) { 974 if (h_ret != H_SUCCESS) {
964 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " 975 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
965 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); 976 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
966 ret = ehca_mrmw_map_hrc_alloc(h_ret); 977 ret = ehca2ib_return_code(h_ret);
967 goto ehca_reg_mr_exit0; 978 goto ehca_reg_mr_exit0;
968 } 979 }
969 980
@@ -974,11 +985,11 @@ int ehca_reg_mr(struct ehca_shca *shca,
974 goto ehca_reg_mr_exit1; 985 goto ehca_reg_mr_exit1;
975 986
976 /* successful registration */ 987 /* successful registration */
977 e_mr->num_pages = pginfo->num_pages; 988 e_mr->num_kpages = pginfo->num_kpages;
978 e_mr->num_4k = pginfo->num_4k; 989 e_mr->num_hwpages = pginfo->num_hwpages;
979 e_mr->start = iova_start; 990 e_mr->start = iova_start;
980 e_mr->size = size; 991 e_mr->size = size;
981 e_mr->acl = acl; 992 e_mr->acl = acl;
982 *lkey = hipzout.lkey; 993 *lkey = hipzout.lkey;
983 *rkey = hipzout.rkey; 994 *rkey = hipzout.rkey;
984 return 0; 995 return 0;
@@ -988,10 +999,10 @@ ehca_reg_mr_exit1:
988 if (h_ret != H_SUCCESS) { 999 if (h_ret != H_SUCCESS) {
989 ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " 1000 ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
990 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " 1001 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
991 "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", 1002 "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x",
992 h_ret, shca, e_mr, iova_start, size, acl, e_pd, 1003 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
993 hipzout.lkey, pginfo, pginfo->num_pages, 1004 hipzout.lkey, pginfo, pginfo->num_kpages,
994 pginfo->num_4k, ret); 1005 pginfo->num_hwpages, ret);
995 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " 1006 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
996 "not recoverable"); 1007 "not recoverable");
997 } 1008 }
@@ -999,9 +1010,9 @@ ehca_reg_mr_exit0:
999 if (ret) 1010 if (ret)
1000 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " 1011 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
1001 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " 1012 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
1002 "num_pages=%lx num_4k=%lx", 1013 "num_kpages=%lx num_hwpages=%lx",
1003 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, 1014 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
1004 pginfo->num_pages, pginfo->num_4k); 1015 pginfo->num_kpages, pginfo->num_hwpages);
1005 return ret; 1016 return ret;
1006} /* end ehca_reg_mr() */ 1017} /* end ehca_reg_mr() */
1007 1018
@@ -1026,24 +1037,24 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1026 } 1037 }
1027 1038
1028 /* max 512 pages per shot */ 1039 /* max 512 pages per shot */
1029 for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { 1040 for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
1030 1041
1031 if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { 1042 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1032 rnum = pginfo->num_4k % 512; /* last shot */ 1043 rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
1033 if (rnum == 0) 1044 if (rnum == 0)
1034 rnum = 512; /* last shot is full */ 1045 rnum = MAX_RPAGES; /* last shot is full */
1035 } else 1046 } else
1036 rnum = 512; 1047 rnum = MAX_RPAGES;
1037 1048
1038 if (rnum > 1) { 1049 ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1039 ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); 1050 if (ret) {
1040 if (ret) { 1051 ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1041 ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1042 "bad rc, ret=%x rnum=%x kpage=%p", 1052 "bad rc, ret=%x rnum=%x kpage=%p",
1043 ret, rnum, kpage); 1053 ret, rnum, kpage);
1044 ret = -EFAULT; 1054 goto ehca_reg_mr_rpages_exit1;
1045 goto ehca_reg_mr_rpages_exit1; 1055 }
1046 } 1056
1057 if (rnum > 1) {
1047 rpage = virt_to_abs(kpage); 1058 rpage = virt_to_abs(kpage);
1048 if (!rpage) { 1059 if (!rpage) {
1049 ehca_err(&shca->ib_device, "kpage=%p i=%x", 1060 ehca_err(&shca->ib_device, "kpage=%p i=%x",
@@ -1051,21 +1062,14 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1051 ret = -EFAULT; 1062 ret = -EFAULT;
1052 goto ehca_reg_mr_rpages_exit1; 1063 goto ehca_reg_mr_rpages_exit1;
1053 } 1064 }
1054 } else { /* rnum==1 */ 1065 } else
1055 ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); 1066 rpage = *kpage;
1056 if (ret) {
1057 ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
1058 "bad rc, ret=%x i=%x", ret, i);
1059 ret = -EFAULT;
1060 goto ehca_reg_mr_rpages_exit1;
1061 }
1062 }
1063 1067
1064 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, 1068 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
1065 0, /* pagesize 4k */ 1069 0, /* pagesize 4k */
1066 0, rpage, rnum); 1070 0, rpage, rnum);
1067 1071
1068 if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { 1072 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1069 /* 1073 /*
1070 * check for 'registration complete'==H_SUCCESS 1074 * check for 'registration complete'==H_SUCCESS
1071 * and for 'page registered'==H_PAGE_REGISTERED 1075 * and for 'page registered'==H_PAGE_REGISTERED
@@ -1078,7 +1082,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1078 shca->ipz_hca_handle.handle, 1082 shca->ipz_hca_handle.handle,
1079 e_mr->ipz_mr_handle.handle, 1083 e_mr->ipz_mr_handle.handle,
1080 e_mr->ib.ib_mr.lkey); 1084 e_mr->ib.ib_mr.lkey);
1081 ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); 1085 ret = ehca2ib_return_code(h_ret);
1082 break; 1086 break;
1083 } else 1087 } else
1084 ret = 0; 1088 ret = 0;
@@ -1089,7 +1093,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1089 e_mr->ib.ib_mr.lkey, 1093 e_mr->ib.ib_mr.lkey,
1090 shca->ipz_hca_handle.handle, 1094 shca->ipz_hca_handle.handle,
1091 e_mr->ipz_mr_handle.handle); 1095 e_mr->ipz_mr_handle.handle);
1092 ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); 1096 ret = ehca2ib_return_code(h_ret);
1093 break; 1097 break;
1094 } else 1098 } else
1095 ret = 0; 1099 ret = 0;
@@ -1101,8 +1105,8 @@ ehca_reg_mr_rpages_exit1:
1101ehca_reg_mr_rpages_exit0: 1105ehca_reg_mr_rpages_exit0:
1102 if (ret) 1106 if (ret)
1103 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " 1107 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
1104 "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, 1108 "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
1105 pginfo->num_pages, pginfo->num_4k); 1109 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1106 return ret; 1110 return ret;
1107} /* end ehca_reg_mr_rpages() */ 1111} /* end ehca_reg_mr_rpages() */
1108 1112
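
The rework of ehca_reg_mr_rpages() unifies the rnum > 1 and rnum == 1 paths: ehca_set_pagebuf() is now called unconditionally, and for a single page the address is taken straight from *kpage instead of going through a dedicated ehca_set_pagebuf_1(). The loop's chunking arithmetic, last-shot sizing included, extracted into plain C for inspection (the hcall and page-table plumbing are stubbed out in the comment):

#include <stdio.h>

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size - 1)) / (chunk_size))
#define MAX_RPAGES 512

int main(void)
{
	unsigned long num_hwpages = 1100;	/* hypothetical MR */
	unsigned long i, rnum, chunks = NUM_CHUNKS(num_hwpages, MAX_RPAGES);

	for (i = 0; i < chunks; i++) {
		if (i == chunks - 1) {
			rnum = num_hwpages % MAX_RPAGES;	/* last shot */
			if (rnum == 0)
				rnum = MAX_RPAGES;	/* last shot is full */
		} else
			rnum = MAX_RPAGES;

		/* Here the driver fills kpage via ehca_set_pagebuf() and
		 * issues hipz_h_register_rpage_mr(...,
		 *     rnum > 1 ? virt_to_abs(kpage) : *kpage, rnum);
		 * the last iteration must return H_SUCCESS, every earlier
		 * one H_PAGE_REGISTERED. */
		printf("hcall %lu registers %lu pages\n", i, rnum);
	}
	return 0;
}

For 1100 pages this prints three hcalls of 512, 512, and 76 pages, matching the "max 512 pages per shot" comment in the hunk.
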
@@ -1124,7 +1128,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1124 u64 *kpage; 1128 u64 *kpage;
1125 u64 rpage; 1129 u64 rpage;
1126 struct ehca_mr_pginfo pginfo_save; 1130 struct ehca_mr_pginfo pginfo_save;
1127 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 1131 struct ehca_mr_hipzout_parms hipzout;
1128 1132
1129 ehca_mrmw_map_acl(acl, &hipz_acl); 1133 ehca_mrmw_map_acl(acl, &hipz_acl);
1130 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); 1134 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
@@ -1137,12 +1141,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1137 } 1141 }
1138 1142
1139 pginfo_save = *pginfo; 1143 pginfo_save = *pginfo;
1140 ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); 1144 ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
1141 if (ret) { 1145 if (ret) {
1142 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " 1146 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1143 "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", 1147 "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx "
1144 e_mr, pginfo, pginfo->type, pginfo->num_pages, 1148 "kpage=%p", e_mr, pginfo, pginfo->type,
1145 pginfo->num_4k,kpage); 1149 pginfo->num_kpages, pginfo->num_hwpages, kpage);
1146 goto ehca_rereg_mr_rereg1_exit1; 1150 goto ehca_rereg_mr_rereg1_exit1;
1147 } 1151 }
1148 rpage = virt_to_abs(kpage); 1152 rpage = virt_to_abs(kpage);
@@ -1164,7 +1168,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1164 "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); 1168 "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
1165 *pginfo = pginfo_save; 1169 *pginfo = pginfo_save;
1166 ret = -EAGAIN; 1170 ret = -EAGAIN;
1167 } else if ((u64*)hipzout.vaddr != iova_start) { 1171 } else if ((u64 *)hipzout.vaddr != iova_start) {
1168 ehca_err(&shca->ib_device, "PHYP changed iova_start in " 1172 ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1169 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " 1173 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
1170 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, 1174 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
@@ -1176,11 +1180,11 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1176 * successful reregistration 1180 * successful reregistration
1177 * note: start and start_out are identical for eServer HCAs 1181 * note: start and start_out are identical for eServer HCAs
1178 */ 1182 */
1179 e_mr->num_pages = pginfo->num_pages; 1183 e_mr->num_kpages = pginfo->num_kpages;
1180 e_mr->num_4k = pginfo->num_4k; 1184 e_mr->num_hwpages = pginfo->num_hwpages;
1181 e_mr->start = iova_start; 1185 e_mr->start = iova_start;
1182 e_mr->size = size; 1186 e_mr->size = size;
1183 e_mr->acl = acl; 1187 e_mr->acl = acl;
1184 *lkey = hipzout.lkey; 1188 *lkey = hipzout.lkey;
1185 *rkey = hipzout.rkey; 1189 *rkey = hipzout.rkey;
1186 } 1190 }
@@ -1190,9 +1194,9 @@ ehca_rereg_mr_rereg1_exit1:
1190ehca_rereg_mr_rereg1_exit0: 1194ehca_rereg_mr_rereg1_exit0:
1191 if ( ret && (ret != -EAGAIN) ) 1195 if ( ret && (ret != -EAGAIN) )
1192 ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " 1196 ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
1193 "pginfo=%p num_pages=%lx num_4k=%lx", 1197 "pginfo=%p num_kpages=%lx num_hwpages=%lx",
1194 ret, *lkey, *rkey, pginfo, pginfo->num_pages, 1198 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1195 pginfo->num_4k); 1199 pginfo->num_hwpages);
1196 return ret; 1200 return ret;
1197} /* end ehca_rereg_mr_rereg1() */ 1201} /* end ehca_rereg_mr_rereg1() */
1198 1202
@@ -1214,10 +1218,12 @@ int ehca_rereg_mr(struct ehca_shca *shca,
1214 int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ 1218 int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1215 1219
1216 /* first determine reregistration hCall(s) */ 1220 /* first determine reregistration hCall(s) */
1217 if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || 1221 if ((pginfo->num_hwpages > MAX_RPAGES) ||
1218 (pginfo->num_4k > e_mr->num_4k)) { 1222 (e_mr->num_hwpages > MAX_RPAGES) ||
1219 ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " 1223 (pginfo->num_hwpages > e_mr->num_hwpages)) {
1220 "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); 1224 ehca_dbg(&shca->ib_device, "Rereg3 case, "
1225 "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x",
1226 pginfo->num_hwpages, e_mr->num_hwpages);
1221 rereg_1_hcall = 0; 1227 rereg_1_hcall = 0;
1222 rereg_3_hcall = 1; 1228 rereg_3_hcall = 1;
1223 } 1229 }
@@ -1253,7 +1259,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
1253 h_ret, e_mr, shca->ipz_hca_handle.handle, 1259 h_ret, e_mr, shca->ipz_hca_handle.handle,
1254 e_mr->ipz_mr_handle.handle, 1260 e_mr->ipz_mr_handle.handle,
1255 e_mr->ib.ib_mr.lkey); 1261 e_mr->ib.ib_mr.lkey);
1256 ret = ehca_mrmw_map_hrc_free_mr(h_ret); 1262 ret = ehca2ib_return_code(h_ret);
1257 goto ehca_rereg_mr_exit0; 1263 goto ehca_rereg_mr_exit0;
1258 } 1264 }
1259 /* clean ehca_mr_t, without changing struct ib_mr and lock */ 1265 /* clean ehca_mr_t, without changing struct ib_mr and lock */
@@ -1281,9 +1287,9 @@ ehca_rereg_mr_exit0:
1281 if (ret) 1287 if (ret)
1282 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " 1288 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
1283 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " 1289 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
1284 "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " 1290 "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
1285 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, 1291 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1286 acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, 1292 acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
1287 rereg_1_hcall, rereg_3_hcall); 1293 rereg_1_hcall, rereg_3_hcall);
1288 return ret; 1294 return ret;
1289} /* end ehca_rereg_mr() */ 1295} /* end ehca_rereg_mr() */
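
ehca_rereg_mr() now phrases the "one hcall vs. three" decision in terms of MAX_RPAGES: a single reregister hcall only works when both the old and the new mapping fit in one rpage shot and the MR is not growing; otherwise the driver falls back to free + alloc + register-rpages (three hcalls). The predicate in isolation, as implied by the hunk above:

#include <stdbool.h>
#include <stdio.h>

#define MAX_RPAGES 512

/* true  -> must use the 3-hcall path (free, alloc, register rpages)
 * false -> a single hipz reregister hcall suffices */
static bool needs_rereg3(unsigned long new_hwpages, unsigned long old_hwpages)
{
	return new_hwpages > MAX_RPAGES ||
	       old_hwpages > MAX_RPAGES ||
	       new_hwpages > old_hwpages;
}

int main(void)
{
	printf("%d %d %d\n",
	       needs_rereg3(100, 100),	/* 0: fits, not growing */
	       needs_rereg3(600, 600),	/* 1: too large for one shot */
	       needs_rereg3(100, 50));	/* 1: MR grows */
	return 0;
}

The same size test reappears in ehca_unmap_one_fmr() below, where fmr_max_pages <= MAX_RPAGES decides whether the cheap len=0 reregister can be tried before falling back to the free-and-rereg path.
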
@@ -1295,97 +1301,86 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
1295{ 1301{
1296 int ret = 0; 1302 int ret = 0;
1297 u64 h_ret; 1303 u64 h_ret;
1298 int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
1299 int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
1300 struct ehca_pd *e_pd = 1304 struct ehca_pd *e_pd =
1301 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); 1305 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1302 struct ehca_mr save_fmr; 1306 struct ehca_mr save_fmr;
1303 u32 tmp_lkey, tmp_rkey; 1307 u32 tmp_lkey, tmp_rkey;
1304 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 1308 struct ehca_mr_pginfo pginfo;
1305 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 1309 struct ehca_mr_hipzout_parms hipzout;
1310 struct ehca_mr save_mr;
1306 1311
1307 /* first check if reregistration hCall can be used for unmap */ 1312 if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
1308 if (e_fmr->fmr_max_pages > 512) {
1309 rereg_1_hcall = 0;
1310 rereg_3_hcall = 1;
1311 }
1312
1313 if (rereg_1_hcall) {
1314 /* 1313 /*
1315 * note: after using rereg hcall with len=0, 1314 * note: after using rereg hcall with len=0,
1316 * rereg hcall must be used again for registering pages 1315 * rereg hcall must be used again for registering pages
1317 */ 1316 */
1318 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, 1317 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1319 0, 0, e_pd->fw_pd, 0, &hipzout); 1318 0, 0, e_pd->fw_pd, 0, &hipzout);
1320 if (h_ret != H_SUCCESS) { 1319 if (h_ret == H_SUCCESS) {
1321 /*
1322 * should not happen, because length checked above,
1323 * FMRs are not shared and no MW bound to FMRs
1324 */
1325 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1326 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
1327 "mr_hndl=%lx lkey=%x lkey_out=%x",
1328 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1329 e_fmr->ipz_mr_handle.handle,
1330 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1331 rereg_3_hcall = 1;
1332 } else {
1333 /* successful reregistration */ 1320 /* successful reregistration */
1334 e_fmr->start = NULL; 1321 e_fmr->start = NULL;
1335 e_fmr->size = 0; 1322 e_fmr->size = 0;
1336 tmp_lkey = hipzout.lkey; 1323 tmp_lkey = hipzout.lkey;
1337 tmp_rkey = hipzout.rkey; 1324 tmp_rkey = hipzout.rkey;
1325 return 0;
1338 } 1326 }
1327 /*
1328 * should not happen, because length checked above,
1329 * FMRs are not shared and no MW bound to FMRs
1330 */
1331 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1332 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
1333 "mr_hndl=%lx lkey=%x lkey_out=%x",
1334 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1335 e_fmr->ipz_mr_handle.handle,
1336 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1337 /* try free and rereg */
1339 } 1338 }
1340 1339
1341 if (rereg_3_hcall) { 1340 /* first free old FMR */
1342 struct ehca_mr save_mr; 1341 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1343 1342 if (h_ret != H_SUCCESS) {
1344 /* first free old FMR */ 1343 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1345 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); 1344 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
1346 if (h_ret != H_SUCCESS) { 1345 "lkey=%x",
1347 ehca_err(&shca->ib_device, "hipz_free_mr failed, " 1346 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1348 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " 1347 e_fmr->ipz_mr_handle.handle,
1349 "lkey=%x", 1348 e_fmr->ib.ib_fmr.lkey);
1350 h_ret, e_fmr, shca->ipz_hca_handle.handle, 1349 ret = ehca2ib_return_code(h_ret);
1351 e_fmr->ipz_mr_handle.handle, 1350 goto ehca_unmap_one_fmr_exit0;
1352 e_fmr->ib.ib_fmr.lkey); 1351 }
1353 ret = ehca_mrmw_map_hrc_free_mr(h_ret); 1352 /* clean ehca_mr_t, without changing lock */
1354 goto ehca_unmap_one_fmr_exit0; 1353 save_fmr = *e_fmr;
1355 } 1354 ehca_mr_deletenew(e_fmr);
1356 /* clean ehca_mr_t, without changing lock */ 1355
1357 save_fmr = *e_fmr; 1356 /* set some MR values */
1358 ehca_mr_deletenew(e_fmr); 1357 e_fmr->flags = save_fmr.flags;
1359 1358 e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1360 /* set some MR values */ 1359 e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1361 e_fmr->flags = save_fmr.flags; 1360 e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1362 e_fmr->fmr_page_size = save_fmr.fmr_page_size; 1361 e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1363 e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; 1362 e_fmr->acl = save_fmr.acl;
1364 e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; 1363
1365 e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; 1364 memset(&pginfo, 0, sizeof(pginfo));
1366 e_fmr->acl = save_fmr.acl; 1365 pginfo.type = EHCA_MR_PGI_FMR;
1367 1366 pginfo.num_kpages = 0;
1368 pginfo.type = EHCA_MR_PGI_FMR; 1367 pginfo.num_hwpages = 0;
1369 pginfo.num_pages = 0; 1368 ret = ehca_reg_mr(shca, e_fmr, NULL,
1370 pginfo.num_4k = 0; 1369 (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1371 ret = ehca_reg_mr(shca, e_fmr, NULL, 1370 e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1372 (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), 1371 &tmp_rkey);
1373 e_fmr->acl, e_pd, &pginfo, &tmp_lkey, 1372 if (ret) {
1374 &tmp_rkey); 1373 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1375 if (ret) { 1374 memcpy(&e_fmr->flags, &(save_mr.flags),
1376 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; 1375 sizeof(struct ehca_mr) - offset);
1377 memcpy(&e_fmr->flags, &(save_mr.flags), 1376 goto ehca_unmap_one_fmr_exit0;
1378 sizeof(struct ehca_mr) - offset);
1379 goto ehca_unmap_one_fmr_exit0;
1380 }
1381 } 1377 }
1382 1378
1383ehca_unmap_one_fmr_exit0: 1379ehca_unmap_one_fmr_exit0:
1384 if (ret) 1380 if (ret)
1385 ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " 1381 ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
1386 "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", 1382 "fmr_max_pages=%x",
1387 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, 1383 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1388 rereg_1_hcall, rereg_3_hcall);
1389 return ret; 1384 return ret;
1390} /* end ehca_unmap_one_fmr() */ 1385} /* end ehca_unmap_one_fmr() */
1391 1386
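
The rewritten ehca_unmap_one_fmr() above drops the rereg_1_hcall/rereg_3_hcall flag pair: the cheap rereg-with-length-0 hcall is tried first and returns early on success, and control simply falls through to the free-and-reregister path otherwise. (One caveat worth flagging: the fallback's error path copies from save_mr, which is never written in this function; save_fmr looks like the intended source.) Below is a minimal compilable sketch of the new control flow, with MAX_FAST_PAGES standing in for MAX_RPAGES and both hcalls stubbed out; it is an illustration of the shape, not the driver code.

#include <stdio.h>

#define MAX_FAST_PAGES 512              /* stand-in for MAX_RPAGES */

static int fast_unmap_hcall(int nr_pages)
{
        /* pretend the rereg-with-length-0 hcall fails for big FMRs */
        return nr_pages <= MAX_FAST_PAGES ? 0 : -1;
}

static int free_and_rereg(void)
{
        return 0;       /* free the old resource, register a fresh one */
}

/* same shape as ehca_unmap_one_fmr(): fast path, early return, fallback */
static int unmap_one(int nr_pages)
{
        if (nr_pages <= MAX_FAST_PAGES) {
                if (!fast_unmap_hcall(nr_pages))
                        return 0;       /* successful reregistration */
                /* log, then try free and rereg */
        }
        return free_and_rereg();
}

int main(void)
{
        printf("%d %d\n", unmap_one(100), unmap_one(1000));
        return 0;
}
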
@@ -1403,7 +1398,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
1403 int ret = 0; 1398 int ret = 0;
1404 u64 h_ret; 1399 u64 h_ret;
1405 u32 hipz_acl; 1400 u32 hipz_acl;
1406 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 1401 struct ehca_mr_hipzout_parms hipzout;
1407 1402
1408 ehca_mrmw_map_acl(acl, &hipz_acl); 1403 ehca_mrmw_map_acl(acl, &hipz_acl);
1409 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); 1404 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
@@ -1419,15 +1414,15 @@ int ehca_reg_smr(struct ehca_shca *shca,
1419 shca->ipz_hca_handle.handle, 1414 shca->ipz_hca_handle.handle,
1420 e_origmr->ipz_mr_handle.handle, 1415 e_origmr->ipz_mr_handle.handle,
1421 e_origmr->ib.ib_mr.lkey); 1416 e_origmr->ib.ib_mr.lkey);
1422 ret = ehca_mrmw_map_hrc_reg_smr(h_ret); 1417 ret = ehca2ib_return_code(h_ret);
1423 goto ehca_reg_smr_exit0; 1418 goto ehca_reg_smr_exit0;
1424 } 1419 }
1425 /* successful registration */ 1420 /* successful registration */
1426 e_newmr->num_pages = e_origmr->num_pages; 1421 e_newmr->num_kpages = e_origmr->num_kpages;
1427 e_newmr->num_4k = e_origmr->num_4k; 1422 e_newmr->num_hwpages = e_origmr->num_hwpages;
1428 e_newmr->start = iova_start; 1423 e_newmr->start = iova_start;
1429 e_newmr->size = e_origmr->size; 1424 e_newmr->size = e_origmr->size;
1430 e_newmr->acl = acl; 1425 e_newmr->acl = acl;
1431 e_newmr->ipz_mr_handle = hipzout.handle; 1426 e_newmr->ipz_mr_handle = hipzout.handle;
1432 *lkey = hipzout.lkey; 1427 *lkey = hipzout.lkey;
1433 *rkey = hipzout.rkey; 1428 *rkey = hipzout.rkey;
@@ -1453,10 +1448,10 @@ int ehca_reg_internal_maxmr(
1453 struct ehca_mr *e_mr; 1448 struct ehca_mr *e_mr;
1454 u64 *iova_start; 1449 u64 *iova_start;
1455 u64 size_maxmr; 1450 u64 size_maxmr;
1456 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; 1451 struct ehca_mr_pginfo pginfo;
1457 struct ib_phys_buf ib_pbuf; 1452 struct ib_phys_buf ib_pbuf;
1458 u32 num_pages_mr; 1453 u32 num_kpages;
1459 u32 num_pages_4k; /* 4k portion "pages" */ 1454 u32 num_hwpages;
1460 1455
1461 e_mr = ehca_mr_new(); 1456 e_mr = ehca_mr_new();
1462 if (!e_mr) { 1457 if (!e_mr) {
@@ -1468,28 +1463,29 @@ int ehca_reg_internal_maxmr(
1468 1463
1469 /* register internal max-MR on HCA */ 1464 /* register internal max-MR on HCA */
1470 size_maxmr = (u64)high_memory - PAGE_OFFSET; 1465 size_maxmr = (u64)high_memory - PAGE_OFFSET;
1471 iova_start = (u64*)KERNELBASE; 1466 iova_start = (u64 *)KERNELBASE;
1472 ib_pbuf.addr = 0; 1467 ib_pbuf.addr = 0;
1473 ib_pbuf.size = size_maxmr; 1468 ib_pbuf.size = size_maxmr;
1474 num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + 1469 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1475 PAGE_SIZE - 1) / PAGE_SIZE); 1470 PAGE_SIZE);
1476 num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + 1471 num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + size_maxmr,
1477 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); 1472 EHCA_PAGESIZE);
1478 1473
1479 pginfo.type = EHCA_MR_PGI_PHYS; 1474 memset(&pginfo, 0, sizeof(pginfo));
1480 pginfo.num_pages = num_pages_mr; 1475 pginfo.type = EHCA_MR_PGI_PHYS;
1481 pginfo.num_4k = num_pages_4k; 1476 pginfo.num_kpages = num_kpages;
1482 pginfo.num_phys_buf = 1; 1477 pginfo.num_hwpages = num_hwpages;
1483 pginfo.phys_buf_array = &ib_pbuf; 1478 pginfo.u.phy.num_phys_buf = 1;
1479 pginfo.u.phy.phys_buf_array = &ib_pbuf;
1484 1480
1485 ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, 1481 ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1486 &pginfo, &e_mr->ib.ib_mr.lkey, 1482 &pginfo, &e_mr->ib.ib_mr.lkey,
1487 &e_mr->ib.ib_mr.rkey); 1483 &e_mr->ib.ib_mr.rkey);
1488 if (ret) { 1484 if (ret) {
1489 ehca_err(&shca->ib_device, "reg of internal max MR failed, " 1485 ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1490 "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " 1486 "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x "
1491 "num_pages_4k=%x", e_mr, iova_start, size_maxmr, 1487 "num_hwpages=%x", e_mr, iova_start, size_maxmr,
1492 num_pages_mr, num_pages_4k); 1488 num_kpages, num_hwpages);
1493 goto ehca_reg_internal_maxmr_exit1; 1489 goto ehca_reg_internal_maxmr_exit1;
1494 } 1490 }
1495 1491
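
The two open-coded round-up divisions above become NUM_CHUNKS(). Assuming NUM_CHUNKS(length, size) is the usual ceiling division (its definition is not shown in this hunk), the start offset within the first page is folded into the length before dividing, so a region that straddles page boundaries is counted correctly. A small self-check under that assumption:

#include <assert.h>
#include <stdint.h>

/* assumed definition: chunks of 'size' needed to cover 'length' bytes */
#define NUM_CHUNKS(length, size) (((length) + (size) - 1) / (size))

int main(void)
{
        const uint64_t page = 4096;
        /* a region 8000 bytes long starting 4000 bytes into its first
         * page touches pages 0, 1 and 2 once the offset is added in */
        uint64_t iova = 4000, size = 8000;

        assert(NUM_CHUNKS(iova % page + size, page) == 3);
        /* the old open-coded form computes the same value */
        assert((iova % page + size + page - 1) / page == 3);
        return 0;
}
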
@@ -1524,7 +1520,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
1524 u64 h_ret; 1520 u64 h_ret;
1525 struct ehca_mr *e_origmr = shca->maxmr; 1521 struct ehca_mr *e_origmr = shca->maxmr;
1526 u32 hipz_acl; 1522 u32 hipz_acl;
1527 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; 1523 struct ehca_mr_hipzout_parms hipzout;
1528 1524
1529 ehca_mrmw_map_acl(acl, &hipz_acl); 1525 ehca_mrmw_map_acl(acl, &hipz_acl);
1530 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); 1526 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
@@ -1538,14 +1534,14 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
1538 h_ret, e_origmr, shca->ipz_hca_handle.handle, 1534 h_ret, e_origmr, shca->ipz_hca_handle.handle,
1539 e_origmr->ipz_mr_handle.handle, 1535 e_origmr->ipz_mr_handle.handle,
1540 e_origmr->ib.ib_mr.lkey); 1536 e_origmr->ib.ib_mr.lkey);
1541 return ehca_mrmw_map_hrc_reg_smr(h_ret); 1537 return ehca2ib_return_code(h_ret);
1542 } 1538 }
1543 /* successful registration */ 1539 /* successful registration */
1544 e_newmr->num_pages = e_origmr->num_pages; 1540 e_newmr->num_kpages = e_origmr->num_kpages;
1545 e_newmr->num_4k = e_origmr->num_4k; 1541 e_newmr->num_hwpages = e_origmr->num_hwpages;
1546 e_newmr->start = iova_start; 1542 e_newmr->start = iova_start;
1547 e_newmr->size = e_origmr->size; 1543 e_newmr->size = e_origmr->size;
1548 e_newmr->acl = acl; 1544 e_newmr->acl = acl;
1549 e_newmr->ipz_mr_handle = hipzout.handle; 1545 e_newmr->ipz_mr_handle = hipzout.handle;
1550 *lkey = hipzout.lkey; 1546 *lkey = hipzout.lkey;
1551 *rkey = hipzout.rkey; 1547 *rkey = hipzout.rkey;
@@ -1677,299 +1673,187 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1677 1673
1678/*----------------------------------------------------------------------*/ 1674/*----------------------------------------------------------------------*/
1679 1675
1680/* setup page buffer from page info */ 1676/* PAGE_SIZE >= pginfo->hwpage_size */
1681int ehca_set_pagebuf(struct ehca_mr *e_mr, 1677static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1682 struct ehca_mr_pginfo *pginfo, 1678 u32 number,
1683 u32 number, 1679 u64 *kpage)
1684 u64 *kpage)
1685{ 1680{
1686 int ret = 0; 1681 int ret = 0;
1687 struct ib_umem_chunk *prev_chunk; 1682 struct ib_umem_chunk *prev_chunk;
1688 struct ib_umem_chunk *chunk; 1683 struct ib_umem_chunk *chunk;
1689 struct ib_phys_buf *pbuf; 1684 u64 pgaddr;
1690 u64 *fmrlist;
1691 u64 num4k, pgaddr, offs4k;
1692 u32 i = 0; 1685 u32 i = 0;
1693 u32 j = 0; 1686 u32 j = 0;
1694 1687
1695 if (pginfo->type == EHCA_MR_PGI_PHYS) { 1688 /* loop over desired chunk entries */
1696 /* loop over desired phys_buf_array entries */ 1689 chunk = pginfo->u.usr.next_chunk;
1697 while (i < number) { 1690 prev_chunk = pginfo->u.usr.next_chunk;
1698 pbuf = pginfo->phys_buf_array + pginfo->next_buf; 1691 list_for_each_entry_continue(
1699 num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + 1692 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1700 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; 1693 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1701 offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; 1694 pgaddr = page_to_pfn(chunk->page_list[i].page)
1702 while (pginfo->next_4k < offs4k + num4k) { 1695 << PAGE_SHIFT ;
1703 /* sanity check */ 1696 *kpage = phys_to_abs(pgaddr +
1704 if ((pginfo->page_cnt >= pginfo->num_pages) || 1697 (pginfo->next_hwpage *
1705 (pginfo->page_4k_cnt >= pginfo->num_4k)) { 1698 EHCA_PAGESIZE));
1706 ehca_gen_err("page_cnt >= num_pages, "
1707 "page_cnt=%lx "
1708 "num_pages=%lx "
1709 "page_4k_cnt=%lx "
1710 "num_4k=%lx i=%x",
1711 pginfo->page_cnt,
1712 pginfo->num_pages,
1713 pginfo->page_4k_cnt,
1714 pginfo->num_4k, i);
1715 ret = -EFAULT;
1716 goto ehca_set_pagebuf_exit0;
1717 }
1718 *kpage = phys_to_abs(
1719 (pbuf->addr & EHCA_PAGEMASK)
1720 + (pginfo->next_4k * EHCA_PAGESIZE));
1721 if ( !(*kpage) && pbuf->addr ) {
1722 ehca_gen_err("pbuf->addr=%lx "
1723 "pbuf->size=%lx "
1724 "next_4k=%lx", pbuf->addr,
1725 pbuf->size,
1726 pginfo->next_4k);
1727 ret = -EFAULT;
1728 goto ehca_set_pagebuf_exit0;
1729 }
1730 (pginfo->page_4k_cnt)++;
1731 (pginfo->next_4k)++;
1732 if (pginfo->next_4k %
1733 (PAGE_SIZE / EHCA_PAGESIZE) == 0)
1734 (pginfo->page_cnt)++;
1735 kpage++;
1736 i++;
1737 if (i >= number) break;
1738 }
1739 if (pginfo->next_4k >= offs4k + num4k) {
1740 (pginfo->next_buf)++;
1741 pginfo->next_4k = 0;
1742 }
1743 }
1744 } else if (pginfo->type == EHCA_MR_PGI_USER) {
1745 /* loop over desired chunk entries */
1746 chunk = pginfo->next_chunk;
1747 prev_chunk = pginfo->next_chunk;
1748 list_for_each_entry_continue(chunk,
1749 (&(pginfo->region->chunk_list)),
1750 list) {
1751 for (i = pginfo->next_nmap; i < chunk->nmap; ) {
1752 pgaddr = ( page_to_pfn(chunk->page_list[i].page)
1753 << PAGE_SHIFT );
1754 *kpage = phys_to_abs(pgaddr +
1755 (pginfo->next_4k *
1756 EHCA_PAGESIZE));
1757 if ( !(*kpage) ) {
1758 ehca_gen_err("pgaddr=%lx "
1759 "chunk->page_list[i]=%lx "
1760 "i=%x next_4k=%lx mr=%p",
1761 pgaddr,
1762 (u64)sg_dma_address(
1763 &chunk->
1764 page_list[i]),
1765 i, pginfo->next_4k, e_mr);
1766 ret = -EFAULT;
1767 goto ehca_set_pagebuf_exit0;
1768 }
1769 (pginfo->page_4k_cnt)++;
1770 (pginfo->next_4k)++;
1771 kpage++;
1772 if (pginfo->next_4k %
1773 (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
1774 (pginfo->page_cnt)++;
1775 (pginfo->next_nmap)++;
1776 pginfo->next_4k = 0;
1777 i++;
1778 }
1779 j++;
1780 if (j >= number) break;
1781 }
1782 if ((pginfo->next_nmap >= chunk->nmap) &&
1783 (j >= number)) {
1784 pginfo->next_nmap = 0;
1785 prev_chunk = chunk;
1786 break;
1787 } else if (pginfo->next_nmap >= chunk->nmap) {
1788 pginfo->next_nmap = 0;
1789 prev_chunk = chunk;
1790 } else if (j >= number)
1791 break;
1792 else
1793 prev_chunk = chunk;
1794 }
1795 pginfo->next_chunk =
1796 list_prepare_entry(prev_chunk,
1797 (&(pginfo->region->chunk_list)),
1798 list);
1799 } else if (pginfo->type == EHCA_MR_PGI_FMR) {
1800 /* loop over desired page_list entries */
1801 fmrlist = pginfo->page_list + pginfo->next_listelem;
1802 for (i = 0; i < number; i++) {
1803 *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
1804 pginfo->next_4k * EHCA_PAGESIZE);
1805 if ( !(*kpage) ) { 1699 if ( !(*kpage) ) {
1806 ehca_gen_err("*fmrlist=%lx fmrlist=%p " 1700 ehca_gen_err("pgaddr=%lx "
1807 "next_listelem=%lx next_4k=%lx", 1701 "chunk->page_list[i]=%lx "
1808 *fmrlist, fmrlist, 1702 "i=%x next_hwpage=%lx",
1809 pginfo->next_listelem, 1703 pgaddr, (u64)sg_dma_address(
1810 pginfo->next_4k); 1704 &chunk->page_list[i]),
1811 ret = -EFAULT; 1705 i, pginfo->next_hwpage);
1812 goto ehca_set_pagebuf_exit0; 1706 return -EFAULT;
1813 } 1707 }
1814 (pginfo->page_4k_cnt)++; 1708 (pginfo->hwpage_cnt)++;
1815 (pginfo->next_4k)++; 1709 (pginfo->next_hwpage)++;
1816 kpage++; 1710 kpage++;
1817 if (pginfo->next_4k % 1711 if (pginfo->next_hwpage %
1818 (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { 1712 (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
1819 (pginfo->page_cnt)++; 1713 (pginfo->kpage_cnt)++;
1820 (pginfo->next_listelem)++; 1714 (pginfo->u.usr.next_nmap)++;
1821 fmrlist++; 1715 pginfo->next_hwpage = 0;
1822 pginfo->next_4k = 0; 1716 i++;
1823 } 1717 }
1718 j++;
1719 if (j >= number) break;
1824 } 1720 }
1825 } else { 1721 if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
1826 ehca_gen_err("bad pginfo->type=%x", pginfo->type); 1722 (j >= number)) {
1827 ret = -EFAULT; 1723 pginfo->u.usr.next_nmap = 0;
1828 goto ehca_set_pagebuf_exit0; 1724 prev_chunk = chunk;
1725 break;
1726 } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
1727 pginfo->u.usr.next_nmap = 0;
1728 prev_chunk = chunk;
1729 } else if (j >= number)
1730 break;
1731 else
1732 prev_chunk = chunk;
1829 } 1733 }
1830 1734 pginfo->u.usr.next_chunk =
1831ehca_set_pagebuf_exit0: 1735 list_prepare_entry(prev_chunk,
1832 if (ret) 1736 (&(pginfo->u.usr.region->chunk_list)),
1833 ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " 1737 list);
1834 "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
1835 "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
1836 "next_listelem=%lx region=%p next_chunk=%p "
1837 "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
1838 pginfo->num_pages, pginfo->num_4k,
1839 pginfo->next_buf, pginfo->next_4k, number, kpage,
1840 pginfo->page_cnt, pginfo->page_4k_cnt, i,
1841 pginfo->next_listelem, pginfo->region,
1842 pginfo->next_chunk, pginfo->next_nmap);
1843 return ret; 1738 return ret;
1844} /* end ehca_set_pagebuf() */ 1739}
1845
1846/*----------------------------------------------------------------------*/
1847 1740
1848/* setup 1 page from page info page buffer */ 1741int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
1849int ehca_set_pagebuf_1(struct ehca_mr *e_mr, 1742 u32 number,
1850 struct ehca_mr_pginfo *pginfo, 1743 u64 *kpage)
1851 u64 *rpage)
1852{ 1744{
1853 int ret = 0; 1745 int ret = 0;
1854 struct ib_phys_buf *tmp_pbuf; 1746 struct ib_phys_buf *pbuf;
1855 u64 *fmrlist; 1747 u64 num_hw, offs_hw;
1856 struct ib_umem_chunk *chunk; 1748 u32 i = 0;
1857 struct ib_umem_chunk *prev_chunk; 1749
1858 u64 pgaddr, num4k, offs4k; 1750 /* loop over desired phys_buf_array entries */
1859 1751 while (i < number) {
1860 if (pginfo->type == EHCA_MR_PGI_PHYS) { 1752 pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
1861 /* sanity check */ 1753 num_hw = NUM_CHUNKS((pbuf->addr % EHCA_PAGESIZE) +
1862 if ((pginfo->page_cnt >= pginfo->num_pages) || 1754 pbuf->size, EHCA_PAGESIZE);
1863 (pginfo->page_4k_cnt >= pginfo->num_4k)) { 1755 offs_hw = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
1864 ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " 1756 while (pginfo->next_hwpage < offs_hw + num_hw) {
1865 "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", 1757 /* sanity check */
1866 pginfo->page_cnt, pginfo->num_pages, 1758 if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
1867 pginfo->page_4k_cnt, pginfo->num_4k); 1759 (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
1868 ret = -EFAULT; 1760 ehca_gen_err("kpage_cnt >= num_kpages, "
1869 goto ehca_set_pagebuf_1_exit0; 1761 "kpage_cnt=%lx num_kpages=%lx "
1870 } 1762 "hwpage_cnt=%lx "
1871 tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; 1763 "num_hwpages=%lx i=%x",
1872 num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + 1764 pginfo->kpage_cnt,
1873 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; 1765 pginfo->num_kpages,
1874 offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; 1766 pginfo->hwpage_cnt,
1875 *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + 1767 pginfo->num_hwpages, i);
1876 (pginfo->next_4k * EHCA_PAGESIZE)); 1768 return -EFAULT;
1877 if ( !(*rpage) && tmp_pbuf->addr ) {
1878 ehca_gen_err("tmp_pbuf->addr=%lx"
1879 " tmp_pbuf->size=%lx next_4k=%lx",
1880 tmp_pbuf->addr, tmp_pbuf->size,
1881 pginfo->next_4k);
1882 ret = -EFAULT;
1883 goto ehca_set_pagebuf_1_exit0;
1884 }
1885 (pginfo->page_4k_cnt)++;
1886 (pginfo->next_4k)++;
1887 if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
1888 (pginfo->page_cnt)++;
1889 if (pginfo->next_4k >= offs4k + num4k) {
1890 (pginfo->next_buf)++;
1891 pginfo->next_4k = 0;
1892 }
1893 } else if (pginfo->type == EHCA_MR_PGI_USER) {
1894 chunk = pginfo->next_chunk;
1895 prev_chunk = pginfo->next_chunk;
1896 list_for_each_entry_continue(chunk,
1897 (&(pginfo->region->chunk_list)),
1898 list) {
1899 pgaddr = ( page_to_pfn(chunk->page_list[
1900 pginfo->next_nmap].page)
1901 << PAGE_SHIFT);
1902 *rpage = phys_to_abs(pgaddr +
1903 (pginfo->next_4k * EHCA_PAGESIZE));
1904 if ( !(*rpage) ) {
1905 ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
1906 " next_nmap=%lx next_4k=%lx mr=%p",
1907 pgaddr, (u64)sg_dma_address(
1908 &chunk->page_list[
1909 pginfo->
1910 next_nmap]),
1911 pginfo->next_nmap, pginfo->next_4k,
1912 e_mr);
1913 ret = -EFAULT;
1914 goto ehca_set_pagebuf_1_exit0;
1915 }
1916 (pginfo->page_4k_cnt)++;
1917 (pginfo->next_4k)++;
1918 if (pginfo->next_4k %
1919 (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
1920 (pginfo->page_cnt)++;
1921 (pginfo->next_nmap)++;
1922 pginfo->next_4k = 0;
1923 } 1769 }
1924 if (pginfo->next_nmap >= chunk->nmap) { 1770 *kpage = phys_to_abs(
1925 pginfo->next_nmap = 0; 1771 (pbuf->addr & EHCA_PAGEMASK)
1926 prev_chunk = chunk; 1772 + (pginfo->next_hwpage * EHCA_PAGESIZE));
1773 if ( !(*kpage) && pbuf->addr ) {
1774 ehca_gen_err("pbuf->addr=%lx "
1775 "pbuf->size=%lx "
1776 "next_hwpage=%lx", pbuf->addr,
1777 pbuf->size,
1778 pginfo->next_hwpage);
1779 return -EFAULT;
1927 } 1780 }
1928 break; 1781 (pginfo->hwpage_cnt)++;
1782 (pginfo->next_hwpage)++;
1783 if (pginfo->next_hwpage %
1784 (PAGE_SIZE / EHCA_PAGESIZE) == 0)
1785 (pginfo->kpage_cnt)++;
1786 kpage++;
1787 i++;
1788 if (i >= number) break;
1789 }
1790 if (pginfo->next_hwpage >= offs_hw + num_hw) {
1791 (pginfo->u.phy.next_buf)++;
1792 pginfo->next_hwpage = 0;
1929 } 1793 }
1930 pginfo->next_chunk = 1794 }
1931 list_prepare_entry(prev_chunk, 1795 return ret;
1932 (&(pginfo->region->chunk_list)), 1796}
1933 list); 1797
1934 } else if (pginfo->type == EHCA_MR_PGI_FMR) { 1798int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
1935 fmrlist = pginfo->page_list + pginfo->next_listelem; 1799 u32 number,
1936 *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + 1800 u64 *kpage)
1937 pginfo->next_4k * EHCA_PAGESIZE); 1801{
1938 if ( !(*rpage) ) { 1802 int ret = 0;
1803 u64 *fmrlist;
1804 u32 i;
1805
1806 /* loop over desired page_list entries */
1807 fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
1808 for (i = 0; i < number; i++) {
1809 *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
1810 pginfo->next_hwpage * EHCA_PAGESIZE);
1811 if ( !(*kpage) ) {
1939 ehca_gen_err("*fmrlist=%lx fmrlist=%p " 1812 ehca_gen_err("*fmrlist=%lx fmrlist=%p "
1940 "next_listelem=%lx next_4k=%lx", 1813 "next_listelem=%lx next_hwpage=%lx",
1941 *fmrlist, fmrlist, pginfo->next_listelem, 1814 *fmrlist, fmrlist,
1942 pginfo->next_4k); 1815 pginfo->u.fmr.next_listelem,
1943 ret = -EFAULT; 1816 pginfo->next_hwpage);
1944 goto ehca_set_pagebuf_1_exit0; 1817 return -EFAULT;
1945 } 1818 }
1946 (pginfo->page_4k_cnt)++; 1819 (pginfo->hwpage_cnt)++;
1947 (pginfo->next_4k)++; 1820 (pginfo->next_hwpage)++;
1948 if (pginfo->next_4k % 1821 kpage++;
1949 (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { 1822 if (pginfo->next_hwpage %
1950 (pginfo->page_cnt)++; 1823 (pginfo->u.fmr.fmr_pgsize / EHCA_PAGESIZE) == 0) {
1951 (pginfo->next_listelem)++; 1824 (pginfo->kpage_cnt)++;
1952 pginfo->next_4k = 0; 1825 (pginfo->u.fmr.next_listelem)++;
1826 fmrlist++;
1827 pginfo->next_hwpage = 0;
1953 } 1828 }
1954 } else { 1829 }
1830 return ret;
1831}
1832
1833/* setup page buffer from page info */
1834int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
1835 u32 number,
1836 u64 *kpage)
1837{
1838 int ret;
1839
1840 switch (pginfo->type) {
1841 case EHCA_MR_PGI_PHYS:
1842 ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
1843 break;
1844 case EHCA_MR_PGI_USER:
1845 ret = ehca_set_pagebuf_user1(pginfo, number, kpage);
1846 break;
1847 case EHCA_MR_PGI_FMR:
1848 ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
1849 break;
1850 default:
1955 ehca_gen_err("bad pginfo->type=%x", pginfo->type); 1851 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
1956 ret = -EFAULT; 1852 ret = -EFAULT;
1957 goto ehca_set_pagebuf_1_exit0; 1853 break;
1958 } 1854 }
1959
1960ehca_set_pagebuf_1_exit0:
1961 if (ret)
1962 ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
1963 "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
1964 "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
1965 "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
1966 pginfo, pginfo->type, pginfo->num_pages,
1967 pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
1968 rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
1969 pginfo->next_listelem, pginfo->region,
1970 pginfo->next_chunk, pginfo->next_nmap);
1971 return ret; 1855 return ret;
1972} /* end ehca_set_pagebuf_1() */ 1856} /* end ehca_set_pagebuf() */
1973 1857
1974/*----------------------------------------------------------------------*/ 1858/*----------------------------------------------------------------------*/
1975 1859
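
This hunk splits the monolithic ehca_set_pagebuf()/ehca_set_pagebuf_1() pair into one helper per pginfo type plus a small switch dispatcher, and the helpers now return -EFAULT directly instead of jumping to a shared exit label. A reduced sketch of the resulting shape, with the types simplified and the helpers collapsed to stubs:

#include <errno.h>

enum pgi_type { PGI_PHYS, PGI_USER, PGI_FMR };

struct pginfo { enum pgi_type type; /* ... */ };

static int set_pagebuf_phys(struct pginfo *p) { (void)p; return 0; }
static int set_pagebuf_user(struct pginfo *p) { (void)p; return 0; }
static int set_pagebuf_fmr(struct pginfo *p)  { (void)p; return 0; }

/* one small dispatcher replaces the old three-armed if/else chain */
static int set_pagebuf(struct pginfo *pginfo)
{
        switch (pginfo->type) {
        case PGI_PHYS: return set_pagebuf_phys(pginfo);
        case PGI_USER: return set_pagebuf_user(pginfo);
        case PGI_FMR:  return set_pagebuf_fmr(pginfo);
        default:       return -EFAULT;  /* bad pginfo->type */
        }
}

int main(void)
{
        struct pginfo p = { PGI_FMR };
        return set_pagebuf(&p);
}
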
@@ -1982,7 +1866,7 @@ int ehca_mr_is_maxmr(u64 size,
1982{ 1866{
1983 /* a MR is treated as max-MR only if it fits following: */ 1867 /* a MR is treated as max-MR only if it fits following: */
1984 if ((size == ((u64)high_memory - PAGE_OFFSET)) && 1868 if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
1985 (iova_start == (void*)KERNELBASE)) { 1869 (iova_start == (void *)KERNELBASE)) {
1986 ehca_gen_dbg("this is a max-MR"); 1870 ehca_gen_dbg("this is a max-MR");
1987 return 1; 1871 return 1;
1988 } else 1872 } else
@@ -2042,196 +1926,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2042/*----------------------------------------------------------------------*/ 1926/*----------------------------------------------------------------------*/
2043 1927
2044/* 1928/*
2045 * map HIPZ rc to IB retcodes for MR/MW allocations
2046 * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
2047 */
2048int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
2049{
2050 switch (hipz_rc) {
2051 case H_SUCCESS: /* successful completion */
2052 return 0;
2053 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2054 case H_CONSTRAINED: /* resource constraint */
2055 case H_NO_MEM:
2056 return -ENOMEM;
2057 case H_BUSY: /* long busy */
2058 return -EBUSY;
2059 default:
2060 return -EINVAL;
2061 }
2062} /* end ehca_mrmw_map_hrc_alloc() */
2063
2064/*----------------------------------------------------------------------*/
2065
2066/*
2067 * map HIPZ rc to IB retcodes for MR register rpage
2068 * Used for hipz_h_register_rpage_mr at registering last page
2069 */
2070int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
2071{
2072 switch (hipz_rc) {
2073 case H_SUCCESS: /* registration complete */
2074 return 0;
2075 case H_PAGE_REGISTERED: /* page registered */
2076 case H_ADAPTER_PARM: /* invalid adapter handle */
2077 case H_RH_PARM: /* invalid resource handle */
2078/* case H_QT_PARM: invalid queue type */
2079 case H_PARAMETER: /*
2080 * invalid logical address,
2081 * or count zero or greater 512
2082 */
2083 case H_TABLE_FULL: /* page table full */
2084 case H_HARDWARE: /* HCA not operational */
2085 return -EINVAL;
2086 case H_BUSY: /* long busy */
2087 return -EBUSY;
2088 default:
2089 return -EINVAL;
2090 }
2091} /* end ehca_mrmw_map_hrc_rrpg_last() */
2092
2093/*----------------------------------------------------------------------*/
2094
2095/*
2096 * map HIPZ rc to IB retcodes for MR register rpage
2097 * Used for hipz_h_register_rpage_mr at registering one page, but not last page
2098 */
2099int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
2100{
2101 switch (hipz_rc) {
2102 case H_PAGE_REGISTERED: /* page registered */
2103 return 0;
2104 case H_SUCCESS: /* registration complete */
2105 case H_ADAPTER_PARM: /* invalid adapter handle */
2106 case H_RH_PARM: /* invalid resource handle */
2107/* case H_QT_PARM: invalid queue type */
2108 case H_PARAMETER: /*
2109 * invalid logical address,
2110 * or count zero or greater 512
2111 */
2112 case H_TABLE_FULL: /* page table full */
2113 case H_HARDWARE: /* HCA not operational */
2114 return -EINVAL;
2115 case H_BUSY: /* long busy */
2116 return -EBUSY;
2117 default:
2118 return -EINVAL;
2119 }
2120} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
2121
2122/*----------------------------------------------------------------------*/
2123
2124/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
2125int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
2126{
2127 switch (hipz_rc) {
2128 case H_SUCCESS: /* successful completion */
2129 return 0;
2130 case H_ADAPTER_PARM: /* invalid adapter handle */
2131 case H_RH_PARM: /* invalid resource handle */
2132 return -EINVAL;
2133 case H_BUSY: /* long busy */
2134 return -EBUSY;
2135 default:
2136 return -EINVAL;
2137 }
2138} /* end ehca_mrmw_map_hrc_query_mr() */
2139
2140/*----------------------------------------------------------------------*/
2141/*----------------------------------------------------------------------*/
2142
2143/*
2144 * map HIPZ rc to IB retcodes for freeing MR resource
2145 * Used for hipz_h_free_resource_mr
2146 */
2147int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
2148{
2149 switch (hipz_rc) {
2150 case H_SUCCESS: /* resource freed */
2151 return 0;
2152 case H_ADAPTER_PARM: /* invalid adapter handle */
2153 case H_RH_PARM: /* invalid resource handle */
2154 case H_R_STATE: /* invalid resource state */
2155 case H_HARDWARE: /* HCA not operational */
2156 return -EINVAL;
2157 case H_RESOURCE: /* Resource in use */
2158 case H_BUSY: /* long busy */
2159 return -EBUSY;
2160 default:
2161 return -EINVAL;
2162 }
2163} /* end ehca_mrmw_map_hrc_free_mr() */
2164
2165/*----------------------------------------------------------------------*/
2166
2167/*
2168 * map HIPZ rc to IB retcodes for freeing MW resource
2169 * Used for hipz_h_free_resource_mw
2170 */
2171int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
2172{
2173 switch (hipz_rc) {
2174 case H_SUCCESS: /* resource freed */
2175 return 0;
2176 case H_ADAPTER_PARM: /* invalid adapter handle */
2177 case H_RH_PARM: /* invalid resource handle */
2178 case H_R_STATE: /* invalid resource state */
2179 case H_HARDWARE: /* HCA not operational */
2180 return -EINVAL;
2181 case H_RESOURCE: /* Resource in use */
2182 case H_BUSY: /* long busy */
2183 return -EBUSY;
2184 default:
2185 return -EINVAL;
2186 }
2187} /* end ehca_mrmw_map_hrc_free_mw() */
2188
2189/*----------------------------------------------------------------------*/
2190
2191/*
2192 * map HIPZ rc to IB retcodes for SMR registrations
2193 * Used for hipz_h_register_smr.
2194 */
2195int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
2196{
2197 switch (hipz_rc) {
2198 case H_SUCCESS: /* successful completion */
2199 return 0;
2200 case H_ADAPTER_PARM: /* invalid adapter handle */
2201 case H_RH_PARM: /* invalid resource handle */
2202 case H_MEM_PARM: /* invalid MR virtual address */
2203 case H_MEM_ACCESS_PARM: /* invalid access controls */
2204 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2205 return -EINVAL;
2206 case H_BUSY: /* long busy */
2207 return -EBUSY;
2208 default:
2209 return -EINVAL;
2210 }
2211} /* end ehca_mrmw_map_hrc_reg_smr() */
2212
2213/*----------------------------------------------------------------------*/
2214
2215/*
2216 * MR destructor and constructor 1929 * MR destructor and constructor
2217 * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, 1930 * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
2218 * except struct ib_mr and spinlock 1931 * except struct ib_mr and spinlock
2219 */ 1932 */
2220void ehca_mr_deletenew(struct ehca_mr *mr) 1933void ehca_mr_deletenew(struct ehca_mr *mr)
2221{ 1934{
2222 mr->flags = 0; 1935 mr->flags = 0;
2223 mr->num_pages = 0; 1936 mr->num_kpages = 0;
2224 mr->num_4k = 0; 1937 mr->num_hwpages = 0;
2225 mr->acl = 0; 1938 mr->acl = 0;
2226 mr->start = NULL; 1939 mr->start = NULL;
2227 mr->fmr_page_size = 0; 1940 mr->fmr_page_size = 0;
2228 mr->fmr_max_pages = 0; 1941 mr->fmr_max_pages = 0;
2229 mr->fmr_max_maps = 0; 1942 mr->fmr_max_maps = 0;
2230 mr->fmr_map_cnt = 0; 1943 mr->fmr_map_cnt = 0;
2231 memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); 1944 memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2232 memset(&mr->galpas, 0, sizeof(mr->galpas)); 1945 memset(&mr->galpas, 0, sizeof(mr->galpas));
2233 mr->nr_of_pages = 0;
2234 mr->pagearray = NULL;
2235} /* end ehca_mr_deletenew() */ 1946} /* end ehca_mr_deletenew() */
2236 1947
2237int ehca_init_mrmw_cache(void) 1948int ehca_init_mrmw_cache(void)
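
Throughout ehca_mrmw.c the per-type page-info fields move under a union keyed by pginfo->type, which is why the references become pginfo->u.phy.*, u.usr.* and u.fmr.*, and why the struct can now be zeroed with memset() instead of a brittle 14-element brace initializer. A reduced sketch of that layout (the field set is heavily abbreviated and not the real struct):

#include <string.h>
#include <stdint.h>

enum pgi_type { PGI_PHYS, PGI_USER, PGI_FMR };

/* abbreviated stand-in for struct ehca_mr_pginfo after this commit */
struct pginfo {
        enum pgi_type type;
        uint64_t num_kpages, kpage_cnt;                 /* kernel pages */
        uint64_t num_hwpages, hwpage_cnt, next_hwpage;  /* HCA pages */
        union {
                struct { uint64_t next_buf; }      phy;  /* phys_buf_array */
                struct { uint64_t next_nmap; }     usr;  /* umem chunks */
                struct { uint64_t next_listelem; } fmr;  /* FMR page list */
        } u;    /* exactly one arm is live, selected by ->type */
};

int main(void)
{
        struct pginfo pginfo;

        memset(&pginfo, 0, sizeof(pginfo));  /* replaces {0,0,...,NULL,0} */
        pginfo.type = PGI_PHYS;
        pginfo.u.phy.next_buf = 0;
        return 0;
}
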
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
index d936e40a5748..24f13fe3708b 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.h
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -101,15 +101,10 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
101 u64 *page_list, 101 u64 *page_list,
102 int list_len); 102 int list_len);
103 103
104int ehca_set_pagebuf(struct ehca_mr *e_mr, 104int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
105 struct ehca_mr_pginfo *pginfo,
106 u32 number, 105 u32 number,
107 u64 *kpage); 106 u64 *kpage);
108 107
109int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
110 struct ehca_mr_pginfo *pginfo,
111 u64 *rpage);
112
113int ehca_mr_is_maxmr(u64 size, 108int ehca_mr_is_maxmr(u64 size,
114 u64 *iova_start); 109 u64 *iova_start);
115 110
@@ -121,20 +116,6 @@ void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
121void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, 116void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
122 int *ib_acl); 117 int *ib_acl);
123 118
124int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
125
126int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
127
128int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
129
130int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
131
132int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
133
134int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
135
136int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
137
138void ehca_mr_deletenew(struct ehca_mr *mr); 119void ehca_mr_deletenew(struct ehca_mr *mr);
139 120
140#endif /*_EHCA_MRMW_H_*/ 121#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
index 8707d297ce4c..818803057ebf 100644
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -53,13 +53,13 @@ struct ehca_vsgentry {
53 u32 length; 53 u32 length;
54}; 54};
55 55
56#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) 56#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7)
57#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) 57#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3)
58#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) 58#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12)
59#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) 59#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31)
60#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) 60#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47)
61#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) 61#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55)
62#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) 62#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63)
63 63
64/* 64/*
65 * Unreliable Datagram Address Vector Format 65 * Unreliable Datagram Address Vector Format
@@ -206,10 +206,10 @@ struct ehca_wqe {
206 206
207}; 207};
208 208
209#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) 209#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
210#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) 210#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1)
211#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) 211#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2)
212#define WC_SE_BIT EHCA_BMASK_IBM(3,3) 212#define WC_SE_BIT EHCA_BMASK_IBM(3, 3)
213#define WC_STATUS_ERROR_BIT 0x80000000 213#define WC_STATUS_ERROR_BIT 0x80000000
214#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 214#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
215#define WC_STATUS_PURGE_BIT 0x10 215#define WC_STATUS_PURGE_BIT 0x10
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 74671250303f..48e9ceacd6fa 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -602,10 +602,10 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd,
602 /* UD circumvention */ 602 /* UD circumvention */
603 parms.act_nr_send_sges -= 2; 603 parms.act_nr_send_sges -= 2;
604 parms.act_nr_recv_sges -= 2; 604 parms.act_nr_recv_sges -= 2;
605 swqe_size = offsetof(struct ehca_wqe, 605 swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
606 u.ud_av.sg_list[parms.act_nr_send_sges]); 606 parms.act_nr_send_sges]);
607 rwqe_size = offsetof(struct ehca_wqe, 607 rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[
608 u.ud_av.sg_list[parms.act_nr_recv_sges]); 608 parms.act_nr_recv_sges]);
609 } 609 }
610 610
611 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { 611 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
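
The UD circumvention above sizes each WQE with offsetof() of one-past-the-last scatter/gather element, so a WQE carrying n SGEs occupies exactly its header plus n entries. A self-contained illustration with a simplified layout (field names only loosely modeled on struct ehca_wqe); note the non-constant array index inside offsetof(), a GNU C feature the driver code also relies on:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sge { uint64_t vaddr; uint32_t lkey, length; };  /* 16 bytes */

/* simplified WQE: fixed header followed by a variable-length SGE array */
struct wqe {
        uint64_t header[6];     /* 48 bytes of fixed fields */
        struct sge sg_list[16];
};

int main(void)
{
        int act_nr_send_sges = 4;
        /* bytes occupied by a WQE carrying exactly that many SGEs */
        size_t swqe_size = offsetof(struct wqe, sg_list[act_nr_send_sges]);

        printf("%zu\n", swqe_size);     /* 48 + 4 * 16 = 112 */
        return 0;
}
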
@@ -690,8 +690,8 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd,
690 if (my_qp->send_cq) { 690 if (my_qp->send_cq) {
691 ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); 691 ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
692 if (ret) { 692 if (ret) {
693 ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", 693 ehca_err(pd->device,
694 ret); 694 "Couldn't assign qp to send_cq ret=%x", ret);
695 goto create_qp_exit4; 695 goto create_qp_exit4;
696 } 696 }
697 } 697 }
@@ -749,7 +749,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
749 struct ehca_qp *ret; 749 struct ehca_qp *ret;
750 750
751 ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); 751 ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
752 return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; 752 return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
753} 753}
754 754
755int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, 755int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
@@ -780,7 +780,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
780 780
781 my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); 781 my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
782 if (IS_ERR(my_qp)) 782 if (IS_ERR(my_qp))
783 return (struct ib_srq *) my_qp; 783 return (struct ib_srq *)my_qp;
784 784
785 /* copy back return values */ 785 /* copy back return values */
786 srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; 786 srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
@@ -875,7 +875,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
875 my_qp, qp_num, h_ret); 875 my_qp, qp_num, h_ret);
876 return ehca2ib_return_code(h_ret); 876 return ehca2ib_return_code(h_ret);
877 } 877 }
878 bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); 878 bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
879 ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", 879 ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
880 qp_num, bad_send_wqe_p); 880 qp_num, bad_send_wqe_p);
881 /* convert wqe pointer to vadr */ 881 /* convert wqe pointer to vadr */
@@ -890,7 +890,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
890 } 890 }
891 891
892 /* loop sets wqe's purge bit */ 892 /* loop sets wqe's purge bit */
893 wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); 893 wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
894 *bad_wqe_cnt = 0; 894 *bad_wqe_cnt = 0;
895 while (wqe->optype != 0xff && wqe->wqef != 0xff) { 895 while (wqe->optype != 0xff && wqe->wqef != 0xff) {
896 if (ehca_debug_level) 896 if (ehca_debug_level)
@@ -898,7 +898,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
898 wqe->nr_of_data_seg = 0; /* suppress data access */ 898 wqe->nr_of_data_seg = 0; /* suppress data access */
899 wqe->wqef = WQEF_PURGE; /* WQE to be purged */ 899 wqe->wqef = WQEF_PURGE; /* WQE to be purged */
900 q_ofs = ipz_queue_advance_offset(squeue, q_ofs); 900 q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
901 wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); 901 wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
902 *bad_wqe_cnt = (*bad_wqe_cnt)+1; 902 *bad_wqe_cnt = (*bad_wqe_cnt)+1;
903 } 903 }
904 /* 904 /*
@@ -1003,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1003 goto modify_qp_exit1; 1003 goto modify_qp_exit1;
1004 } 1004 }
1005 1005
1006 ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " 1006 ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
1007 "new qp_state=%x attribute_mask=%x", 1007 "new qp_state=%x attribute_mask=%x",
1008 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); 1008 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
1009 1009
@@ -1019,7 +1019,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1019 goto modify_qp_exit1; 1019 goto modify_qp_exit1;
1020 } 1020 }
1021 1021
1022 if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) 1022 mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
1023 if (mqpcb->qp_state)
1023 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); 1024 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
1024 else { 1025 else {
1025 ret = -EINVAL; 1026 ret = -EINVAL;
@@ -1077,7 +1078,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1077 spin_lock_irqsave(&my_qp->spinlock_s, flags); 1078 spin_lock_irqsave(&my_qp->spinlock_s, flags);
1078 squeue_locked = 1; 1079 squeue_locked = 1;
1079 /* mark next free wqe */ 1080 /* mark next free wqe */
1080 wqe = (struct ehca_wqe*) 1081 wqe = (struct ehca_wqe *)
1081 ipz_qeit_get(&my_qp->ipz_squeue); 1082 ipz_qeit_get(&my_qp->ipz_squeue);
1082 wqe->optype = wqe->wqef = 0xff; 1083 wqe->optype = wqe->wqef = 0xff;
1083 ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", 1084 ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
@@ -1312,7 +1313,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1312 if (h_ret != H_SUCCESS) { 1313 if (h_ret != H_SUCCESS) {
1313 ret = ehca2ib_return_code(h_ret); 1314 ret = ehca2ib_return_code(h_ret);
1314 ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " 1315 ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
1315 "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); 1316 "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
1316 goto modify_qp_exit2; 1317 goto modify_qp_exit2;
1317 } 1318 }
1318 1319
@@ -1411,7 +1412,7 @@ int ehca_query_qp(struct ib_qp *qp,
1411 } 1412 }
1412 1413
1413 if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { 1414 if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
1414 ehca_err(qp->device,"Invalid attribute mask " 1415 ehca_err(qp->device, "Invalid attribute mask "
1415 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", 1416 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
1416 my_qp, qp->qp_num, qp_attr_mask); 1417 my_qp, qp->qp_num, qp_attr_mask);
1417 return -EINVAL; 1418 return -EINVAL;
@@ -1419,7 +1420,7 @@ int ehca_query_qp(struct ib_qp *qp,
1419 1420
1420 qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 1421 qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1421 if (!qpcb) { 1422 if (!qpcb) {
1422 ehca_err(qp->device,"Out of memory for qpcb " 1423 ehca_err(qp->device, "Out of memory for qpcb "
1423 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); 1424 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
1424 return -ENOMEM; 1425 return -ENOMEM;
1425 } 1426 }
@@ -1431,7 +1432,7 @@ int ehca_query_qp(struct ib_qp *qp,
1431 1432
1432 if (h_ret != H_SUCCESS) { 1433 if (h_ret != H_SUCCESS) {
1433 ret = ehca2ib_return_code(h_ret); 1434 ret = ehca2ib_return_code(h_ret);
1434 ehca_err(qp->device,"hipz_h_query_qp() failed " 1435 ehca_err(qp->device, "hipz_h_query_qp() failed "
1435 "ehca_qp=%p qp_num=%x h_ret=%lx", 1436 "ehca_qp=%p qp_num=%x h_ret=%lx",
1436 my_qp, qp->qp_num, h_ret); 1437 my_qp, qp->qp_num, h_ret);
1437 goto query_qp_exit1; 1438 goto query_qp_exit1;
@@ -1442,7 +1443,7 @@ int ehca_query_qp(struct ib_qp *qp,
1442 1443
1443 if (qp_attr->cur_qp_state == -EINVAL) { 1444 if (qp_attr->cur_qp_state == -EINVAL) {
1444 ret = -EINVAL; 1445 ret = -EINVAL;
1445 ehca_err(qp->device,"Got invalid ehca_qp_state=%x " 1446 ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
1446 "ehca_qp=%p qp_num=%x", 1447 "ehca_qp=%p qp_num=%x",
1447 qpcb->qp_state, my_qp, qp->qp_num); 1448 qpcb->qp_state, my_qp, qp->qp_num);
1448 goto query_qp_exit1; 1449 goto query_qp_exit1;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 61da65e6e5e0..94eed70fedf5 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -79,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
79 } 79 }
80 80
81 if (ehca_debug_level) { 81 if (ehca_debug_level) {
82 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); 82 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
83 ipz_rqueue);
83 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); 84 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
84 } 85 }
85 86
@@ -99,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
99 struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; 100 struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
100 struct ib_sge *sge = send_wr->sg_list; 101 struct ib_sge *sge = send_wr->sg_list;
101 ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " 102 ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
102 "send_flags=%x opcode=%x",idx, send_wr->wr_id, 103 "send_flags=%x opcode=%x", idx, send_wr->wr_id,
103 send_wr->num_sge, send_wr->send_flags, 104 send_wr->num_sge, send_wr->send_flags,
104 send_wr->opcode); 105 send_wr->opcode);
105 if (mad_hdr) { 106 if (mad_hdr) {
@@ -116,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
116 mad_hdr->attr_mod); 117 mad_hdr->attr_mod);
117 } 118 }
118 for (j = 0; j < send_wr->num_sge; j++) { 119 for (j = 0; j < send_wr->num_sge; j++) {
119 u8 *data = (u8 *) abs_to_virt(sge->addr); 120 u8 *data = (u8 *)abs_to_virt(sge->addr);
120 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " 121 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
121 "lkey=%x", 122 "lkey=%x",
122 idx, j, data, sge->length, sge->lkey); 123 idx, j, data, sge->length, sge->lkey);
@@ -534,9 +535,11 @@ poll_cq_one_read_cqe:
534 535
535 cqe_count++; 536 cqe_count++;
536 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { 537 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
537 struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); 538 struct ehca_qp *qp;
538 int purgeflag; 539 int purgeflag;
539 unsigned long flags; 540 unsigned long flags;
541
542 qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
540 if (!qp) { 543 if (!qp) {
541 ehca_err(cq->device, "cq_num=%x qp_num=%x " 544 ehca_err(cq->device, "cq_num=%x qp_num=%x "
542 "could not find qp -> ignore cqe", 545 "could not find qp -> ignore cqe",
@@ -551,8 +554,8 @@ poll_cq_one_read_cqe:
551 spin_unlock_irqrestore(&qp->spinlock_s, flags); 554 spin_unlock_irqrestore(&qp->spinlock_s, flags);
552 555
553 if (purgeflag) { 556 if (purgeflag) {
554 ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " 557 ehca_dbg(cq->device,
555 "src_qp=%x", 558 "Got CQE with purged bit qp_num=%x src_qp=%x",
556 cqe->local_qp_number, cqe->remote_qp_number); 559 cqe->local_qp_number, cqe->remote_qp_number);
557 if (ehca_debug_level) 560 if (ehca_debug_level)
558 ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", 561 ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 03b185f873da..678b81391861 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -93,14 +93,14 @@ extern int ehca_debug_level;
93#define ehca_gen_dbg(format, arg...) \ 93#define ehca_gen_dbg(format, arg...) \
94 do { \ 94 do { \
95 if (unlikely(ehca_debug_level)) \ 95 if (unlikely(ehca_debug_level)) \
96 printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ 96 printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
97 get_paca()->paca_index, __FUNCTION__, ## arg); \ 97 get_paca()->paca_index, __FUNCTION__, ## arg); \
98 } while (0) 98 } while (0)
99 99
100#define ehca_gen_warn(format, arg...) \ 100#define ehca_gen_warn(format, arg...) \
101 do { \ 101 do { \
102 if (unlikely(ehca_debug_level)) \ 102 if (unlikely(ehca_debug_level)) \
103 printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ 103 printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
104 get_paca()->paca_index, __FUNCTION__, ## arg); \ 104 get_paca()->paca_index, __FUNCTION__, ## arg); \
105 } while (0) 105 } while (0)
106 106
@@ -114,12 +114,12 @@ extern int ehca_debug_level;
114 * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> 114 * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
115 */ 115 */
116#define ehca_dmp(adr, len, format, args...) \ 116#define ehca_dmp(adr, len, format, args...) \
117 do { \ 117 do { \
118 unsigned int x; \ 118 unsigned int x; \
119 unsigned int l = (unsigned int)(len); \ 119 unsigned int l = (unsigned int)(len); \
120 unsigned char *deb = (unsigned char*)(adr); \ 120 unsigned char *deb = (unsigned char *)(adr); \
121 for (x = 0; x < l; x += 16) { \ 121 for (x = 0; x < l; x += 16) { \
122 printk("EHCA_DMP:%s " format \ 122 printk(KERN_INFO "EHCA_DMP:%s " format \
123 " adr=%p ofs=%04x %016lx %016lx\n", \ 123 " adr=%p ofs=%04x %016lx %016lx\n", \
124 __FUNCTION__, ##args, deb, x, \ 124 __FUNCTION__, ##args, deb, x, \
125 *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ 125 *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
@@ -128,16 +128,16 @@ extern int ehca_debug_level;
128 } while (0) 128 } while (0)
129 129
130/* define a bitmask, little endian version */ 130/* define a bitmask, little endian version */
131#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) 131#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
132 132
133/* define a bitmask, the ibm way... */ 133/* define a bitmask, the ibm way... */
134#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) 134#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
135 135
136/* internal function, don't use */ 136/* internal function, don't use */
137#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) 137#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
138 138
139/* internal function, don't use */ 139/* internal function, don't use */
140#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) 140#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
141 141
142/** 142/**
143 * EHCA_BMASK_SET - return value shifted and masked by mask 143 * EHCA_BMASK_SET - return value shifted and masked by mask
@@ -145,14 +145,14 @@ extern int ehca_debug_level;
145 * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask 145 * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
146 * in variable 146 * in variable
147 */ 147 */
148#define EHCA_BMASK_SET(mask,value) \ 148#define EHCA_BMASK_SET(mask, value) \
149 ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) 149 ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
150 150
151/** 151/**
152 * EHCA_BMASK_GET - extract a parameter from value by mask 152 * EHCA_BMASK_GET - extract a parameter from value by mask
153 */ 153 */
154#define EHCA_BMASK_GET(mask,value) \ 154#define EHCA_BMASK_GET(mask, value) \
155 (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) 155 (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
156 156
157 157
158/* Converts ehca to ib return code */ 158/* Converts ehca to ib return code */
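
EHCA_BMASK_IBM(from, to) packs an IBM-numbered bit range (bit 0 is the most significant bit) into one descriptor word: shift position in the upper 16 bits, field width in the lower 16; EHCA_BMASK_SET()/EHCA_BMASK_GET() then mask and shift through that descriptor. A standalone round-trip check with the macros above copied into userspace C (u64 spelled uint64_t; the two sample masks are taken from hipz_hw.h and ehca_qes.h):

#include <assert.h>
#include <stdint.h>

#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
#define EHCA_BMASK_SET(mask, value) \
        ((EHCA_BMASK_MASK(mask) & ((uint64_t)(value))) << EHCA_BMASK_SHIFTPOS(mask))
#define EHCA_BMASK_GET(mask, value) \
        (EHCA_BMASK_MASK(mask) & (((uint64_t)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))

#define QPX_SQADDER     EHCA_BMASK_IBM(48, 63)  /* the low 16 bits */
#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)    /* IBM bit 0 == MSB */

int main(void)
{
        assert(EHCA_BMASK_SET(QPX_SQADDER, 0x1234) == 0x1234);
        assert(EHCA_BMASK_GET(QPX_SQADDER, 0xabcd) == 0xabcd);
        /* IBM numbering counts from the most significant bit down */
        assert(EHCA_BMASK_SET(WC_SEND_RECEIVE, 1) == 1ULL << 63);
        return 0;
}
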
@@ -161,8 +161,11 @@ static inline int ehca2ib_return_code(u64 ehca_rc)
161 switch (ehca_rc) { 161 switch (ehca_rc) {
162 case H_SUCCESS: 162 case H_SUCCESS:
163 return 0; 163 return 0;
164 case H_RESOURCE: /* Resource in use */
164 case H_BUSY: 165 case H_BUSY:
165 return -EBUSY; 166 return -EBUSY;
167 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
168 case H_CONSTRAINED: /* resource constraint */
166 case H_NO_MEM: 169 case H_NO_MEM:
167 return -ENOMEM; 170 return -ENOMEM;
168 default: 171 default:
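
With the seven ehca_mrmw_map_hrc_*() tables deleted from ehca_mrmw.c, every MR/MW path now funnels its hypervisor return code through this single function, and the hunk widens it so H_RESOURCE joins the -EBUSY cases and H_NOT_ENOUGH_RESOURCES/H_CONSTRAINED join -ENOMEM. A userspace rendering of the same mapping and its call-site pattern; the H_* values below are placeholders, not the real PowerPC hvcall constants:

#include <errno.h>
#include <stdint.h>

enum { H_SUCCESS, H_BUSY, H_RESOURCE, H_NO_MEM,
       H_NOT_ENOUGH_RESOURCES, H_CONSTRAINED, H_HARDWARE };

/* one shared mapping replaces seven per-verb switch tables */
static int ehca2ib_return_code(uint64_t ehca_rc)
{
        switch (ehca_rc) {
        case H_SUCCESS:
                return 0;
        case H_RESOURCE:                /* Resource in use */
        case H_BUSY:
                return -EBUSY;
        case H_NOT_ENOUGH_RESOURCES:    /* insufficient resources */
        case H_CONSTRAINED:             /* resource constraint */
        case H_NO_MEM:
                return -ENOMEM;
        default:
                return -EINVAL;
        }
}

static uint64_t fake_hcall(void)
{
        return H_HARDWARE;      /* pretend the adapter is not operational */
}

int main(void)
{
        uint64_t h_ret = fake_hcall();
        int ret = 0;

        if (h_ret != H_SUCCESS)
                ret = ehca2ib_return_code(h_ret);       /* -EINVAL here */
        return ret ? 1 : 0;
}
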
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 3031b3bb56f9..05c415744e3b 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context)
70 70
71static void ehca_mm_open(struct vm_area_struct *vma) 71static void ehca_mm_open(struct vm_area_struct *vma)
72{ 72{
73 u32 *count = (u32*)vma->vm_private_data; 73 u32 *count = (u32 *)vma->vm_private_data;
74 if (!count) { 74 if (!count) {
75 ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", 75 ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
76 vma->vm_start, vma->vm_end); 76 vma->vm_start, vma->vm_end);
@@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma)
86 86
87static void ehca_mm_close(struct vm_area_struct *vma) 87static void ehca_mm_close(struct vm_area_struct *vma)
88{ 88{
89 u32 *count = (u32*)vma->vm_private_data; 89 u32 *count = (u32 *)vma->vm_private_data;
90 if (!count) { 90 if (!count) {
91 ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", 91 ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
92 vma->vm_start, vma->vm_end); 92 vma->vm_start, vma->vm_end);
@@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
215 case 2: /* qp rqueue_addr */ 215 case 2: /* qp rqueue_addr */
216 ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", 216 ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
217 qp->ib_qp.qp_num); 217 qp->ib_qp.qp_num);
218 ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); 218 ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
219 &qp->mm_count_rqueue);
219 if (unlikely(ret)) { 220 if (unlikely(ret)) {
220 ehca_err(qp->ib_qp.device, 221 ehca_err(qp->ib_qp.device,
221 "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", 222 "ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
@@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
227 case 3: /* qp squeue_addr */ 228 case 3: /* qp squeue_addr */
228 ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", 229 ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
229 qp->ib_qp.qp_num); 230 qp->ib_qp.qp_num);
230 ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); 231 ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
232 &qp->mm_count_squeue);
231 if (unlikely(ret)) { 233 if (unlikely(ret)) {
232 ehca_err(qp->ib_qp.device, 234 ehca_err(qp->ib_qp.device,
233 "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", 235 "ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
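
ehca_mm_open() and ehca_mm_close() keep a share count for each mapped queue in vma->vm_private_data so the driver knows when the last mapping goes away; the hunks in this file are cast and line-length cleanups around that machinery. A heavily simplified userspace sketch of the counting idea (struct vma here is a stand-in for vm_area_struct, and the real driver stores the counter per queue):

#include <stdio.h>

struct vma { unsigned int *count; };    /* stand-in for vm_area_struct */

static void mm_open(struct vma *vma)
{
        unsigned int *count = vma->count;
        if (!count) { fprintf(stderr, "invalid vma\n"); return; }
        (*count)++;     /* another mapping of the same queue exists */
}

static void mm_close(struct vma *vma)
{
        unsigned int *count = vma->count;
        if (!count) { fprintf(stderr, "invalid vma\n"); return; }
        (*count)--;     /* reaching zero lets the queue be freed */
}

int main(void)
{
        unsigned int refs = 0;
        struct vma v = { &refs };

        mm_open(&v); mm_open(&v); mm_close(&v);
        printf("refs=%u\n", refs);      /* refs=1 */
        return 0;
}
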
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 4776a8b0feec..3394e05f4b4f 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -501,8 +501,8 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
501 return H_PARAMETER; 501 return H_PARAMETER;
502 } 502 }
503 503
504 return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, 504 return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
505 qp_handle.handle,logical_address_of_page, 505 qp_handle.handle, logical_address_of_page,
506 count); 506 count);
507} 507}
508 508
@@ -522,9 +522,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
522 qp_handle.handle, /* r6 */ 522 qp_handle.handle, /* r6 */
523 0, 0, 0, 0, 0, 0); 523 0, 0, 0, 0, 0, 0);
524 if (log_addr_next_sq_wqe2processed) 524 if (log_addr_next_sq_wqe2processed)
525 *log_addr_next_sq_wqe2processed = (void*)outs[0]; 525 *log_addr_next_sq_wqe2processed = (void *)outs[0];
526 if (log_addr_next_rq_wqe2processed) 526 if (log_addr_next_rq_wqe2processed)
527 *log_addr_next_rq_wqe2processed = (void*)outs[1]; 527 *log_addr_next_rq_wqe2processed = (void *)outs[1];
528 528
529 return ret; 529 return ret;
530} 530}
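
hipz_h_disable_and_get_wqe() is typical of these wrappers: the hypervisor call returns several values through an output-register array, and the wrapper copies each one back only if the caller supplied a destination for it. A sketch of that pattern with the hcall stubbed out (all names here are hypothetical):

#include <stdint.h>

/* stub standing in for an hcall that fills an output-register array */
static uint64_t fake_hcall(uint64_t outs[2])
{
        outs[0] = 0x1000;       /* logical addr of next SQ WQE */
        outs[1] = 0x2000;       /* logical addr of next RQ WQE */
        return 0;               /* H_SUCCESS */
}

/* copy each output back only if the caller asked for it */
static uint64_t get_wqe_ptrs(void **next_sq, void **next_rq)
{
        uint64_t outs[2];
        uint64_t ret = fake_hcall(outs);

        if (next_sq)
                *next_sq = (void *)(uintptr_t)outs[0];
        if (next_rq)
                *next_rq = (void *)(uintptr_t)outs[1];
        return ret;
}

int main(void)
{
        void *sq, *rq;

        if (get_wqe_ptrs(&sq, &rq))     /* caller wants both outputs */
                return 1;
        return get_wqe_ptrs(&sq, NULL) ? 1 : 0; /* RQ pointer ignored */
}
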
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 0b1a4772c78a..214821095cb1 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr)
50 50
51int hcall_unmap_page(u64 mapaddr) 51int hcall_unmap_page(u64 mapaddr)
52{ 52{
53 iounmap((volatile void __iomem*)mapaddr); 53 iounmap((volatile void __iomem *) mapaddr);
54 return 0; 54 return 0;
55} 55}
56 56
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
index 20898a153446..868735fd3187 100644
--- a/drivers/infiniband/hw/ehca/hipz_fns_core.h
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -53,10 +53,10 @@
53#define hipz_galpa_load_cq(gal, offset) \ 53#define hipz_galpa_load_cq(gal, offset) \
54 hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) 54 hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
55 55
56#define hipz_galpa_store_qp(gal,offset, value) \ 56#define hipz_galpa_store_qp(gal, offset, value) \
57 hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) 57 hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
58#define hipz_galpa_load_qp(gal, offset) \ 58#define hipz_galpa_load_qp(gal, offset) \
59 hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) 59 hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
60 60
61static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) 61static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
62{ 62{
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
index dad6dea5636b..d9739e554515 100644
--- a/drivers/infiniband/hw/ehca/hipz_hw.h
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -161,11 +161,11 @@ struct hipz_qptemm {
161/* 0x1000 */ 161/* 0x1000 */
162}; 162};
163 163
164#define QPX_SQADDER EHCA_BMASK_IBM(48,63) 164#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
165#define QPX_RQADDER EHCA_BMASK_IBM(48,63) 165#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
166#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) 166#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
167 167
168#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) 168#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
169 169
170/* MRMWPT Entry Memory Map */ 170/* MRMWPT Entry Memory Map */
171struct hipz_mrmwmm { 171struct hipz_mrmwmm {
@@ -187,7 +187,7 @@ struct hipz_mrmwmm {
187 187
188}; 188};
189 189
190#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) 190#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
191 191
192struct hipz_qpedmm { 192struct hipz_qpedmm {
193 /* 0x00 */ 193 /* 0x00 */
@@ -238,7 +238,7 @@ struct hipz_qpedmm {
238 u64 qpedx_rrva3; 238 u64 qpedx_rrva3;
239}; 239};
240 240
241#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) 241#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
242 242
243/* CQ Table Entry Memory Map */ 243/* CQ Table Entry Memory Map */
244struct hipz_cqtemm { 244struct hipz_cqtemm {
@@ -263,12 +263,12 @@ struct hipz_cqtemm {
263/* 0x1000 */ 263/* 0x1000 */
264}; 264};
265 265
266#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) 266#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63)
267#define CQX_FECADDER EHCA_BMASK_IBM(32,63) 267#define CQX_FECADDER EHCA_BMASK_IBM(32, 63)
268#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) 268#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
269#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) 269#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
270 270
271#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) 271#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
272 272
273/* EQ Table Entry Memory Map */ 273/* EQ Table Entry Memory Map */
274struct hipz_eqtemm { 274struct hipz_eqtemm {
@@ -293,7 +293,7 @@ struct hipz_eqtemm {
293 293
294}; 294};
295 295
296#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) 296#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
297 297
298/* access control defines for MR/MW */ 298/* access control defines for MR/MW */
299#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 299#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
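
The EHCA_BMASK_IBM(from, to) macros above use IBM bit numbering, in which bit 0 is the most significant bit of a 64-bit word. EHCA's actual macro lives in ehca_tools.h and is not shown in this patch, so the following stand-alone illustration of the numbering convention is an assumption about the convention only, not a reconstruction of EHCA's implementation:

    #include <stdio.h>

    /* Hypothetical: a mask covering IBM-numbered bits from..to of a
     * 64-bit word (bit 0 = MSB). Valid for to < 63 widths below 64. */
    #define IBM_BITMASK(from, to) \
            (((1ULL << ((to) - (from) + 1)) - 1) << (63 - (to)))

    int main(void)
    {
        printf("%016llx\n", IBM_BITMASK(48, 63)); /* 000000000000ffff */
        printf("%016llx\n", IBM_BITMASK(0, 0));   /* 8000000000000000 */
        return 0;
    }

So QPX_SQADDER spanning IBM bits 48..63 selects the low 16 bits of the register word, and a (3, 3) mask selects a single bit.
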
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index bf7a40088f61..9606f13ed092 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -114,7 +114,7 @@ int ipz_queue_ctor(struct ipz_queue *queue,
114 */ 114 */
115 f = 0; 115 f = 0;
116 while (f < nr_of_pages) { 116 while (f < nr_of_pages) {
117 u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); 117 u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
118 int k; 118 int k;
119 if (!kpage) 119 if (!kpage)
120 goto ipz_queue_ctor_exit0; /*NOMEM*/ 120 goto ipz_queue_ctor_exit0; /*NOMEM*/
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 007f0882fd40..39a4f64aff41 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -240,7 +240,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
240static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) 240static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
241{ 241{
242 void *ret = ipz_qeit_get(queue); 242 void *ret = ipz_qeit_get(queue);
243 u32 qe = *(u8 *) ret; 243 u32 qe = *(u8 *)ret;
244 if ((qe >> 7) != (queue->toggle_state & 1)) 244 if ((qe >> 7) != (queue->toggle_state & 1))
245 return NULL; 245 return NULL;
246 ipz_qeit_eq_get_inc(queue); /* this is a good one */ 246 ipz_qeit_eq_get_inc(queue); /* this is a good one */
@@ -250,7 +250,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
250static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) 250static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
251{ 251{
252 void *ret = ipz_qeit_get(queue); 252 void *ret = ipz_qeit_get(queue);
253 u32 qe = *(u8 *) ret; 253 u32 qe = *(u8 *)ret;
254 if ((qe >> 7) != (queue->toggle_state & 1)) 254 if ((qe >> 7) != (queue->toggle_state & 1))
255 return NULL; 255 return NULL;
256 return ret; 256 return ret;
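
The *_valid helpers above implement the toggle-bit scheme for consuming hardware-written queues: each entry carries a generation bit in the top bit of its first byte, and the consumer only accepts an entry whose bit matches its current toggle_state, flipping that state each time the ring wraps. A small user-space model of the check (hypothetical types, same test as the code above):

    #include <stdint.h>
    #include <stddef.h>

    struct ring {
        uint8_t *entries;       /* first byte of each entry holds the bit */
        size_t   entry_size, head;
        unsigned toggle_state;  /* flips on every ring wraparound */
    };

    /* Return the next entry if its generation bit matches, else NULL. */
    static void *ring_peek_valid(struct ring *q)
    {
        uint8_t *e = q->entries + q->head * q->entry_size;

        if ((*e >> 7) != (q->toggle_state & 1))
            return NULL;        /* hardware has not written this slot yet */
        return e;
    }
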
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 9361f5ab8bd6..09c5fd84b1e3 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1889,7 +1889,7 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
1889/* Below is "non-zero" to force override, but both actual LEDs are off */ 1889/* Below is "non-zero" to force override, but both actual LEDs are off */
1890#define LED_OVER_BOTH_OFF (8) 1890#define LED_OVER_BOTH_OFF (8)
1891 1891
1892void ipath_run_led_override(unsigned long opaque) 1892static void ipath_run_led_override(unsigned long opaque)
1893{ 1893{
1894 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 1894 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
1895 int timeoff; 1895 int timeoff;
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 6b9147964a4f..b4503e9c1e95 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -426,8 +426,8 @@ bail:
426 * @buffer: data to write 426 * @buffer: data to write
427 * @len: number of bytes to write 427 * @len: number of bytes to write
428 */ 428 */
429int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, 429static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
430 const void *buffer, int len) 430 const void *buffer, int len)
431{ 431{
432 u8 single_byte; 432 u8 single_byte;
433 int sub_len; 433 int sub_len;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 47aa43428fbf..1fd91c59f246 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -70,7 +70,7 @@ static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
70 * If rewrite is true, and bits are set in the sendbufferror registers, 70 * If rewrite is true, and bits are set in the sendbufferror registers,
71 * we'll write to the buffer, for error recovery on parity errors. 71 * we'll write to the buffer, for error recovery on parity errors.
72 */ 72 */
73void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) 73static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
74{ 74{
75 u32 piobcnt; 75 u32 piobcnt;
76 unsigned long sbuf[4]; 76 unsigned long sbuf[4];
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 3105005fc9d2..ace63ef78e6f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -776,7 +776,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
776int ipath_update_eeprom_log(struct ipath_devdata *dd); 776int ipath_update_eeprom_log(struct ipath_devdata *dd);
777void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 777void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
778u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 778u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
779void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
780 779
781/* 780/*
782 * Set LED override, only the two LSBs have "public" meaning, but 781 * Set LED override, only the two LSBs have "public" meaning, but
@@ -820,7 +819,6 @@ static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
820#define IPATH_MDIO_CTRL_8355_REG_10 0x1D 819#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
821 820
822int ipath_get_user_pages(unsigned long, size_t, struct page **); 821int ipath_get_user_pages(unsigned long, size_t, struct page **);
823int ipath_get_user_pages_nocopy(unsigned long, struct page **);
824void ipath_release_user_pages(struct page **, size_t); 822void ipath_release_user_pages(struct page **, size_t);
825void ipath_release_user_pages_on_close(struct page **, size_t); 823void ipath_release_user_pages_on_close(struct page **, size_t);
826int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); 824int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 85256747d8a1..c69c25239443 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -507,7 +507,7 @@ static int want_buffer(struct ipath_devdata *dd)
507 * 507 *
508 * Called when we run out of PIO buffers. 508 * Called when we run out of PIO buffers.
509 */ 509 */
510void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) 510static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
511{ 511{
512 unsigned long flags; 512 unsigned long flags;
513 513
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 27034d38b3dd..0190edc8044e 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -171,32 +171,6 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
171 return ret; 171 return ret;
172} 172}
173 173
174/**
175 * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared
176 * @start_page: the page to lock
177 * @p: the output page structure
178 *
179 * This is similar to ipath_get_user_pages, but it's always one page, and we
180 * mark the page as locked for I/O, and shared. This is used for the user
181 * process page that contains the destination address for the rcvhdrq tail
182 * update, so we need to have the vma. If we don't do this, the page can be
183 * taken away from us on fork, even if the child never touches it, and then
184 * the user process never sees the tail register updates.
185 */
186int ipath_get_user_pages_nocopy(unsigned long page, struct page **p)
187{
188 struct vm_area_struct *vma;
189 int ret;
190
191 down_write(&current->mm->mmap_sem);
192
193 ret = __get_user_pages(page, 1, p, &vma);
194
195 up_write(&current->mm->mmap_sem);
196
197 return ret;
198}
199
200void ipath_release_user_pages(struct page **p, size_t num_pages) 174void ipath_release_user_pages(struct page **p, size_t num_pages)
201{ 175{
202 down_write(&current->mm->mmap_sem); 176 down_write(&current->mm->mmap_sem);
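
With ipath_get_user_pages_nocopy gone, callers pin user memory only through the helpers still declared in ipath_kernel.h above. A sketch of the intended pin/use/release pattern, using just the two signatures visible in this patch (the caller shown is hypothetical):

    /* From ipath_kernel.h, as patched:
     *   int  ipath_get_user_pages(unsigned long, size_t, struct page **);
     *   void ipath_release_user_pages(struct page **, size_t);
     */
    static int demo_pin(unsigned long uaddr, size_t npages,
                        struct page **pages)
    {
        int ret = ipath_get_user_pages(uaddr, npages, pages);

        if (ret)
            return ret;             /* nothing pinned on failure */

        /* ... DMA to/from the pinned pages ... */

        ipath_release_user_pages(pages, npages);
        return 0;
    }
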
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 65f7181e9cf8..16aa61fd8085 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -488,7 +488,7 @@ bail:;
488 * This is called from ipath_do_rcv_timer() at interrupt level to check for 488 * This is called from ipath_do_rcv_timer() at interrupt level to check for
489 * QPs which need retransmits and to collect performance numbers. 489 * QPs which need retransmits and to collect performance numbers.
490 */ 490 */
491void ipath_ib_timer(struct ipath_ibdev *dev) 491static void ipath_ib_timer(struct ipath_ibdev *dev)
492{ 492{
493 struct ipath_qp *resend = NULL; 493 struct ipath_qp *resend = NULL;
494 struct list_head *last; 494 struct list_head *last;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index f3d1f2cee6f8..9bbe81967f14 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -782,8 +782,6 @@ void ipath_update_mmap_info(struct ipath_ibdev *dev,
782 782
783int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); 783int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
784 784
785void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
786
787void ipath_insert_rnr_queue(struct ipath_qp *qp); 785void ipath_insert_rnr_queue(struct ipath_qp *qp);
788 786
789int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); 787int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
@@ -807,8 +805,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
807 805
808int ipath_ib_piobufavail(struct ipath_ibdev *); 806int ipath_ib_piobufavail(struct ipath_ibdev *);
809 807
810void ipath_ib_timer(struct ipath_ibdev *);
811
812unsigned ipath_get_npkeys(struct ipath_devdata *); 808unsigned ipath_get_npkeys(struct ipath_devdata *);
813 809
814u32 ipath_get_cr_errpkey(struct ipath_devdata *); 810u32 ipath_get_cr_errpkey(struct ipath_devdata *);
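
The ipath hunks above (ipath_run_led_override, ipath_eeprom_internal_write, ipath_disarm_senderrbufs, ipath_no_bufs_available, ipath_ib_timer) all apply one cleanup: a function whose only callers live in its own file is made static and its prototype is removed from the shared header, shrinking the driver's visible API. In miniature, using one of the functions touched above:

    /* before (ipath_verbs.h): */
    void ipath_ib_timer(struct ipath_ibdev *);

    /* after: the header line is gone and the definition reads */
    static void ipath_ib_timer(struct ipath_ibdev *dev)
    {
        /* ... body unchanged ... */
    }
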
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 40042184ad58..b5a24fbef70d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1183,6 +1183,43 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
1183 return cur + nreq >= wq->max_post; 1183 return cur + nreq >= wq->max_post;
1184} 1184}
1185 1185
1186static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
1187 u64 remote_addr, u32 rkey)
1188{
1189 rseg->raddr = cpu_to_be64(remote_addr);
1190 rseg->rkey = cpu_to_be32(rkey);
1191 rseg->reserved = 0;
1192}
1193
1194static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
1195{
1196 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1197 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1198 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1199 } else {
1200 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1201 aseg->compare = 0;
1202 }
1203
1204}
1205
1206static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1207 struct ib_send_wr *wr)
1208{
1209 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1210 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1211 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1212
1213}
1214
1215static void set_data_seg(struct mlx4_wqe_data_seg *dseg,
1216 struct ib_sge *sg)
1217{
1218 dseg->byte_count = cpu_to_be32(sg->length);
1219 dseg->lkey = cpu_to_be32(sg->lkey);
1220 dseg->addr = cpu_to_be64(sg->addr);
1221}
1222
1186int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1223int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1187 struct ib_send_wr **bad_wr) 1224 struct ib_send_wr **bad_wr)
1188{ 1225{
@@ -1238,26 +1275,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1238 switch (wr->opcode) { 1275 switch (wr->opcode) {
1239 case IB_WR_ATOMIC_CMP_AND_SWP: 1276 case IB_WR_ATOMIC_CMP_AND_SWP:
1240 case IB_WR_ATOMIC_FETCH_AND_ADD: 1277 case IB_WR_ATOMIC_FETCH_AND_ADD:
1241 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = 1278 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
1242 cpu_to_be64(wr->wr.atomic.remote_addr); 1279 wr->wr.atomic.rkey);
1243 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1244 cpu_to_be32(wr->wr.atomic.rkey);
1245 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1246
1247 wqe += sizeof (struct mlx4_wqe_raddr_seg); 1280 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1248 1281
1249 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 1282 set_atomic_seg(wqe, wr);
1250 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1251 cpu_to_be64(wr->wr.atomic.swap);
1252 ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
1253 cpu_to_be64(wr->wr.atomic.compare_add);
1254 } else {
1255 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1256 cpu_to_be64(wr->wr.atomic.compare_add);
1257 ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
1258 }
1259
1260 wqe += sizeof (struct mlx4_wqe_atomic_seg); 1283 wqe += sizeof (struct mlx4_wqe_atomic_seg);
1284
1261 size += (sizeof (struct mlx4_wqe_raddr_seg) + 1285 size += (sizeof (struct mlx4_wqe_raddr_seg) +
1262 sizeof (struct mlx4_wqe_atomic_seg)) / 16; 1286 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
1263 1287
@@ -1266,15 +1290,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1266 case IB_WR_RDMA_READ: 1290 case IB_WR_RDMA_READ:
1267 case IB_WR_RDMA_WRITE: 1291 case IB_WR_RDMA_WRITE:
1268 case IB_WR_RDMA_WRITE_WITH_IMM: 1292 case IB_WR_RDMA_WRITE_WITH_IMM:
1269 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = 1293 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
1270 cpu_to_be64(wr->wr.rdma.remote_addr); 1294 wr->wr.rdma.rkey);
1271 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1272 cpu_to_be32(wr->wr.rdma.rkey);
1273 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1274
1275 wqe += sizeof (struct mlx4_wqe_raddr_seg); 1295 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1276 size += sizeof (struct mlx4_wqe_raddr_seg) / 16; 1296 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
1277
1278 break; 1297 break;
1279 1298
1280 default: 1299 default:
@@ -1284,13 +1303,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1284 break; 1303 break;
1285 1304
1286 case IB_QPT_UD: 1305 case IB_QPT_UD:
1287 memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av, 1306 set_datagram_seg(wqe, wr);
1288 &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1289 ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
1290 cpu_to_be32(wr->wr.ud.remote_qpn);
1291 ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
1292 cpu_to_be32(wr->wr.ud.remote_qkey);
1293
1294 wqe += sizeof (struct mlx4_wqe_datagram_seg); 1307 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1295 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 1308 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1296 break; 1309 break;
@@ -1313,12 +1326,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1313 } 1326 }
1314 1327
1315 for (i = 0; i < wr->num_sge; ++i) { 1328 for (i = 0; i < wr->num_sge; ++i) {
1316 ((struct mlx4_wqe_data_seg *) wqe)->byte_count = 1329 set_data_seg(wqe, wr->sg_list + i);
1317 cpu_to_be32(wr->sg_list[i].length);
1318 ((struct mlx4_wqe_data_seg *) wqe)->lkey =
1319 cpu_to_be32(wr->sg_list[i].lkey);
1320 ((struct mlx4_wqe_data_seg *) wqe)->addr =
1321 cpu_to_be64(wr->sg_list[i].addr);
1322 1330
1323 wqe += sizeof (struct mlx4_wqe_data_seg); 1331 wqe += sizeof (struct mlx4_wqe_data_seg);
1324 size += sizeof (struct mlx4_wqe_data_seg) / 16; 1332 size += sizeof (struct mlx4_wqe_data_seg) / 16;
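
The mlx4 hunks above replace repeated `((struct mlx4_wqe_*_seg *) wqe)->field = cpu_to_be..(...)` casts with small typed helpers, so each segment's layout and byte-swapping live in exactly one place. The shape of the transformation, reduced to a user-space toy (hypothetical struct; htonl() stands in for cpu_to_be32()):

    #include <stdint.h>
    #include <arpa/inet.h>

    struct toy_seg { uint32_t key; uint32_t len; };

    /* before, at every call site:
     *   ((struct toy_seg *) wqe)->key = htonl(key);
     *   ((struct toy_seg *) wqe)->len = htonl(len);
     * after, one typed helper; call sites pass host-order values: */
    static void set_toy_seg(struct toy_seg *seg, uint32_t key, uint32_t len)
    {
        seg->key = htonl(key);
        seg->len = htonl(len);
    }
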
@@ -1498,7 +1506,7 @@ static int to_ib_qp_access_flags(int mlx4_flags)
1498static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, 1506static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
1499 struct mlx4_qp_path *path) 1507 struct mlx4_qp_path *path)
1500{ 1508{
1501 memset(ib_ah_attr, 0, sizeof *path); 1509 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
1502 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; 1510 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
1503 1511
1504 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) 1512 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
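
The one-character fix above (`sizeof *path` -> `sizeof *ib_ah_attr`) repairs a classic sizeof-of-the-wrong-object bug: the memset cleared as many bytes as the *source* struct occupies rather than the destination being initialized, which depending on the two sizes either under-clears the destination or overruns it. A stand-alone illustration with toy structs (not the mlx4 types):

    #include <string.h>

    struct dst { char d[16]; };
    struct src { char s[64]; };

    void init(struct dst *out, const struct src *in)
    {
        /* memset(out, 0, sizeof *in);     BUG: clears 64 bytes of a
         *                                 16-byte object                */
        memset(out, 0, sizeof *out);    /* correct: size of what we clear */
        (void)in;
    }

Taking sizeof on the object actually being written makes the expression self-checking if the destination type ever changes.
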
@@ -1515,7 +1523,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
1515 ib_ah_attr->grh.traffic_class = 1523 ib_ah_attr->grh.traffic_class =
1516 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; 1524 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
1517 ib_ah_attr->grh.flow_label = 1525 ib_ah_attr->grh.flow_label =
1518 be32_to_cpu(path->tclass_flowlabel) & 0xffffff; 1526 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
1519 memcpy(ib_ah_attr->grh.dgid.raw, 1527 memcpy(ib_ah_attr->grh.dgid.raw,
1520 path->rgid, sizeof ib_ah_attr->grh.dgid.raw); 1528 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
1521 } 1529 }
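
The mask fix above matters because the IB GRH flow label is a 20-bit field: (1 << 20) - 1 = 0xfffff. In the layout this code decodes, the traffic class occupies bits 27:20 of tclass_flowlabel, so the old 24-bit mask 0xffffff would have folded the low four traffic-class bits into the reported flow label. Extracting both fields:

    #include <stdint.h>

    static void decode(uint32_t tclass_flowlabel,   /* host byte order */
                       uint8_t *tclass, uint32_t *flow_label)
    {
        *tclass     = (tclass_flowlabel >> 20) & 0xff;   /* bits 27:20 */
        *flow_label = tclass_flowlabel & 0xfffff;        /* bits 19:0  */
    }
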
@@ -1560,7 +1568,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
1560 } 1568 }
1561 1569
1562 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; 1570 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
1563 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; 1571 if (qp_attr->qp_state == IB_QPS_INIT)
1572 qp_attr->port_num = qp->port;
1573 else
1574 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
1564 1575
1565 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ 1576 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
1566 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; 1577 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
@@ -1578,17 +1589,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
1578 1589
1579done: 1590done:
1580 qp_attr->cur_qp_state = qp_attr->qp_state; 1591 qp_attr->cur_qp_state = qp_attr->qp_state;
1592 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
1593 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
1594
1581 if (!ibqp->uobject) { 1595 if (!ibqp->uobject) {
1582 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; 1596 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
1583 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; 1597 qp_attr->cap.max_send_sge = qp->sq.max_gs;
1584 qp_attr->cap.max_send_sge = qp->sq.max_gs; 1598 } else {
1585 qp_attr->cap.max_recv_sge = qp->rq.max_gs; 1599 qp_attr->cap.max_send_wr = 0;
1586 qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - 1600 qp_attr->cap.max_send_sge = 0;
1587 send_wqe_overhead(qp->ibqp.qp_type) -
1588 sizeof (struct mlx4_wqe_inline_seg);
1589 qp_init_attr->cap = qp_attr->cap;
1590 } 1601 }
1591 1602
1603 /*
1604 * We don't support inline sends for kernel QPs (yet), and we
1605 * don't know what userspace's value should be.
1606 */
1607 qp_attr->cap.max_inline_data = 0;
1608
1609 qp_init_attr->cap = qp_attr->cap;
1610
1592 return 0; 1611 return 0;
1593} 1612}
1594 1613
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index aa563e61de65..76fed7545c53 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -67,7 +67,7 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
67 67
68static int msi = 0; 68static int msi = 0;
69module_param(msi, int, 0444); 69module_param(msi, int, 0444);
70MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); 70MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)");
71 71
72#else /* CONFIG_PCI_MSI */ 72#else /* CONFIG_PCI_MSI */
73 73
@@ -1117,9 +1117,21 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1117 1117
1118 if (msi_x && !mthca_enable_msi_x(mdev)) 1118 if (msi_x && !mthca_enable_msi_x(mdev))
1119 mdev->mthca_flags |= MTHCA_FLAG_MSI_X; 1119 mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
1120 if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) && 1120 else if (msi) {
1121 !pci_enable_msi(pdev)) 1121 static int warned;
1122 mdev->mthca_flags |= MTHCA_FLAG_MSI; 1122
1123 if (!warned) {
1124 printk(KERN_WARNING PFX "WARNING: MSI support will be "
1125 "removed from the ib_mthca driver in January 2008.\n");
1126 printk(KERN_WARNING " If you are using MSI and cannot "
1127 "switch to MSI-X, please tell "
1128 "<general@lists.openfabrics.org>.\n");
1129 ++warned;
1130 }
1131
1132 if (!pci_enable_msi(pdev))
1133 mdev->mthca_flags |= MTHCA_FLAG_MSI;
1134 }
1123 1135
1124 if (mthca_cmd_init(mdev)) { 1136 if (mthca_cmd_init(mdev)) {
1125 mthca_err(mdev, "Failed to init command interface, aborting.\n"); 1137 mthca_err(mdev, "Failed to init command interface, aborting.\n");
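
The `static int warned` in the hunk above is the standard print-once idiom of this era (the kernel later grew printk_once() for the same purpose): a function-local static is zero-initialized and survives across calls, so the deprecation warning fires for the first MSI device only. Reduced to its core:

    #include <stdio.h>

    void deprecated_path(void)
    {
        static int warned;      /* zero-initialized, persists across calls */

        if (!warned) {
            fprintf(stderr, "WARNING: this path is deprecated\n");
            ++warned;
        }
        /* ... continue on the deprecated path ... */
    }
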
@@ -1135,7 +1147,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1135 goto err_cmd; 1147 goto err_cmd;
1136 1148
1137 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { 1149 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
1138 mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", 1150 mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n",
1139 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, 1151 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
1140 (int) (mdev->fw_ver & 0xffff), 1152 (int) (mdev->fw_ver & 0xffff),
1141 (int) (mthca_hca_table[hca_type].latest_fw >> 32), 1153 (int) (mthca_hca_table[hca_type].latest_fw >> 32),
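
The format-string fix above is easy to miss: "%3d" pads with spaces while "%03d" pads with zeros, so a firmware revision of 2.0.3 previously printed as "2.0.  3" instead of "2.0.003". Demonstrably:

    #include <stdio.h>

    int main(void)
    {
        printf("%d.%d.%3d\n",  2, 0, 3);   /* "2.0.  3" - space padded */
        printf("%d.%d.%03d\n", 2, 0, 3);   /* "2.0.003" - zero padded  */
        return 0;
    }
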
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 11f1d99db40b..df01b2026a64 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1578,6 +1578,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1578 return cur + nreq >= wq->max; 1578 return cur + nreq >= wq->max;
1579} 1579}
1580 1580
1581static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
1582 u64 remote_addr, u32 rkey)
1583{
1584 rseg->raddr = cpu_to_be64(remote_addr);
1585 rseg->rkey = cpu_to_be32(rkey);
1586 rseg->reserved = 0;
1587}
1588
1589static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
1590 struct ib_send_wr *wr)
1591{
1592 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1593 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1594 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1595 } else {
1596 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1597 aseg->compare = 0;
1598 }
1599
1600}
1601
1602static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
1603 struct ib_send_wr *wr)
1604{
1605 useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
1606 useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
1607 useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1608 useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1609
1610}
1611
1612static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
1613 struct ib_send_wr *wr)
1614{
1615 memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
1616 useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1617 useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1618}
1619
1581int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1620int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1582 struct ib_send_wr **bad_wr) 1621 struct ib_send_wr **bad_wr)
1583{ 1622{
@@ -1590,8 +1629,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1590 int nreq; 1629 int nreq;
1591 int i; 1630 int i;
1592 int size; 1631 int size;
1593 int size0 = 0; 1632 /*
1594 u32 f0 = 0; 1633 * f0 and size0 are only used if nreq != 0, and they will
1634 * always be initialized the first time through the main loop
1635 * before nreq is incremented. So nreq cannot become non-zero
1636 * without initializing f0 and size0, and they are in fact
1637 * never used uninitialized.
1638 */
1639 int uninitialized_var(size0);
1640 u32 uninitialized_var(f0);
1595 int ind; 1641 int ind;
1596 u8 op0 = 0; 1642 u8 op0 = 0;
1597 1643
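
uninitialized_var() exists to silence GCC's "may be used uninitialized" false positive without pre-zeroing a variable that is provably always written before it is read; the comment block above is the human-readable proof that the warning really is spurious. To the best of this writer's knowledge the era's compiler headers define it as a self-assignment, roughly:

    /* approximate definition from include/linux/compiler-gcc*.h: */
    #define uninitialized_var(x) x = x

    /* usage: tells GCC "trust me" without changing behavior */
    int process(int n)
    {
        int uninitialized_var(first);
        int i;

        for (i = 0; i < n; ++i) {
            if (!i)
                first = i * 2;      /* always runs before any read ... */
        }
        return n ? first : 0;       /* ... so this read is safe        */
    }
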
@@ -1636,25 +1682,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1636 switch (wr->opcode) { 1682 switch (wr->opcode) {
1637 case IB_WR_ATOMIC_CMP_AND_SWP: 1683 case IB_WR_ATOMIC_CMP_AND_SWP:
1638 case IB_WR_ATOMIC_FETCH_AND_ADD: 1684 case IB_WR_ATOMIC_FETCH_AND_ADD:
1639 ((struct mthca_raddr_seg *) wqe)->raddr = 1685 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
1640 cpu_to_be64(wr->wr.atomic.remote_addr); 1686 wr->wr.atomic.rkey);
1641 ((struct mthca_raddr_seg *) wqe)->rkey =
1642 cpu_to_be32(wr->wr.atomic.rkey);
1643 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1644
1645 wqe += sizeof (struct mthca_raddr_seg); 1687 wqe += sizeof (struct mthca_raddr_seg);
1646 1688
1647 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 1689 set_atomic_seg(wqe, wr);
1648 ((struct mthca_atomic_seg *) wqe)->swap_add =
1649 cpu_to_be64(wr->wr.atomic.swap);
1650 ((struct mthca_atomic_seg *) wqe)->compare =
1651 cpu_to_be64(wr->wr.atomic.compare_add);
1652 } else {
1653 ((struct mthca_atomic_seg *) wqe)->swap_add =
1654 cpu_to_be64(wr->wr.atomic.compare_add);
1655 ((struct mthca_atomic_seg *) wqe)->compare = 0;
1656 }
1657
1658 wqe += sizeof (struct mthca_atomic_seg); 1690 wqe += sizeof (struct mthca_atomic_seg);
1659 size += (sizeof (struct mthca_raddr_seg) + 1691 size += (sizeof (struct mthca_raddr_seg) +
1660 sizeof (struct mthca_atomic_seg)) / 16; 1692 sizeof (struct mthca_atomic_seg)) / 16;
@@ -1663,12 +1695,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1663 case IB_WR_RDMA_WRITE: 1695 case IB_WR_RDMA_WRITE:
1664 case IB_WR_RDMA_WRITE_WITH_IMM: 1696 case IB_WR_RDMA_WRITE_WITH_IMM:
1665 case IB_WR_RDMA_READ: 1697 case IB_WR_RDMA_READ:
1666 ((struct mthca_raddr_seg *) wqe)->raddr = 1698 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
1667 cpu_to_be64(wr->wr.rdma.remote_addr); 1699 wr->wr.rdma.rkey);
1668 ((struct mthca_raddr_seg *) wqe)->rkey = 1700 wqe += sizeof (struct mthca_raddr_seg);
1669 cpu_to_be32(wr->wr.rdma.rkey);
1670 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1671 wqe += sizeof (struct mthca_raddr_seg);
1672 size += sizeof (struct mthca_raddr_seg) / 16; 1701 size += sizeof (struct mthca_raddr_seg) / 16;
1673 break; 1702 break;
1674 1703
@@ -1683,12 +1712,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1683 switch (wr->opcode) { 1712 switch (wr->opcode) {
1684 case IB_WR_RDMA_WRITE: 1713 case IB_WR_RDMA_WRITE:
1685 case IB_WR_RDMA_WRITE_WITH_IMM: 1714 case IB_WR_RDMA_WRITE_WITH_IMM:
1686 ((struct mthca_raddr_seg *) wqe)->raddr = 1715 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
1687 cpu_to_be64(wr->wr.rdma.remote_addr); 1716 wr->wr.rdma.rkey);
1688 ((struct mthca_raddr_seg *) wqe)->rkey = 1717 wqe += sizeof (struct mthca_raddr_seg);
1689 cpu_to_be32(wr->wr.rdma.rkey);
1690 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1691 wqe += sizeof (struct mthca_raddr_seg);
1692 size += sizeof (struct mthca_raddr_seg) / 16; 1718 size += sizeof (struct mthca_raddr_seg) / 16;
1693 break; 1719 break;
1694 1720
@@ -1700,16 +1726,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1700 break; 1726 break;
1701 1727
1702 case UD: 1728 case UD:
1703 ((struct mthca_tavor_ud_seg *) wqe)->lkey = 1729 set_tavor_ud_seg(wqe, wr);
1704 cpu_to_be32(to_mah(wr->wr.ud.ah)->key); 1730 wqe += sizeof (struct mthca_tavor_ud_seg);
1705 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
1706 cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
1707 ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
1708 cpu_to_be32(wr->wr.ud.remote_qpn);
1709 ((struct mthca_tavor_ud_seg *) wqe)->qkey =
1710 cpu_to_be32(wr->wr.ud.remote_qkey);
1711
1712 wqe += sizeof (struct mthca_tavor_ud_seg);
1713 size += sizeof (struct mthca_tavor_ud_seg) / 16; 1731 size += sizeof (struct mthca_tavor_ud_seg) / 16;
1714 break; 1732 break;
1715 1733
@@ -1734,13 +1752,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1734 } 1752 }
1735 1753
1736 for (i = 0; i < wr->num_sge; ++i) { 1754 for (i = 0; i < wr->num_sge; ++i) {
1737 ((struct mthca_data_seg *) wqe)->byte_count = 1755 mthca_set_data_seg(wqe, wr->sg_list + i);
1738 cpu_to_be32(wr->sg_list[i].length); 1756 wqe += sizeof (struct mthca_data_seg);
1739 ((struct mthca_data_seg *) wqe)->lkey =
1740 cpu_to_be32(wr->sg_list[i].lkey);
1741 ((struct mthca_data_seg *) wqe)->addr =
1742 cpu_to_be64(wr->sg_list[i].addr);
1743 wqe += sizeof (struct mthca_data_seg);
1744 size += sizeof (struct mthca_data_seg) / 16; 1757 size += sizeof (struct mthca_data_seg) / 16;
1745 } 1758 }
1746 1759
@@ -1768,11 +1781,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1768 mthca_opcode[wr->opcode]); 1781 mthca_opcode[wr->opcode]);
1769 wmb(); 1782 wmb();
1770 ((struct mthca_next_seg *) prev_wqe)->ee_nds = 1783 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1771 cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size | 1784 cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
1772 ((wr->send_flags & IB_SEND_FENCE) ? 1785 ((wr->send_flags & IB_SEND_FENCE) ?
1773 MTHCA_NEXT_FENCE : 0)); 1786 MTHCA_NEXT_FENCE : 0));
1774 1787
1775 if (!size0) { 1788 if (!nreq) {
1776 size0 = size; 1789 size0 = size;
1777 op0 = mthca_opcode[wr->opcode]; 1790 op0 = mthca_opcode[wr->opcode];
1778 f0 = wr->send_flags & IB_SEND_FENCE ? 1791 f0 = wr->send_flags & IB_SEND_FENCE ?
@@ -1822,7 +1835,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1822 int nreq; 1835 int nreq;
1823 int i; 1836 int i;
1824 int size; 1837 int size;
1825 int size0 = 0; 1838 /*
1839 * size0 is only used if nreq != 0, and it will always be
1840 * initialized the first time through the main loop before
1841 * nreq is incremented. So nreq cannot become non-zero
1842 * without initializing size0, and it is in fact never used
1843 * uninitialized.
1844 */
1845 int uninitialized_var(size0);
1826 int ind; 1846 int ind;
1827 void *wqe; 1847 void *wqe;
1828 void *prev_wqe; 1848 void *prev_wqe;
@@ -1863,13 +1883,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1863 } 1883 }
1864 1884
1865 for (i = 0; i < wr->num_sge; ++i) { 1885 for (i = 0; i < wr->num_sge; ++i) {
1866 ((struct mthca_data_seg *) wqe)->byte_count = 1886 mthca_set_data_seg(wqe, wr->sg_list + i);
1867 cpu_to_be32(wr->sg_list[i].length); 1887 wqe += sizeof (struct mthca_data_seg);
1868 ((struct mthca_data_seg *) wqe)->lkey =
1869 cpu_to_be32(wr->sg_list[i].lkey);
1870 ((struct mthca_data_seg *) wqe)->addr =
1871 cpu_to_be64(wr->sg_list[i].addr);
1872 wqe += sizeof (struct mthca_data_seg);
1873 size += sizeof (struct mthca_data_seg) / 16; 1888 size += sizeof (struct mthca_data_seg) / 16;
1874 } 1889 }
1875 1890
@@ -1881,7 +1896,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1881 ((struct mthca_next_seg *) prev_wqe)->ee_nds = 1896 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1882 cpu_to_be32(MTHCA_NEXT_DBD | size); 1897 cpu_to_be32(MTHCA_NEXT_DBD | size);
1883 1898
1884 if (!size0) 1899 if (!nreq)
1885 size0 = size; 1900 size0 = size;
1886 1901
1887 ++ind; 1902 ++ind;
@@ -1903,7 +1918,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1903 1918
1904 qp->rq.next_ind = ind; 1919 qp->rq.next_ind = ind;
1905 qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; 1920 qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
1906 size0 = 0;
1907 } 1921 }
1908 } 1922 }
1909 1923
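
Switching the tests from `if (!size0)` to `if (!nreq)` goes hand in hand with dropping the `size0 = 0` initializers: the request counter, not a sentinel value of size0, now identifies the first WQE of a batch (whose size and fence flag get folded into the doorbell), which is required once size0 is wrapped in uninitialized_var() and carries no defined value before the first iteration. A runnable reduction of the loop's first-iteration bookkeeping (hypothetical types):

    struct req { struct req *next; int size; };

    /* Size recorded for the doorbell: that of the first request,
     * identified by nreq == 0 rather than by size0's value. */
    static int batch_first_size(struct req *wr)
    {
        int nreq;
        int size0 = 0;

        for (nreq = 0; wr; ++nreq, wr = wr->next)
            if (!nreq)
                size0 = wr->size;   /* runs exactly once, up front */
        return size0;
    }
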
@@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1945 int nreq; 1959 int nreq;
1946 int i; 1960 int i;
1947 int size; 1961 int size;
1948 int size0 = 0; 1962 /*
1949 u32 f0 = 0; 1963 * f0 and size0 are only used if nreq != 0, and they will
1964 * always be initialized the first time through the main loop
1965 * before nreq is incremented. So nreq cannot become non-zero
1966 * without initializing f0 and size0, and they are in fact
1967 * never used uninitialized.
1968 */
1969 int uninitialized_var(size0);
1970 u32 uninitialized_var(f0);
1950 int ind; 1971 int ind;
1951 u8 op0 = 0; 1972 u8 op0 = 0;
1952 1973
@@ -1966,7 +1987,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1966 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); 1987 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1967 1988
1968 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; 1989 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
1969 size0 = 0;
1970 1990
1971 /* 1991 /*
1972 * Make sure that descriptors are written before 1992 * Make sure that descriptors are written before
@@ -2017,26 +2037,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2017 switch (wr->opcode) { 2037 switch (wr->opcode) {
2018 case IB_WR_ATOMIC_CMP_AND_SWP: 2038 case IB_WR_ATOMIC_CMP_AND_SWP:
2019 case IB_WR_ATOMIC_FETCH_AND_ADD: 2039 case IB_WR_ATOMIC_FETCH_AND_ADD:
2020 ((struct mthca_raddr_seg *) wqe)->raddr = 2040 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2021 cpu_to_be64(wr->wr.atomic.remote_addr); 2041 wr->wr.atomic.rkey);
2022 ((struct mthca_raddr_seg *) wqe)->rkey =
2023 cpu_to_be32(wr->wr.atomic.rkey);
2024 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
2025
2026 wqe += sizeof (struct mthca_raddr_seg); 2042 wqe += sizeof (struct mthca_raddr_seg);
2027 2043
2028 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 2044 set_atomic_seg(wqe, wr);
2029 ((struct mthca_atomic_seg *) wqe)->swap_add = 2045 wqe += sizeof (struct mthca_atomic_seg);
2030 cpu_to_be64(wr->wr.atomic.swap);
2031 ((struct mthca_atomic_seg *) wqe)->compare =
2032 cpu_to_be64(wr->wr.atomic.compare_add);
2033 } else {
2034 ((struct mthca_atomic_seg *) wqe)->swap_add =
2035 cpu_to_be64(wr->wr.atomic.compare_add);
2036 ((struct mthca_atomic_seg *) wqe)->compare = 0;
2037 }
2038
2039 wqe += sizeof (struct mthca_atomic_seg);
2040 size += (sizeof (struct mthca_raddr_seg) + 2046 size += (sizeof (struct mthca_raddr_seg) +
2041 sizeof (struct mthca_atomic_seg)) / 16; 2047 sizeof (struct mthca_atomic_seg)) / 16;
2042 break; 2048 break;
@@ -2044,12 +2050,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2044 case IB_WR_RDMA_READ: 2050 case IB_WR_RDMA_READ:
2045 case IB_WR_RDMA_WRITE: 2051 case IB_WR_RDMA_WRITE:
2046 case IB_WR_RDMA_WRITE_WITH_IMM: 2052 case IB_WR_RDMA_WRITE_WITH_IMM:
2047 ((struct mthca_raddr_seg *) wqe)->raddr = 2053 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2048 cpu_to_be64(wr->wr.rdma.remote_addr); 2054 wr->wr.rdma.rkey);
2049 ((struct mthca_raddr_seg *) wqe)->rkey = 2055 wqe += sizeof (struct mthca_raddr_seg);
2050 cpu_to_be32(wr->wr.rdma.rkey);
2051 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
2052 wqe += sizeof (struct mthca_raddr_seg);
2053 size += sizeof (struct mthca_raddr_seg) / 16; 2056 size += sizeof (struct mthca_raddr_seg) / 16;
2054 break; 2057 break;
2055 2058
@@ -2064,12 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2064 switch (wr->opcode) { 2067 switch (wr->opcode) {
2065 case IB_WR_RDMA_WRITE: 2068 case IB_WR_RDMA_WRITE:
2066 case IB_WR_RDMA_WRITE_WITH_IMM: 2069 case IB_WR_RDMA_WRITE_WITH_IMM:
2067 ((struct mthca_raddr_seg *) wqe)->raddr = 2070 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2068 cpu_to_be64(wr->wr.rdma.remote_addr); 2071 wr->wr.rdma.rkey);
2069 ((struct mthca_raddr_seg *) wqe)->rkey = 2072 wqe += sizeof (struct mthca_raddr_seg);
2070 cpu_to_be32(wr->wr.rdma.rkey);
2071 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
2072 wqe += sizeof (struct mthca_raddr_seg);
2073 size += sizeof (struct mthca_raddr_seg) / 16; 2073 size += sizeof (struct mthca_raddr_seg) / 16;
2074 break; 2074 break;
2075 2075
@@ -2081,14 +2081,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2081 break; 2081 break;
2082 2082
2083 case UD: 2083 case UD:
2084 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, 2084 set_arbel_ud_seg(wqe, wr);
2085 to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); 2085 wqe += sizeof (struct mthca_arbel_ud_seg);
2086 ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
2087 cpu_to_be32(wr->wr.ud.remote_qpn);
2088 ((struct mthca_arbel_ud_seg *) wqe)->qkey =
2089 cpu_to_be32(wr->wr.ud.remote_qkey);
2090
2091 wqe += sizeof (struct mthca_arbel_ud_seg);
2092 size += sizeof (struct mthca_arbel_ud_seg) / 16; 2086 size += sizeof (struct mthca_arbel_ud_seg) / 16;
2093 break; 2087 break;
2094 2088
@@ -2113,13 +2107,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2113 } 2107 }
2114 2108
2115 for (i = 0; i < wr->num_sge; ++i) { 2109 for (i = 0; i < wr->num_sge; ++i) {
2116 ((struct mthca_data_seg *) wqe)->byte_count = 2110 mthca_set_data_seg(wqe, wr->sg_list + i);
2117 cpu_to_be32(wr->sg_list[i].length); 2111 wqe += sizeof (struct mthca_data_seg);
2118 ((struct mthca_data_seg *) wqe)->lkey =
2119 cpu_to_be32(wr->sg_list[i].lkey);
2120 ((struct mthca_data_seg *) wqe)->addr =
2121 cpu_to_be64(wr->sg_list[i].addr);
2122 wqe += sizeof (struct mthca_data_seg);
2123 size += sizeof (struct mthca_data_seg) / 16; 2112 size += sizeof (struct mthca_data_seg) / 16;
2124 } 2113 }
2125 2114
@@ -2151,7 +2140,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2151 ((wr->send_flags & IB_SEND_FENCE) ? 2140 ((wr->send_flags & IB_SEND_FENCE) ?
2152 MTHCA_NEXT_FENCE : 0)); 2141 MTHCA_NEXT_FENCE : 0));
2153 2142
2154 if (!size0) { 2143 if (!nreq) {
2155 size0 = size; 2144 size0 = size;
2156 op0 = mthca_opcode[wr->opcode]; 2145 op0 = mthca_opcode[wr->opcode];
2157 f0 = wr->send_flags & IB_SEND_FENCE ? 2146 f0 = wr->send_flags & IB_SEND_FENCE ?
@@ -2241,20 +2230,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2241 } 2230 }
2242 2231
2243 for (i = 0; i < wr->num_sge; ++i) { 2232 for (i = 0; i < wr->num_sge; ++i) {
2244 ((struct mthca_data_seg *) wqe)->byte_count = 2233 mthca_set_data_seg(wqe, wr->sg_list + i);
2245 cpu_to_be32(wr->sg_list[i].length);
2246 ((struct mthca_data_seg *) wqe)->lkey =
2247 cpu_to_be32(wr->sg_list[i].lkey);
2248 ((struct mthca_data_seg *) wqe)->addr =
2249 cpu_to_be64(wr->sg_list[i].addr);
2250 wqe += sizeof (struct mthca_data_seg); 2234 wqe += sizeof (struct mthca_data_seg);
2251 } 2235 }
2252 2236
2253 if (i < qp->rq.max_gs) { 2237 if (i < qp->rq.max_gs)
2254 ((struct mthca_data_seg *) wqe)->byte_count = 0; 2238 mthca_set_data_seg_inval(wqe);
2255 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
2256 ((struct mthca_data_seg *) wqe)->addr = 0;
2257 }
2258 2239
2259 qp->wrid[ind] = wr->wr_id; 2240 qp->wrid[ind] = wr->wr_id;
2260 2241
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b8f05a526673..88d219e730ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -543,20 +543,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
543 } 543 }
544 544
545 for (i = 0; i < wr->num_sge; ++i) { 545 for (i = 0; i < wr->num_sge; ++i) {
546 ((struct mthca_data_seg *) wqe)->byte_count = 546 mthca_set_data_seg(wqe, wr->sg_list + i);
547 cpu_to_be32(wr->sg_list[i].length);
548 ((struct mthca_data_seg *) wqe)->lkey =
549 cpu_to_be32(wr->sg_list[i].lkey);
550 ((struct mthca_data_seg *) wqe)->addr =
551 cpu_to_be64(wr->sg_list[i].addr);
552 wqe += sizeof (struct mthca_data_seg); 547 wqe += sizeof (struct mthca_data_seg);
553 } 548 }
554 549
555 if (i < srq->max_gs) { 550 if (i < srq->max_gs)
556 ((struct mthca_data_seg *) wqe)->byte_count = 0; 551 mthca_set_data_seg_inval(wqe);
557 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
558 ((struct mthca_data_seg *) wqe)->addr = 0;
559 }
560 552
561 ((struct mthca_next_seg *) prev_wqe)->nda_op = 553 ((struct mthca_next_seg *) prev_wqe)->nda_op =
562 cpu_to_be32((ind << srq->wqe_shift) | 1); 554 cpu_to_be32((ind << srq->wqe_shift) | 1);
@@ -662,20 +654,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
662 } 654 }
663 655
664 for (i = 0; i < wr->num_sge; ++i) { 656 for (i = 0; i < wr->num_sge; ++i) {
665 ((struct mthca_data_seg *) wqe)->byte_count = 657 mthca_set_data_seg(wqe, wr->sg_list + i);
666 cpu_to_be32(wr->sg_list[i].length);
667 ((struct mthca_data_seg *) wqe)->lkey =
668 cpu_to_be32(wr->sg_list[i].lkey);
669 ((struct mthca_data_seg *) wqe)->addr =
670 cpu_to_be64(wr->sg_list[i].addr);
671 wqe += sizeof (struct mthca_data_seg); 658 wqe += sizeof (struct mthca_data_seg);
672 } 659 }
673 660
674 if (i < srq->max_gs) { 661 if (i < srq->max_gs)
675 ((struct mthca_data_seg *) wqe)->byte_count = 0; 662 mthca_set_data_seg_inval(wqe);
676 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
677 ((struct mthca_data_seg *) wqe)->addr = 0;
678 }
679 663
680 srq->wrid[ind] = wr->wr_id; 664 srq->wrid[ind] = wr->wr_id;
681 srq->first_free = next_ind; 665 srq->first_free = next_ind;
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index e7d2c1e86199..f6a66fe78e48 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -113,4 +113,19 @@ struct mthca_mlx_seg {
113 __be16 vcrc; 113 __be16 vcrc;
114}; 114};
115 115
116static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
117 struct ib_sge *sg)
118{
119 dseg->byte_count = cpu_to_be32(sg->length);
120 dseg->lkey = cpu_to_be32(sg->lkey);
121 dseg->addr = cpu_to_be64(sg->addr);
122}
123
124static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
125{
126 dseg->byte_count = 0;
127 dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
128 dseg->addr = 0;
129}
130
116#endif /* MTHCA_WQE_H */ 131#endif /* MTHCA_WQE_H */
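
With mthca_set_data_seg() and mthca_set_data_seg_inval() available in mthca_wqe.h, the scatter-list loops in mthca_qp.c and mthca_srq.c above shrink to one call per segment. A hypothetical consumer mirroring those hunks; note the terminator: a zero byte count plus MTHCA_INVAL_LKEY in the first unused slot, so stale descriptor contents are not interpreted as a live segment:

    static void fill_sg(void *wqe, struct ib_sge *sg_list,
                        int num_sge, int max_gs)
    {
        int i;

        for (i = 0; i < num_sge; ++i) {
            mthca_set_data_seg(wqe, sg_list + i);
            wqe += sizeof (struct mthca_data_seg);
        }
        if (i < max_gs)
            mthca_set_data_seg_inval(wqe);  /* end-of-list marker */
    }
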
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index e2353701e8bb..1ee867b1b341 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -310,8 +310,6 @@ int iser_conn_init(struct iser_conn **ib_conn);
310 310
311void iser_conn_terminate(struct iser_conn *ib_conn); 311void iser_conn_terminate(struct iser_conn *ib_conn);
312 312
313void iser_conn_release(struct iser_conn *ib_conn);
314
315void iser_rcv_completion(struct iser_desc *desc, 313void iser_rcv_completion(struct iser_desc *desc,
316 unsigned long dto_xfer_len); 314 unsigned long dto_xfer_len);
317 315
@@ -329,9 +327,6 @@ void iser_reg_single(struct iser_device *device,
329 struct iser_regd_buf *regd_buf, 327 struct iser_regd_buf *regd_buf,
330 enum dma_data_direction direction); 328 enum dma_data_direction direction);
331 329
332int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
333 enum iser_data_dir cmd_dir);
334
335void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, 330void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
336 enum iser_data_dir cmd_dir); 331 enum iser_data_dir cmd_dir);
337 332
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index fc9f1fd0ae54..36cdf77ae92a 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -103,8 +103,8 @@ void iser_reg_single(struct iser_device *device,
103/** 103/**
104 * iser_start_rdma_unaligned_sg 104 * iser_start_rdma_unaligned_sg
105 */ 105 */
106int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, 106static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
107 enum iser_data_dir cmd_dir) 107 enum iser_data_dir cmd_dir)
108{ 108{
109 int dma_nents; 109 int dma_nents;
110 struct ib_device *dev; 110 struct ib_device *dev;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2044de1164ac..d42ec0156eec 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -311,6 +311,29 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
311} 311}
312 312
313/** 313/**
314 * Frees all conn objects and deallocs conn descriptor
315 */
316static void iser_conn_release(struct iser_conn *ib_conn)
317{
318 struct iser_device *device = ib_conn->device;
319
320 BUG_ON(ib_conn->state != ISER_CONN_DOWN);
321
322 mutex_lock(&ig.connlist_mutex);
323 list_del(&ib_conn->conn_list);
324 mutex_unlock(&ig.connlist_mutex);
325
326 iser_free_ib_conn_res(ib_conn);
327 ib_conn->device = NULL;
328 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
329 if (device != NULL)
330 iser_device_try_release(device);
331 if (ib_conn->iser_conn)
332 ib_conn->iser_conn->ib_conn = NULL;
333 kfree(ib_conn);
334}
335
336/**
314 * triggers start of the disconnect procedures and wait for them to be done 337 * triggers start of the disconnect procedures and wait for them to be done
315 */ 338 */
316void iser_conn_terminate(struct iser_conn *ib_conn) 339void iser_conn_terminate(struct iser_conn *ib_conn)
@@ -550,30 +573,6 @@ connect_failure:
550} 573}
551 574
552/** 575/**
553 * Frees all conn objects and deallocs conn descriptor
554 */
555void iser_conn_release(struct iser_conn *ib_conn)
556{
557 struct iser_device *device = ib_conn->device;
558
559 BUG_ON(ib_conn->state != ISER_CONN_DOWN);
560
561 mutex_lock(&ig.connlist_mutex);
562 list_del(&ib_conn->conn_list);
563 mutex_unlock(&ig.connlist_mutex);
564
565 iser_free_ib_conn_res(ib_conn);
566 ib_conn->device = NULL;
567 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
568 if (device != NULL)
569 iser_device_try_release(device);
570 if (ib_conn->iser_conn)
571 ib_conn->iser_conn->ib_conn = NULL;
572 kfree(ib_conn);
573}
574
575
576/**
577 * iser_reg_page_vec - Register physical memory 576 * iser_reg_page_vec - Register physical memory
578 * 577 *
579 * returns: 0 on success, errno code on failure 578 * returns: 0 on success, errno code on failure
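
iser_conn_release() is moved verbatim to a point above its first use in this file so that it can become static without a forward declaration; the prototype in iscsi_iser.h is dropped in the same patch. The general shape of the move (hypothetical names):

    /* after: definition placed before the first use, so 'static'
     * needs no separate declaration anywhere */
    static void conn_release(void *conn)
    {
        /* ... free connection resources ... */
        (void)conn;
    }

    void conn_terminate(void *conn)
    {
        /* ... initiate and wait for disconnect ... */
        conn_release(conn);
    }
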
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 1bb088aeaf71..6b32ec94b3a8 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -30,41 +30,133 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#include <linux/workqueue.h>
34
33#include "mlx4.h" 35#include "mlx4.h"
34 36
35void mlx4_handle_catas_err(struct mlx4_dev *dev) 37enum {
38 MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
39};
40
41static DEFINE_SPINLOCK(catas_lock);
42
43static LIST_HEAD(catas_list);
44static struct workqueue_struct *catas_wq;
45static struct work_struct catas_work;
46
47static int internal_err_reset = 1;
48module_param(internal_err_reset, int, 0644);
49MODULE_PARM_DESC(internal_err_reset,
50 "Reset device on internal errors if non-zero (default 1)");
51
52static void dump_err_buf(struct mlx4_dev *dev)
36{ 53{
37 struct mlx4_priv *priv = mlx4_priv(dev); 54 struct mlx4_priv *priv = mlx4_priv(dev);
38 55
39 int i; 56 int i;
40 57
41 mlx4_err(dev, "Catastrophic error detected:\n"); 58 mlx4_err(dev, "Internal error detected:\n");
42 for (i = 0; i < priv->fw.catas_size; ++i) 59 for (i = 0; i < priv->fw.catas_size; ++i)
43 mlx4_err(dev, " buf[%02x]: %08x\n", 60 mlx4_err(dev, " buf[%02x]: %08x\n",
44 i, swab32(readl(priv->catas_err.map + i))); 61 i, swab32(readl(priv->catas_err.map + i)));
62}
45 63
46 mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); 64static void poll_catas(unsigned long dev_ptr)
65{
66 struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
67 struct mlx4_priv *priv = mlx4_priv(dev);
68
69 if (readl(priv->catas_err.map)) {
70 dump_err_buf(dev);
71
72 mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
73
74 if (internal_err_reset) {
75 spin_lock(&catas_lock);
76 list_add(&priv->catas_err.list, &catas_list);
77 spin_unlock(&catas_lock);
78
79 queue_work(catas_wq, &catas_work);
80 }
81 } else
82 mod_timer(&priv->catas_err.timer,
83 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
47} 84}
48 85
49void mlx4_map_catas_buf(struct mlx4_dev *dev) 86static void catas_reset(struct work_struct *work)
87{
88 struct mlx4_priv *priv, *tmppriv;
89 struct mlx4_dev *dev;
90
91 LIST_HEAD(tlist);
92 int ret;
93
94 spin_lock_irq(&catas_lock);
95 list_splice_init(&catas_list, &tlist);
96 spin_unlock_irq(&catas_lock);
97
98 list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
99 ret = mlx4_restart_one(priv->dev.pdev);
100 dev = &priv->dev;
101 if (ret)
102 mlx4_err(dev, "Reset failed (%d)\n", ret);
103 else
104 mlx4_dbg(dev, "Reset succeeded\n");
105 }
106}
107
108void mlx4_start_catas_poll(struct mlx4_dev *dev)
50{ 109{
51 struct mlx4_priv *priv = mlx4_priv(dev); 110 struct mlx4_priv *priv = mlx4_priv(dev);
52 unsigned long addr; 111 unsigned long addr;
53 112
113 INIT_LIST_HEAD(&priv->catas_err.list);
114 init_timer(&priv->catas_err.timer);
115 priv->catas_err.map = NULL;
116
54 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + 117 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
55 priv->fw.catas_offset; 118 priv->fw.catas_offset;
56 119
57 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); 120 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
58 if (!priv->catas_err.map) 121 if (!priv->catas_err.map) {
59 mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n", 122 mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
60 addr); 123 addr);
124 return;
125 }
61 126
127 priv->catas_err.timer.data = (unsigned long) dev;
128 priv->catas_err.timer.function = poll_catas;
129 priv->catas_err.timer.expires =
130 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
131 add_timer(&priv->catas_err.timer);
62} 132}
63 133
64void mlx4_unmap_catas_buf(struct mlx4_dev *dev) 134void mlx4_stop_catas_poll(struct mlx4_dev *dev)
65{ 135{
66 struct mlx4_priv *priv = mlx4_priv(dev); 136 struct mlx4_priv *priv = mlx4_priv(dev);
67 137
138 del_timer_sync(&priv->catas_err.timer);
139
68 if (priv->catas_err.map) 140 if (priv->catas_err.map)
69 iounmap(priv->catas_err.map); 141 iounmap(priv->catas_err.map);
142
143 spin_lock_irq(&catas_lock);
144 list_del(&priv->catas_err.list);
145 spin_unlock_irq(&catas_lock);
146}
147
148int __init mlx4_catas_init(void)
149{
150 INIT_WORK(&catas_work, catas_reset);
151
152 catas_wq = create_singlethread_workqueue("mlx4_err");
153 if (!catas_wq)
154 return -ENOMEM;
155
156 return 0;
157}
158
159void mlx4_catas_cleanup(void)
160{
161 destroy_workqueue(catas_wq);
70} 162}
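
The rewritten catas.c replaces the dedicated catastrophic-error interrupt with a two-stage scheme: a timer polls the mapped error buffer every MLX4_CATAS_POLL_INTERVAL, and on a nonzero word it hands off to a single-threaded workqueue, because the reset path (mlx4_restart_one()) sleeps and so cannot run in the timer's atomic context. A kernel-style skeleton of the pattern, with a hypothetical device type standing in for mlx4_priv:

    #include <linux/timer.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>
    #include <linux/io.h>

    struct my_dev {
        struct timer_list timer;
        void __iomem *err_map;          /* mapped error buffer */
    };

    static struct workqueue_struct *err_wq;
    static struct work_struct err_work; /* its handler may sleep */

    static void poll_err(unsigned long data)
    {
        struct my_dev *d = (struct my_dev *)data;

        if (readl(d->err_map))              /* error word went nonzero   */
            queue_work(err_wq, &err_work);  /* defer: timers are atomic, */
                                            /* the reset handler sleeps  */
        else
            mod_timer(&d->timer,
                      round_jiffies(jiffies + 5 * HZ));
    }

Polling also removes the driver's dependence on the catastrophic-error MSI-X vector, which is why the eq.c hunk that follows deletes the dedicated MLX4_EQ_CATAS event queue and its interrupt handler.
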
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 27a82cecd693..2095c843fa15 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -89,14 +89,12 @@ struct mlx4_eq_context {
89 (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ 89 (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \
90 (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ 90 (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
91 (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ 91 (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \
92 (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
93 (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ 92 (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \
94 (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ 93 (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \
95 (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ 94 (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
96 (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ 95 (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
97 (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ 96 (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
98 (1ull << MLX4_EVENT_TYPE_CMD)) 97 (1ull << MLX4_EVENT_TYPE_CMD))
99#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)
100 98
101struct mlx4_eqe { 99struct mlx4_eqe {
102 u8 reserved1; 100 u8 reserved1;
@@ -264,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
264 262
265 writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); 263 writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
266 264
267 for (i = 0; i < MLX4_EQ_CATAS; ++i) 265 for (i = 0; i < MLX4_NUM_EQ; ++i)
268 work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); 266 work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
269 267
270 return IRQ_RETVAL(work); 268 return IRQ_RETVAL(work);
@@ -281,14 +279,6 @@ static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
281 return IRQ_HANDLED; 279 return IRQ_HANDLED;
282} 280}
283 281
284static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr)
285{
286 mlx4_handle_catas_err(dev_ptr);
287
288 /* MSI-X vectors always belong to us */
289 return IRQ_HANDLED;
290}
291
292static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, 282static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
293 int eq_num) 283 int eq_num)
294{ 284{
@@ -490,11 +480,9 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
490 480
491 if (eq_table->have_irq) 481 if (eq_table->have_irq)
492 free_irq(dev->pdev->irq, dev); 482 free_irq(dev->pdev->irq, dev);
493 for (i = 0; i < MLX4_EQ_CATAS; ++i) 483 for (i = 0; i < MLX4_NUM_EQ; ++i)
494 if (eq_table->eq[i].have_irq) 484 if (eq_table->eq[i].have_irq)
495 free_irq(eq_table->eq[i].irq, eq_table->eq + i); 485 free_irq(eq_table->eq[i].irq, eq_table->eq + i);
496 if (eq_table->eq[MLX4_EQ_CATAS].have_irq)
497 free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev);
498} 486}
499 487
500static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) 488static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -598,32 +586,19 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
598 if (dev->flags & MLX4_FLAG_MSI_X) { 586 if (dev->flags & MLX4_FLAG_MSI_X) {
599 static const char *eq_name[] = { 587 static const char *eq_name[] = {
600 [MLX4_EQ_COMP] = DRV_NAME " (comp)", 588 [MLX4_EQ_COMP] = DRV_NAME " (comp)",
601 [MLX4_EQ_ASYNC] = DRV_NAME " (async)", 589 [MLX4_EQ_ASYNC] = DRV_NAME " (async)"
602 [MLX4_EQ_CATAS] = DRV_NAME " (catas)"
603 }; 590 };
604 591
605 err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS, 592 for (i = 0; i < MLX4_NUM_EQ; ++i) {
606 &priv->eq_table.eq[MLX4_EQ_CATAS]);
607 if (err)
608 goto err_out_async;
609
610 for (i = 0; i < MLX4_EQ_CATAS; ++i) {
611 err = request_irq(priv->eq_table.eq[i].irq, 593 err = request_irq(priv->eq_table.eq[i].irq,
612 mlx4_msi_x_interrupt, 594 mlx4_msi_x_interrupt,
613 0, eq_name[i], priv->eq_table.eq + i); 595 0, eq_name[i], priv->eq_table.eq + i);
614 if (err) 596 if (err)
615 goto err_out_catas; 597 goto err_out_async;
616 598
617 priv->eq_table.eq[i].have_irq = 1; 599 priv->eq_table.eq[i].have_irq = 1;
618 } 600 }
619 601
620 err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq,
621 mlx4_catas_interrupt, 0,
622 eq_name[MLX4_EQ_CATAS], dev);
623 if (err)
624 goto err_out_catas;
625
626 priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1;
627 } else { 602 } else {
628 err = request_irq(dev->pdev->irq, mlx4_interrupt, 603 err = request_irq(dev->pdev->irq, mlx4_interrupt,
629 IRQF_SHARED, DRV_NAME, dev); 604 IRQF_SHARED, DRV_NAME, dev);
@@ -639,22 +614,11 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
639 mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", 614 mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
640 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); 615 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
641 616
642 for (i = 0; i < MLX4_EQ_CATAS; ++i) 617 for (i = 0; i < MLX4_NUM_EQ; ++i)
643 eq_set_ci(&priv->eq_table.eq[i], 1); 618 eq_set_ci(&priv->eq_table.eq[i], 1);
644 619
645 if (dev->flags & MLX4_FLAG_MSI_X) {
646 err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0,
647 priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
648 if (err)
649 mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n",
650 priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err);
651 }
652
653 return 0; 620 return 0;
654 621
655err_out_catas:
656 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
657
658err_out_async: 622err_out_async:
659 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); 623 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
660 624
@@ -675,19 +639,13 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
675 struct mlx4_priv *priv = mlx4_priv(dev); 639 struct mlx4_priv *priv = mlx4_priv(dev);
676 int i; 640 int i;
677 641
678 if (dev->flags & MLX4_FLAG_MSI_X)
679 mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1,
680 priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
681
682 mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, 642 mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
683 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); 643 priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
684 644
685 mlx4_free_irqs(dev); 645 mlx4_free_irqs(dev);
686 646
687 for (i = 0; i < MLX4_EQ_CATAS; ++i) 647 for (i = 0; i < MLX4_NUM_EQ; ++i)
688 mlx4_free_eq(dev, &priv->eq_table.eq[i]); 648 mlx4_free_eq(dev, &priv->eq_table.eq[i]);
689 if (dev->flags & MLX4_FLAG_MSI_X)
690 mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
691 649
692 mlx4_unmap_clr_int(dev); 650 mlx4_unmap_clr_int(dev);
693 651
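
With the catas EQ gone, eq.c handles every vector uniformly: the interrupt loop, the free_irq() teardown and the eq_set_ci() setup all run over MLX4_NUM_EQ, and the special-cased request for the catas vector disappears. The acquire-and-unwind idiom the simplified MSI-X path relies on, shown in isolation with illustrative my_ names:

	#include <linux/interrupt.h>

	struct my_eq {
		int	irq;
		int	have_irq;
	};

	static int my_request_vectors(struct my_eq *eq, int num,
				      irq_handler_t handler,
				      const char **names)
	{
		int i, err;

		for (i = 0; i < num; ++i) {
			err = request_irq(eq[i].irq, handler, 0,
					  names[i], &eq[i]);
			if (err)
				goto unwind;
			eq[i].have_irq = 1;
		}
		return 0;

	unwind:
		/* release exactly the vectors acquired so far */
		while (--i >= 0) {
			free_irq(eq[i].irq, &eq[i]);
			eq[i].have_irq = 0;
		}
		return err;
	}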
diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c
index 9ae951bf6aa6..be5d9e90ccf2 100644
--- a/drivers/net/mlx4/intf.c
+++ b/drivers/net/mlx4/intf.c
@@ -142,6 +142,7 @@ int mlx4_register_device(struct mlx4_dev *dev)
142 mlx4_add_device(intf, priv); 142 mlx4_add_device(intf, priv);
143 143
144 mutex_unlock(&intf_mutex); 144 mutex_unlock(&intf_mutex);
145 mlx4_start_catas_poll(dev);
145 146
146 return 0; 147 return 0;
147} 148}
@@ -151,6 +152,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
151 struct mlx4_priv *priv = mlx4_priv(dev); 152 struct mlx4_priv *priv = mlx4_priv(dev);
152 struct mlx4_interface *intf; 153 struct mlx4_interface *intf;
153 154
155 mlx4_stop_catas_poll(dev);
154 mutex_lock(&intf_mutex); 156 mutex_lock(&intf_mutex);
155 157
156 list_for_each_entry(intf, &intf_list, list) 158 list_for_each_entry(intf, &intf_list, list)
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index a4f2e0475a71..4dc9dc19b716 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -78,7 +78,7 @@ static const char mlx4_version[] __devinitdata =
78static struct mlx4_profile default_profile = { 78static struct mlx4_profile default_profile = {
79 .num_qp = 1 << 16, 79 .num_qp = 1 << 16,
80 .num_srq = 1 << 16, 80 .num_srq = 1 << 16,
81 .rdmarc_per_qp = 4, 81 .rdmarc_per_qp = 1 << 4,
82 .num_cq = 1 << 16, 82 .num_cq = 1 << 16,
83 .num_mcg = 1 << 13, 83 .num_mcg = 1 << 13,
84 .num_mpt = 1 << 17, 84 .num_mpt = 1 << 17,
@@ -583,13 +583,11 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
583 goto err_pd_table_free; 583 goto err_pd_table_free;
584 } 584 }
585 585
586 mlx4_map_catas_buf(dev);
587
588 err = mlx4_init_eq_table(dev); 586 err = mlx4_init_eq_table(dev);
589 if (err) { 587 if (err) {
590 mlx4_err(dev, "Failed to initialize " 588 mlx4_err(dev, "Failed to initialize "
591 "event queue table, aborting.\n"); 589 "event queue table, aborting.\n");
592 goto err_catas_buf; 590 goto err_mr_table_free;
593 } 591 }
594 592
595 err = mlx4_cmd_use_events(dev); 593 err = mlx4_cmd_use_events(dev);
@@ -659,8 +657,7 @@ err_cmd_poll:
659err_eq_table_free: 657err_eq_table_free:
660 mlx4_cleanup_eq_table(dev); 658 mlx4_cleanup_eq_table(dev);
661 659
662err_catas_buf: 660err_mr_table_free:
663 mlx4_unmap_catas_buf(dev);
664 mlx4_cleanup_mr_table(dev); 661 mlx4_cleanup_mr_table(dev);
665 662
666err_pd_table_free: 663err_pd_table_free:
@@ -836,9 +833,6 @@ err_cleanup:
836 mlx4_cleanup_cq_table(dev); 833 mlx4_cleanup_cq_table(dev);
837 mlx4_cmd_use_polling(dev); 834 mlx4_cmd_use_polling(dev);
838 mlx4_cleanup_eq_table(dev); 835 mlx4_cleanup_eq_table(dev);
839
840 mlx4_unmap_catas_buf(dev);
841
842 mlx4_cleanup_mr_table(dev); 836 mlx4_cleanup_mr_table(dev);
843 mlx4_cleanup_pd_table(dev); 837 mlx4_cleanup_pd_table(dev);
844 mlx4_cleanup_uar_table(dev); 838 mlx4_cleanup_uar_table(dev);
@@ -885,9 +879,6 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev)
885 mlx4_cleanup_cq_table(dev); 879 mlx4_cleanup_cq_table(dev);
886 mlx4_cmd_use_polling(dev); 880 mlx4_cmd_use_polling(dev);
887 mlx4_cleanup_eq_table(dev); 881 mlx4_cleanup_eq_table(dev);
888
889 mlx4_unmap_catas_buf(dev);
890
891 mlx4_cleanup_mr_table(dev); 882 mlx4_cleanup_mr_table(dev);
892 mlx4_cleanup_pd_table(dev); 883 mlx4_cleanup_pd_table(dev);
893 884
@@ -908,6 +899,12 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev)
908 } 899 }
909} 900}
910 901
902int mlx4_restart_one(struct pci_dev *pdev)
903{
904 mlx4_remove_one(pdev);
905 return mlx4_init_one(pdev, NULL);
906}
907
911static struct pci_device_id mlx4_pci_table[] = { 908static struct pci_device_id mlx4_pci_table[] = {
912 { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */ 909 { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
913 { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */ 910 { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
@@ -930,6 +927,10 @@ static int __init mlx4_init(void)
930{ 927{
931 int ret; 928 int ret;
932 929
930 ret = mlx4_catas_init();
931 if (ret)
932 return ret;
933
933 ret = pci_register_driver(&mlx4_driver); 934 ret = pci_register_driver(&mlx4_driver);
934 return ret < 0 ? ret : 0; 935 return ret < 0 ? ret : 0;
935} 936}
@@ -937,6 +938,7 @@ static int __init mlx4_init(void)
937static void __exit mlx4_cleanup(void) 938static void __exit mlx4_cleanup(void)
938{ 939{
939 pci_unregister_driver(&mlx4_driver); 940 pci_unregister_driver(&mlx4_driver);
941 mlx4_catas_cleanup();
940} 942}
941 943
942module_init(mlx4_init); 944module_init(mlx4_init);
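
Two points in main.c deserve a second look: mlx4_restart_one() implements error recovery as a plain remove-plus-reprobe, and module init now creates the shared error workqueue before the PCI driver can bind anything. A sketch of that ordering; unwinding the workqueue when pci_register_driver() fails is added here for completeness and is not in the patch itself:

	#include <linux/module.h>
	#include <linux/pci.h>

	int my_catas_init(void);		/* assumed helpers */
	void my_catas_cleanup(void);
	static struct pci_driver my_driver;	/* id table elided */

	static int __init my_init(void)
	{
		int ret;

		ret = my_catas_init();		/* workqueue first */
		if (ret)
			return ret;

		ret = pci_register_driver(&my_driver);
		if (ret < 0)
			my_catas_cleanup();	/* illustrative unwind */
		return ret < 0 ? ret : 0;
	}

	static void __exit my_exit(void)
	{
		pci_unregister_driver(&my_driver);	/* no devices left... */
		my_catas_cleanup();			/* ...so the wq is idle */
	}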
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d9c91a71fc87..be304a7c2c91 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -39,6 +39,7 @@
39 39
40#include <linux/mutex.h> 40#include <linux/mutex.h>
41#include <linux/radix-tree.h> 41#include <linux/radix-tree.h>
42#include <linux/timer.h>
42 43
43#include <linux/mlx4/device.h> 44#include <linux/mlx4/device.h>
44#include <linux/mlx4/doorbell.h> 45#include <linux/mlx4/doorbell.h>
@@ -67,7 +68,6 @@ enum {
67enum { 68enum {
68 MLX4_EQ_ASYNC, 69 MLX4_EQ_ASYNC,
69 MLX4_EQ_COMP, 70 MLX4_EQ_COMP,
70 MLX4_EQ_CATAS,
71 MLX4_NUM_EQ 71 MLX4_NUM_EQ
72}; 72};
73 73
@@ -248,7 +248,8 @@ struct mlx4_mcg_table {
248 248
249struct mlx4_catas_err { 249struct mlx4_catas_err {
250 u32 __iomem *map; 250 u32 __iomem *map;
251 int size; 251 struct timer_list timer;
252 struct list_head list;
252}; 253};
253 254
254struct mlx4_priv { 255struct mlx4_priv {
@@ -311,9 +312,11 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
311void mlx4_cleanup_srq_table(struct mlx4_dev *dev); 312void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
312void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); 313void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
313 314
314void mlx4_map_catas_buf(struct mlx4_dev *dev); 315void mlx4_start_catas_poll(struct mlx4_dev *dev);
315void mlx4_unmap_catas_buf(struct mlx4_dev *dev); 316void mlx4_stop_catas_poll(struct mlx4_dev *dev);
316 317int mlx4_catas_init(void);
318void mlx4_catas_cleanup(void);
319int mlx4_restart_one(struct pci_dev *pdev);
317int mlx4_register_device(struct mlx4_dev *dev); 320int mlx4_register_device(struct mlx4_dev *dev);
318void mlx4_unregister_device(struct mlx4_dev *dev); 321void mlx4_unregister_device(struct mlx4_dev *dev);
319void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, 322void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
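
The timer and list_head that replace the old size field in struct mlx4_catas_err let one global reset work item find every failed device: each device links itself in under catas_lock, and the handler splices the list away before doing the slow restarts unlocked. A sketch of that splice-then-walk pattern (illustrative; the patch's real handler lives in catas.c):

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/workqueue.h>

	struct my_err {
		struct list_head list;
	};

	static LIST_HEAD(catas_list);
	static DEFINE_SPINLOCK(catas_lock);

	static void my_catas_reset(struct work_struct *work)
	{
		struct my_err *err, *tmp;
		LIST_HEAD(tlist);

		/* take the pending entries, then drop the lock for
		 * the sleeping per-device restarts */
		spin_lock_irq(&catas_lock);
		list_splice_init(&catas_list, &tlist);
		spin_unlock_irq(&catas_lock);

		list_for_each_entry_safe(err, tmp, &tlist, list) {
			/* restart the owning device, e.g. via
			 * mlx4_restart_one() */
		}
	}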
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 143c6efc478a..a99607142fc8 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -1127,6 +1127,34 @@ static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp,
1127 1127
1128#endif 1128#endif
1129 1129
1130/************************ runtime PM support ***************************/
1131
1132static int pcmcia_dev_suspend(struct device *dev, pm_message_t state);
1133static int pcmcia_dev_resume(struct device *dev);
1134
1135static int runtime_suspend(struct device *dev)
1136{
1137 int rc;
1138
1139 down(&dev->sem);
1140 rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
1141 up(&dev->sem);
1142 if (!rc)
1143 dev->power.power_state.event = PM_EVENT_SUSPEND;
1144 return rc;
1145}
1146
1147static void runtime_resume(struct device *dev)
1148{
1149 int rc;
1150
1151 down(&dev->sem);
1152 rc = pcmcia_dev_resume(dev);
1153 up(&dev->sem);
1154 if (!rc)
1155 dev->power.power_state.event = PM_EVENT_ON;
1156}
1157
1130/************************ per-device sysfs output ***************************/ 1158/************************ per-device sysfs output ***************************/
1131 1159
1132#define pcmcia_device_attr(field, test, format) \ 1160#define pcmcia_device_attr(field, test, format) \
@@ -1173,9 +1201,9 @@ static ssize_t pcmcia_store_pm_state(struct device *dev, struct device_attribute
1173 return -EINVAL; 1201 return -EINVAL;
1174 1202
1175 if ((!p_dev->suspended) && !strncmp(buf, "off", 3)) 1203 if ((!p_dev->suspended) && !strncmp(buf, "off", 3))
1176 ret = dpm_runtime_suspend(dev, PMSG_SUSPEND); 1204 ret = runtime_suspend(dev);
1177 else if (p_dev->suspended && !strncmp(buf, "on", 2)) 1205 else if (p_dev->suspended && !strncmp(buf, "on", 2))
1178 dpm_runtime_resume(dev); 1206 runtime_resume(dev);
1179 1207
1180 return ret ? ret : count; 1208 return ret ? ret : count;
1181} 1209}
@@ -1312,10 +1340,10 @@ static int pcmcia_bus_suspend_callback(struct device *dev, void * _data)
1312 struct pcmcia_socket *skt = _data; 1340 struct pcmcia_socket *skt = _data;
1313 struct pcmcia_device *p_dev = to_pcmcia_dev(dev); 1341 struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
1314 1342
1315 if (p_dev->socket != skt) 1343 if (p_dev->socket != skt || p_dev->suspended)
1316 return 0; 1344 return 0;
1317 1345
1318 return dpm_runtime_suspend(dev, PMSG_SUSPEND); 1346 return runtime_suspend(dev);
1319} 1347}
1320 1348
1321static int pcmcia_bus_resume_callback(struct device *dev, void * _data) 1349static int pcmcia_bus_resume_callback(struct device *dev, void * _data)
@@ -1323,10 +1351,10 @@ static int pcmcia_bus_resume_callback(struct device *dev, void * _data)
1323 struct pcmcia_socket *skt = _data; 1351 struct pcmcia_socket *skt = _data;
1324 struct pcmcia_device *p_dev = to_pcmcia_dev(dev); 1352 struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
1325 1353
1326 if (p_dev->socket != skt) 1354 if (p_dev->socket != skt || !p_dev->suspended)
1327 return 0; 1355 return 0;
1328 1356
1329 dpm_runtime_resume(dev); 1357 runtime_resume(dev);
1330 1358
1331 return 0; 1359 return 0;
1332} 1360}
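
The new runtime_suspend()/runtime_resume() wrappers serialize the bus callbacks under dev->sem and keep power.power_state in step, replacing the dpm_runtime_* helpers. User space still drives them through the existing pm_state attribute; a hedged userspace sketch, with an illustrative device path:

	#include <stdio.h>

	int main(void)
	{
		/* path is illustrative; substitute a real socket/device */
		FILE *f = fopen("/sys/bus/pcmcia/devices/0.0/pm_state", "w");

		if (!f) {
			perror("pm_state");
			return 1;
		}
		fputs("off", f);	/* "on" resumes a suspended device */
		fclose(f);
		return 0;
	}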
diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
new file mode 100644
index 000000000000..b778ed71f636
--- /dev/null
+++ b/drivers/uio/Kconfig
@@ -0,0 +1,29 @@
1menu "Userspace I/O"
2 depends on !S390
3
4config UIO
5 tristate "Userspace I/O drivers"
6 default n
7 help
8 Enable this to allow the userspace driver core code to be
9 built. This code allows userspace programs easy access to
10 kernel interrupts and memory locations, allowing some drivers
11 to be written in userspace. Note that a small kernel driver
12 is also required for interrupt handling to work properly.
13
14 If you don't know what to do here, say N.
15
16config UIO_CIF
17 tristate "generic Hilscher CIF Card driver"
18 depends on UIO && PCI
19 default n
20 help
21 Driver for Hilscher CIF DeviceNet and Profibus cards. This
22 driver requires a userspace component that handles all of the
23 heavy lifting and can be found at:
24 http://www.osadl.org/projects/downloads/UIO/user/cif-*
25
26 To compile this driver as a module, choose M here: the module
27 will be called uio_cif.
28
29endmenu
diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
new file mode 100644
index 000000000000..7fecfb459da5
--- /dev/null
+++ b/drivers/uio/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_UIO) += uio.o
2obj-$(CONFIG_UIO_CIF) += uio_cif.o
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
new file mode 100644
index 000000000000..865f32b63b5c
--- /dev/null
+++ b/drivers/uio/uio.c
@@ -0,0 +1,701 @@
1/*
2 * drivers/uio/uio.c
3 *
4 * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de>
5 * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
6 * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de>
7 * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com>
8 *
9 * Userspace IO
10 *
11 * Base Functions
12 *
13 * Licensed under the GPLv2 only.
14 */
15
16#include <linux/module.h>
17#include <linux/init.h>
18#include <linux/poll.h>
19#include <linux/device.h>
20#include <linux/mm.h>
21#include <linux/idr.h>
22#include <linux/string.h>
23#include <linux/kobject.h>
24#include <linux/uio_driver.h>
25
26#define UIO_MAX_DEVICES 255
27
28struct uio_device {
29 struct module *owner;
30 struct device *dev;
31 int minor;
32 atomic_t event;
33 struct fasync_struct *async_queue;
34 wait_queue_head_t wait;
35 int vma_count;
36 struct uio_info *info;
37 struct kset map_attr_kset;
38};
39
40static int uio_major;
41static DEFINE_IDR(uio_idr);
42static struct file_operations uio_fops;
43
44/* UIO class infrastructure */
45static struct uio_class {
46 struct kref kref;
47 struct class *class;
48} *uio_class;
49
50/*
51 * attributes
52 */
53
54static struct attribute attr_addr = {
55 .name = "addr",
56 .mode = S_IRUGO,
57};
58
59static struct attribute attr_size = {
60 .name = "size",
61 .mode = S_IRUGO,
62};
63
64static struct attribute* map_attrs[] = {
65 &attr_addr, &attr_size, NULL
66};
67
68static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
69 char *buf)
70{
71 struct uio_mem *mem = container_of(kobj, struct uio_mem, kobj);
72
73 if (strncmp(attr->name,"addr",4) == 0)
74 return sprintf(buf, "0x%lx\n", mem->addr);
75
76 if (strncmp(attr->name,"size",4) == 0)
77 return sprintf(buf, "0x%lx\n", mem->size);
78
79 return -ENODEV;
80}
81
82static void map_attr_release(struct kobject *kobj)
83{
84 /* TODO ??? */
85}
86
87static struct sysfs_ops map_attr_ops = {
88 .show = map_attr_show,
89};
90
91static struct kobj_type map_attr_type = {
92 .release = map_attr_release,
93 .sysfs_ops = &map_attr_ops,
94 .default_attrs = map_attrs,
95};
96
97static ssize_t show_name(struct device *dev,
98 struct device_attribute *attr, char *buf)
99{
100 struct uio_device *idev = dev_get_drvdata(dev);
101 if (idev)
102 return sprintf(buf, "%s\n", idev->info->name);
103 else
104 return -ENODEV;
105}
106static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
107
108static ssize_t show_version(struct device *dev,
109 struct device_attribute *attr, char *buf)
110{
111 struct uio_device *idev = dev_get_drvdata(dev);
112 if (idev)
113 return sprintf(buf, "%s\n", idev->info->version);
114 else
115 return -ENODEV;
116}
117static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
118
119static ssize_t show_event(struct device *dev,
120 struct device_attribute *attr, char *buf)
121{
122 struct uio_device *idev = dev_get_drvdata(dev);
123 if (idev)
124 return sprintf(buf, "%u\n",
125 (unsigned int)atomic_read(&idev->event));
126 else
127 return -ENODEV;
128}
129static DEVICE_ATTR(event, S_IRUGO, show_event, NULL);
130
131static struct attribute *uio_attrs[] = {
132 &dev_attr_name.attr,
133 &dev_attr_version.attr,
134 &dev_attr_event.attr,
135 NULL,
136};
137
138static struct attribute_group uio_attr_grp = {
139 .attrs = uio_attrs,
140};
141
142/*
143 * device functions
144 */
145static int uio_dev_add_attributes(struct uio_device *idev)
146{
147 int ret;
148 int mi;
149 int map_found = 0;
150 struct uio_mem *mem;
151
152 ret = sysfs_create_group(&idev->dev->kobj, &uio_attr_grp);
153 if (ret)
154 goto err_group;
155
156 for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
157 mem = &idev->info->mem[mi];
158 if (mem->size == 0)
159 break;
160 if (!map_found) {
161 map_found = 1;
162 kobject_set_name(&idev->map_attr_kset.kobj,"maps");
163 idev->map_attr_kset.ktype = &map_attr_type;
164 idev->map_attr_kset.kobj.parent = &idev->dev->kobj;
165 ret = kset_register(&idev->map_attr_kset);
166 if (ret)
167 goto err_remove_group;
168 }
169 kobject_init(&mem->kobj);
170 kobject_set_name(&mem->kobj,"map%d",mi);
171 mem->kobj.parent = &idev->map_attr_kset.kobj;
172 mem->kobj.kset = &idev->map_attr_kset;
173 ret = kobject_add(&mem->kobj);
174 if (ret)
175 goto err_remove_maps;
176 }
177
178 return 0;
179
180err_remove_maps:
181 for (mi--; mi>=0; mi--) {
182 mem = &idev->info->mem[mi];
183 kobject_unregister(&mem->kobj);
184 }
185 kset_unregister(&idev->map_attr_kset); /* Needed ? */
186err_remove_group:
187 sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
188err_group:
189 dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
190 return ret;
191}
192
193static void uio_dev_del_attributes(struct uio_device *idev)
194{
195 int mi;
196 struct uio_mem *mem;
197 for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
198 mem = &idev->info->mem[mi];
199 if (mem->size == 0)
200 break;
201 kobject_unregister(&mem->kobj);
202 }
203 kset_unregister(&idev->map_attr_kset);
204 sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
205}
206
207static int uio_get_minor(struct uio_device *idev)
208{
209 static DEFINE_MUTEX(minor_lock);
210 int retval = -ENOMEM;
211 int id;
212
213 mutex_lock(&minor_lock);
214 if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0)
215 goto exit;
216
217 retval = idr_get_new(&uio_idr, idev, &id);
218 if (retval < 0) {
219 if (retval == -EAGAIN)
220 retval = -ENOMEM;
221 goto exit;
222 }
223 idev->minor = id & MAX_ID_MASK;
224exit:
225 mutex_unlock(&minor_lock);
226 return retval;
227}
228
229static void uio_free_minor(struct uio_device *idev)
230{
231 idr_remove(&uio_idr, idev->minor);
232}
233
234/**
235 * uio_event_notify - trigger an interrupt event
236 * @info: UIO device capabilities
237 */
238void uio_event_notify(struct uio_info *info)
239{
240 struct uio_device *idev = info->uio_dev;
241
242 atomic_inc(&idev->event);
243 wake_up_interruptible(&idev->wait);
244 kill_fasync(&idev->async_queue, SIGIO, POLL_IN);
245}
246EXPORT_SYMBOL_GPL(uio_event_notify);
247
248/**
249 * uio_interrupt - hardware interrupt handler
250 * @irq: IRQ number, can be UIO_IRQ_CYCLIC for cyclic timer
251 * @dev_id: Pointer to the devices uio_device structure
252 */
253static irqreturn_t uio_interrupt(int irq, void *dev_id)
254{
255 struct uio_device *idev = (struct uio_device *)dev_id;
256 irqreturn_t ret = idev->info->handler(irq, idev->info);
257
258 if (ret == IRQ_HANDLED)
259 uio_event_notify(idev->info);
260
261 return ret;
262}
263
264struct uio_listener {
265 struct uio_device *dev;
266 s32 event_count;
267};
268
269static int uio_open(struct inode *inode, struct file *filep)
270{
271 struct uio_device *idev;
272 struct uio_listener *listener;
273 int ret = 0;
274
275 idev = idr_find(&uio_idr, iminor(inode));
276 if (!idev)
277 return -ENODEV;
278
279 listener = kmalloc(sizeof(*listener), GFP_KERNEL);
280 if (!listener)
281 return -ENOMEM;
282
283 listener->dev = idev;
284 listener->event_count = atomic_read(&idev->event);
285 filep->private_data = listener;
286
287 if (idev->info->open) {
288 if (!try_module_get(idev->owner))
289 return -ENODEV;
290 ret = idev->info->open(idev->info, inode);
291 module_put(idev->owner);
292 }
293
294 if (ret)
295 kfree(listener);
296
297 return ret;
298}
299
300static int uio_fasync(int fd, struct file *filep, int on)
301{
302 struct uio_listener *listener = filep->private_data;
303 struct uio_device *idev = listener->dev;
304
305 return fasync_helper(fd, filep, on, &idev->async_queue);
306}
307
308static int uio_release(struct inode *inode, struct file *filep)
309{
310 int ret = 0;
311 struct uio_listener *listener = filep->private_data;
312 struct uio_device *idev = listener->dev;
313
314 if (idev->info->release) {
315 if (!try_module_get(idev->owner))
316 return -ENODEV;
317 ret = idev->info->release(idev->info, inode);
318 module_put(idev->owner);
319 }
320 if (filep->f_flags & FASYNC)
321 ret = uio_fasync(-1, filep, 0);
322 kfree(listener);
323 return ret;
324}
325
326static unsigned int uio_poll(struct file *filep, poll_table *wait)
327{
328 struct uio_listener *listener = filep->private_data;
329 struct uio_device *idev = listener->dev;
330
331 if (idev->info->irq == UIO_IRQ_NONE)
332 return -EIO;
333
334 poll_wait(filep, &idev->wait, wait);
335 if (listener->event_count != atomic_read(&idev->event))
336 return POLLIN | POLLRDNORM;
337 return 0;
338}
339
340static ssize_t uio_read(struct file *filep, char __user *buf,
341 size_t count, loff_t *ppos)
342{
343 struct uio_listener *listener = filep->private_data;
344 struct uio_device *idev = listener->dev;
345 DECLARE_WAITQUEUE(wait, current);
346 ssize_t retval;
347 s32 event_count;
348
349 if (idev->info->irq == UIO_IRQ_NONE)
350 return -EIO;
351
352 if (count != sizeof(s32))
353 return -EINVAL;
354
355 add_wait_queue(&idev->wait, &wait);
356
357 do {
358 set_current_state(TASK_INTERRUPTIBLE);
359
360 event_count = atomic_read(&idev->event);
361 if (event_count != listener->event_count) {
362 if (copy_to_user(buf, &event_count, count))
363 retval = -EFAULT;
364 else {
365 listener->event_count = event_count;
366 retval = count;
367 }
368 break;
369 }
370
371 if (filep->f_flags & O_NONBLOCK) {
372 retval = -EAGAIN;
373 break;
374 }
375
376 if (signal_pending(current)) {
377 retval = -ERESTARTSYS;
378 break;
379 }
380 schedule();
381 } while (1);
382
383 __set_current_state(TASK_RUNNING);
384 remove_wait_queue(&idev->wait, &wait);
385
386 return retval;
387}
388
389static int uio_find_mem_index(struct vm_area_struct *vma)
390{
391 int mi;
392 struct uio_device *idev = vma->vm_private_data;
393
394 for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
395 if (idev->info->mem[mi].size == 0)
396 return -1;
397 if (vma->vm_pgoff == mi)
398 return mi;
399 }
400 return -1;
401}
402
403static void uio_vma_open(struct vm_area_struct *vma)
404{
405 struct uio_device *idev = vma->vm_private_data;
406 idev->vma_count++;
407}
408
409static void uio_vma_close(struct vm_area_struct *vma)
410{
411 struct uio_device *idev = vma->vm_private_data;
412 idev->vma_count--;
413}
414
415static struct page *uio_vma_nopage(struct vm_area_struct *vma,
416 unsigned long address, int *type)
417{
418 struct uio_device *idev = vma->vm_private_data;
419 struct page* page = NOPAGE_SIGBUS;
420
421 int mi = uio_find_mem_index(vma);
422 if (mi < 0)
423 return page;
424
425 if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL)
426 page = virt_to_page(idev->info->mem[mi].addr);
427 else
428 page = vmalloc_to_page((void*)idev->info->mem[mi].addr);
429 get_page(page);
430 if (type)
431 *type = VM_FAULT_MINOR;
432 return page;
433}
434
435static struct vm_operations_struct uio_vm_ops = {
436 .open = uio_vma_open,
437 .close = uio_vma_close,
438 .nopage = uio_vma_nopage,
439};
440
441static int uio_mmap_physical(struct vm_area_struct *vma)
442{
443 struct uio_device *idev = vma->vm_private_data;
444 int mi = uio_find_mem_index(vma);
445 if (mi < 0)
446 return -EINVAL;
447
448 vma->vm_flags |= VM_IO | VM_RESERVED;
449
450 return remap_pfn_range(vma,
451 vma->vm_start,
452 idev->info->mem[mi].addr >> PAGE_SHIFT,
453 vma->vm_end - vma->vm_start,
454 vma->vm_page_prot);
455}
456
457static int uio_mmap_logical(struct vm_area_struct *vma)
458{
459 vma->vm_flags |= VM_RESERVED;
460 vma->vm_ops = &uio_vm_ops;
461 uio_vma_open(vma);
462 return 0;
463}
464
465static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
466{
467 struct uio_listener *listener = filep->private_data;
468 struct uio_device *idev = listener->dev;
469 int mi;
470 unsigned long requested_pages, actual_pages;
471 int ret = 0;
472
473 if (vma->vm_end < vma->vm_start)
474 return -EINVAL;
475
476 vma->vm_private_data = idev;
477
478 mi = uio_find_mem_index(vma);
479 if (mi < 0)
480 return -EINVAL;
481
482 requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
483 actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT;
484 if (requested_pages > actual_pages)
485 return -EINVAL;
486
487 if (idev->info->mmap) {
488 if (!try_module_get(idev->owner))
489 return -ENODEV;
490 ret = idev->info->mmap(idev->info, vma);
491 module_put(idev->owner);
492 return ret;
493 }
494
495 switch (idev->info->mem[mi].memtype) {
496 case UIO_MEM_PHYS:
497 return uio_mmap_physical(vma);
498 case UIO_MEM_LOGICAL:
499 case UIO_MEM_VIRTUAL:
500 return uio_mmap_logical(vma);
501 default:
502 return -EINVAL;
503 }
504}
505
506static struct file_operations uio_fops = {
507 .owner = THIS_MODULE,
508 .open = uio_open,
509 .release = uio_release,
510 .read = uio_read,
511 .mmap = uio_mmap,
512 .poll = uio_poll,
513 .fasync = uio_fasync,
514};
515
516static int uio_major_init(void)
517{
518 uio_major = register_chrdev(0, "uio", &uio_fops);
519 if (uio_major < 0)
520 return uio_major;
521 return 0;
522}
523
524static void uio_major_cleanup(void)
525{
526 unregister_chrdev(uio_major, "uio");
527}
528
529static int init_uio_class(void)
530{
531 int ret = 0;
532
533 if (uio_class != NULL) {
534 kref_get(&uio_class->kref);
535 goto exit;
536 }
537
538 /* This is the first time in here, set everything up properly */
539 ret = uio_major_init();
540 if (ret)
541 goto exit;
542
543 uio_class = kzalloc(sizeof(*uio_class), GFP_KERNEL);
544 if (!uio_class) {
545 ret = -ENOMEM;
546 goto err_kzalloc;
547 }
548
549 kref_init(&uio_class->kref);
550 uio_class->class = class_create(THIS_MODULE, "uio");
551 if (IS_ERR(uio_class->class)) {
 552 ret = PTR_ERR(uio_class->class);
553 printk(KERN_ERR "class_create failed for uio\n");
554 goto err_class_create;
555 }
556 return 0;
557
558err_class_create:
559 kfree(uio_class);
560 uio_class = NULL;
561err_kzalloc:
562 uio_major_cleanup();
563exit:
564 return ret;
565}
566
567static void release_uio_class(struct kref *kref)
568{
569 /* Ok, we cheat as we know we only have one uio_class */
570 class_destroy(uio_class->class);
571 kfree(uio_class);
572 uio_major_cleanup();
573 uio_class = NULL;
574}
575
576static void uio_class_destroy(void)
577{
578 if (uio_class)
579 kref_put(&uio_class->kref, release_uio_class);
580}
581
582/**
583 * uio_register_device - register a new userspace IO device
584 * @owner: module that creates the new device
585 * @parent: parent device
586 * @info: UIO device capabilities
587 *
588 * returns zero on success or a negative error code.
589 */
590int __uio_register_device(struct module *owner,
591 struct device *parent,
592 struct uio_info *info)
593{
594 struct uio_device *idev;
595 int ret = 0;
596
597 if (!parent || !info || !info->name || !info->version)
598 return -EINVAL;
599
600 info->uio_dev = NULL;
601
602 ret = init_uio_class();
603 if (ret)
604 return ret;
605
606 idev = kzalloc(sizeof(*idev), GFP_KERNEL);
607 if (!idev) {
608 ret = -ENOMEM;
609 goto err_kzalloc;
610 }
611
612 idev->owner = owner;
613 idev->info = info;
614 init_waitqueue_head(&idev->wait);
615 atomic_set(&idev->event, 0);
616
617 ret = uio_get_minor(idev);
618 if (ret)
619 goto err_get_minor;
620
621 idev->dev = device_create(uio_class->class, parent,
622 MKDEV(uio_major, idev->minor),
623 "uio%d", idev->minor);
624 if (IS_ERR(idev->dev)) {
625 printk(KERN_ERR "UIO: device register failed\n");
626 ret = PTR_ERR(idev->dev);
627 goto err_device_create;
628 }
629 dev_set_drvdata(idev->dev, idev);
630
631 ret = uio_dev_add_attributes(idev);
632 if (ret)
633 goto err_uio_dev_add_attributes;
634
635 info->uio_dev = idev;
636
637 if (idev->info->irq >= 0) {
638 ret = request_irq(idev->info->irq, uio_interrupt,
639 idev->info->irq_flags, idev->info->name, idev);
640 if (ret)
641 goto err_request_irq;
642 }
643
644 return 0;
645
646err_request_irq:
647 uio_dev_del_attributes(idev);
648err_uio_dev_add_attributes:
649 device_destroy(uio_class->class, MKDEV(uio_major, idev->minor));
650err_device_create:
651 uio_free_minor(idev);
652err_get_minor:
653 kfree(idev);
654err_kzalloc:
655 uio_class_destroy();
656 return ret;
657}
658EXPORT_SYMBOL_GPL(__uio_register_device);
659
660/**
 661 * uio_unregister_device - unregister a userspace IO device
662 * @info: UIO device capabilities
663 *
664 */
665void uio_unregister_device(struct uio_info *info)
666{
667 struct uio_device *idev;
668
669 if (!info || !info->uio_dev)
670 return;
671
672 idev = info->uio_dev;
673
674 uio_free_minor(idev);
675
676 if (info->irq >= 0)
677 free_irq(info->irq, idev);
678
679 uio_dev_del_attributes(idev);
680
681 dev_set_drvdata(idev->dev, NULL);
682 device_destroy(uio_class->class, MKDEV(uio_major, idev->minor));
683 kfree(idev);
684 uio_class_destroy();
685
686 return;
687}
688EXPORT_SYMBOL_GPL(uio_unregister_device);
689
690static int __init uio_init(void)
691{
692 return 0;
693}
694
695static void __exit uio_exit(void)
696{
697}
698
699module_init(uio_init)
700module_exit(uio_exit)
701MODULE_LICENSE("GPL v2");
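
uio.c above defines the whole user-facing ABI: read() must be exactly sizeof(s32) and blocks until the interrupt event counter advances, poll() signals the same condition, and mmap() selects memory region N with an offset of N pages. A minimal userspace client against that ABI; the device path and the one-page mapping size are illustrative:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int32_t events;
		void *regs;
		int fd = open("/dev/uio0", O_RDONLY);

		if (fd < 0)
			return 1;

		/* offset 0 * pagesize selects mem map 0 */
		regs = mmap(NULL, getpagesize(), PROT_READ,
			    MAP_SHARED, fd, 0);
		if (regs == MAP_FAILED)
			return 1;

		/* blocks until uio_event_notify() bumps the counter */
		if (read(fd, &events, sizeof(events)) == sizeof(events))
			printf("interrupts so far: %d\n", (int) events);

		munmap(regs, getpagesize());
		close(fd);
		return 0;
	}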
diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c
new file mode 100644
index 000000000000..838bae460831
--- /dev/null
+++ b/drivers/uio/uio_cif.c
@@ -0,0 +1,156 @@
1/*
2 * UIO Hilscher CIF card driver
3 *
4 * (C) 2007 Hans J. Koch <hjk@linutronix.de>
5 * Original code (C) 2005 Benedikt Spranger <b.spranger@linutronix.de>
6 *
7 * Licensed under GPL version 2 only.
8 *
9 */
10
11#include <linux/device.h>
12#include <linux/module.h>
13#include <linux/pci.h>
14#include <linux/uio_driver.h>
15
16#include <asm/io.h>
17
18#ifndef PCI_DEVICE_ID_PLX_9030
19#define PCI_DEVICE_ID_PLX_9030 0x9030
20#endif
21
22#define PLX9030_INTCSR 0x4C
23#define INTSCR_INT1_ENABLE 0x01
24#define INTSCR_INT1_STATUS 0x04
25#define INT1_ENABLED_AND_ACTIVE (INTSCR_INT1_ENABLE | INTSCR_INT1_STATUS)
26
27#define PCI_SUBVENDOR_ID_PEP 0x1518
28#define CIF_SUBDEVICE_PROFIBUS 0x430
29#define CIF_SUBDEVICE_DEVICENET 0x432
30
31
32static irqreturn_t hilscher_handler(int irq, struct uio_info *dev_info)
33{
34 void __iomem *plx_intscr = dev_info->mem[0].internal_addr
35 + PLX9030_INTCSR;
36
37 if ((ioread8(plx_intscr) & INT1_ENABLED_AND_ACTIVE)
38 != INT1_ENABLED_AND_ACTIVE)
39 return IRQ_NONE;
40
41 /* Disable interrupt */
42 iowrite8(ioread8(plx_intscr) & ~INTSCR_INT1_ENABLE, plx_intscr);
43 return IRQ_HANDLED;
44}
45
46static int __devinit hilscher_pci_probe(struct pci_dev *dev,
47 const struct pci_device_id *id)
48{
49 struct uio_info *info;
50
51 info = kzalloc(sizeof(struct uio_info), GFP_KERNEL);
52 if (!info)
53 return -ENOMEM;
54
55 if (pci_enable_device(dev))
56 goto out_free;
57
58 if (pci_request_regions(dev, "hilscher"))
59 goto out_disable;
60
61 info->mem[0].addr = pci_resource_start(dev, 0);
62 if (!info->mem[0].addr)
63 goto out_release;
64 info->mem[0].internal_addr = ioremap(pci_resource_start(dev, 0),
65 pci_resource_len(dev, 0));
66 if (!info->mem[0].internal_addr)
67 goto out_release;
68
69 info->mem[0].size = pci_resource_len(dev, 0);
70 info->mem[0].memtype = UIO_MEM_PHYS;
71 info->mem[1].addr = pci_resource_start(dev, 2);
72 info->mem[1].size = pci_resource_len(dev, 2);
73 info->mem[1].memtype = UIO_MEM_PHYS;
74 switch (id->subdevice) {
75 case CIF_SUBDEVICE_PROFIBUS:
76 info->name = "CIF_Profibus";
77 break;
78 case CIF_SUBDEVICE_DEVICENET:
79 info->name = "CIF_Devicenet";
80 break;
81 default:
82 info->name = "CIF_???";
83 }
84 info->version = "0.0.1";
85 info->irq = dev->irq;
86 info->irq_flags = IRQF_DISABLED | IRQF_SHARED;
87 info->handler = hilscher_handler;
88
89 if (uio_register_device(&dev->dev, info))
90 goto out_unmap;
91
92 pci_set_drvdata(dev, info);
93
94 return 0;
95out_unmap:
96 iounmap(info->mem[0].internal_addr);
97out_release:
98 pci_release_regions(dev);
99out_disable:
100 pci_disable_device(dev);
101out_free:
102 kfree (info);
103 return -ENODEV;
104}
105
106static void hilscher_pci_remove(struct pci_dev *dev)
107{
108 struct uio_info *info = pci_get_drvdata(dev);
109
110 uio_unregister_device(info);
111 pci_release_regions(dev);
112 pci_disable_device(dev);
113 pci_set_drvdata(dev, NULL);
114 iounmap(info->mem[0].internal_addr);
115
116 kfree (info);
117}
118
119static struct pci_device_id hilscher_pci_ids[] = {
120 {
121 .vendor = PCI_VENDOR_ID_PLX,
122 .device = PCI_DEVICE_ID_PLX_9030,
123 .subvendor = PCI_SUBVENDOR_ID_PEP,
124 .subdevice = CIF_SUBDEVICE_PROFIBUS,
125 },
126 {
127 .vendor = PCI_VENDOR_ID_PLX,
128 .device = PCI_DEVICE_ID_PLX_9030,
129 .subvendor = PCI_SUBVENDOR_ID_PEP,
130 .subdevice = CIF_SUBDEVICE_DEVICENET,
131 },
132 { 0, }
133};
134
135static struct pci_driver hilscher_pci_driver = {
136 .name = "hilscher",
137 .id_table = hilscher_pci_ids,
138 .probe = hilscher_pci_probe,
139 .remove = hilscher_pci_remove,
140};
141
142static int __init hilscher_init_module(void)
143{
144 return pci_register_driver(&hilscher_pci_driver);
145}
146
147static void __exit hilscher_exit_module(void)
148{
149 pci_unregister_driver(&hilscher_pci_driver);
150}
151
152module_init(hilscher_init_module);
153module_exit(hilscher_exit_module);
154
155MODULE_LICENSE("GPL v2");
156MODULE_AUTHOR("Hans J. Koch, Benedikt Spranger");
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 73c49362cd47..654857493a82 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -29,13 +29,6 @@
29#include "hcd.h" 29#include "hcd.h"
30#include "usb.h" 30#include "usb.h"
31 31
32#define VERBOSE_DEBUG 0
33
34#if VERBOSE_DEBUG
35#define dev_vdbg dev_dbg
36#else
37#define dev_vdbg(dev, fmt, args...) do { } while (0)
38#endif
39 32
40#ifdef CONFIG_HOTPLUG 33#ifdef CONFIG_HOTPLUG
41 34
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 1d533a2ec3a6..11be8a325e26 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -345,11 +345,6 @@ void debugfs_remove(struct dentry *dentry)
345 switch (dentry->d_inode->i_mode & S_IFMT) { 345 switch (dentry->d_inode->i_mode & S_IFMT) {
346 case S_IFDIR: 346 case S_IFDIR:
347 ret = simple_rmdir(parent->d_inode, dentry); 347 ret = simple_rmdir(parent->d_inode, dentry);
348 if (ret)
349 printk(KERN_ERR
350 "DebugFS rmdir on %s failed : "
351 "directory not empty.\n",
352 dentry->d_name.name);
353 break; 348 break;
354 case S_IFLNK: 349 case S_IFLNK:
355 kfree(dentry->d_inode->i_private); 350 kfree(dentry->d_inode->i_private);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 196d83266e34..1a5e8e893d75 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -489,6 +489,29 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
489} 489}
490 490
491/** 491/**
492 * gfs2_setlease - acquire/release a file lease
493 * @file: the file pointer
494 * @arg: lease type
495 * @fl: file lock
496 *
497 * Returns: errno
498 */
499
500static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
501{
502 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
503
504 /*
505 * We don't currently have a way to enforce a lease across the whole
506 * cluster; until we do, disable leases (by just returning -EINVAL),
507 * unless the administrator has requested purely local locking.
508 */
509 if (!sdp->sd_args.ar_localflocks)
510 return -EINVAL;
511 return setlease(file, arg, fl);
512}
513
514/**
492 * gfs2_lock - acquire/release a posix lock on a file 515 * gfs2_lock - acquire/release a posix lock on a file
493 * @file: the file pointer 516 * @file: the file pointer
494 * @cmd: either modify or retrieve lock state, possibly wait 517 * @cmd: either modify or retrieve lock state, possibly wait
@@ -638,6 +661,7 @@ const struct file_operations gfs2_file_fops = {
638 .flock = gfs2_flock, 661 .flock = gfs2_flock,
639 .splice_read = generic_file_splice_read, 662 .splice_read = generic_file_splice_read,
640 .splice_write = generic_file_splice_write, 663 .splice_write = generic_file_splice_write,
664 .setlease = gfs2_setlease,
641}; 665};
642 666
643const struct file_operations gfs2_dir_fops = { 667const struct file_operations gfs2_dir_fops = {
diff --git a/fs/locks.c b/fs/locks.c
index 431a8b871fce..4f2d749ac624 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -458,22 +458,20 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
458} 458}
459 459
460/* Allocate a file_lock initialised to this type of lease */ 460/* Allocate a file_lock initialised to this type of lease */
461static int lease_alloc(struct file *filp, int type, struct file_lock **flp) 461static struct file_lock *lease_alloc(struct file *filp, int type)
462{ 462{
463 struct file_lock *fl = locks_alloc_lock(); 463 struct file_lock *fl = locks_alloc_lock();
464 int error = -ENOMEM; 464 int error = -ENOMEM;
465 465
466 if (fl == NULL) 466 if (fl == NULL)
467 goto out; 467 return ERR_PTR(error);
468 468
469 error = lease_init(filp, type, fl); 469 error = lease_init(filp, type, fl);
470 if (error) { 470 if (error) {
471 locks_free_lock(fl); 471 locks_free_lock(fl);
472 fl = NULL; 472 return ERR_PTR(error);
473 } 473 }
474out: 474 return fl;
475 *flp = fl;
476 return error;
477} 475}
478 476
479/* Check if two locks overlap each other. 477/* Check if two locks overlap each other.
@@ -661,7 +659,7 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
661 return result; 659 return result;
662} 660}
663 661
664int 662void
665posix_test_lock(struct file *filp, struct file_lock *fl) 663posix_test_lock(struct file *filp, struct file_lock *fl)
666{ 664{
667 struct file_lock *cfl; 665 struct file_lock *cfl;
@@ -673,14 +671,12 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
673 if (posix_locks_conflict(cfl, fl)) 671 if (posix_locks_conflict(cfl, fl))
674 break; 672 break;
675 } 673 }
676 if (cfl) { 674 if (cfl)
677 __locks_copy_lock(fl, cfl); 675 __locks_copy_lock(fl, cfl);
678 unlock_kernel(); 676 else
679 return 1;
680 } else
681 fl->fl_type = F_UNLCK; 677 fl->fl_type = F_UNLCK;
682 unlock_kernel(); 678 unlock_kernel();
683 return 0; 679 return;
684} 680}
685 681
686EXPORT_SYMBOL(posix_test_lock); 682EXPORT_SYMBOL(posix_test_lock);
@@ -1169,9 +1165,9 @@ static void time_out_leases(struct inode *inode)
1169 * @inode: the inode of the file to return 1165 * @inode: the inode of the file to return
1170 * @mode: the open mode (read or write) 1166 * @mode: the open mode (read or write)
1171 * 1167 *
1172 * break_lease (inlined for speed) has checked there already 1168 * break_lease (inlined for speed) has checked there already is at least
1173 * is a lease on this file. Leases are broken on a call to open() 1169 * some kind of lock (maybe a lease) on this file. Leases are broken on
1174 * or truncate(). This function can sleep unless you 1170 * a call to open() or truncate(). This function can sleep unless you
1175 * specified %O_NONBLOCK to your open(). 1171 * specified %O_NONBLOCK to your open().
1176 */ 1172 */
1177int __break_lease(struct inode *inode, unsigned int mode) 1173int __break_lease(struct inode *inode, unsigned int mode)
@@ -1179,12 +1175,10 @@ int __break_lease(struct inode *inode, unsigned int mode)
1179 int error = 0, future; 1175 int error = 0, future;
1180 struct file_lock *new_fl, *flock; 1176 struct file_lock *new_fl, *flock;
1181 struct file_lock *fl; 1177 struct file_lock *fl;
1182 int alloc_err;
1183 unsigned long break_time; 1178 unsigned long break_time;
1184 int i_have_this_lease = 0; 1179 int i_have_this_lease = 0;
1185 1180
1186 alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK, 1181 new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK);
1187 &new_fl);
1188 1182
1189 lock_kernel(); 1183 lock_kernel();
1190 1184
@@ -1212,8 +1206,9 @@ int __break_lease(struct inode *inode, unsigned int mode)
1212 goto out; 1206 goto out;
1213 } 1207 }
1214 1208
1215 if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) { 1209 if (IS_ERR(new_fl) && !i_have_this_lease
1216 error = alloc_err; 1210 && ((mode & O_NONBLOCK) == 0)) {
1211 error = PTR_ERR(new_fl);
1217 goto out; 1212 goto out;
1218 } 1213 }
1219 1214
@@ -1260,7 +1255,7 @@ restart:
1260 1255
1261out: 1256out:
1262 unlock_kernel(); 1257 unlock_kernel();
1263 if (!alloc_err) 1258 if (!IS_ERR(new_fl))
1264 locks_free_lock(new_fl); 1259 locks_free_lock(new_fl);
1265 return error; 1260 return error;
1266} 1261}
@@ -1329,7 +1324,7 @@ int fcntl_getlease(struct file *filp)
1329} 1324}
1330 1325
1331/** 1326/**
1332 * __setlease - sets a lease on an open file 1327 * setlease - sets a lease on an open file
1333 * @filp: file pointer 1328 * @filp: file pointer
1334 * @arg: type of lease to obtain 1329 * @arg: type of lease to obtain
1335 * @flp: input - file_lock to use, output - file_lock inserted 1330 * @flp: input - file_lock to use, output - file_lock inserted
@@ -1339,18 +1334,24 @@ int fcntl_getlease(struct file *filp)
1339 * 1334 *
1340 * Called with kernel lock held. 1335 * Called with kernel lock held.
1341 */ 1336 */
1342static int __setlease(struct file *filp, long arg, struct file_lock **flp) 1337int setlease(struct file *filp, long arg, struct file_lock **flp)
1343{ 1338{
1344 struct file_lock *fl, **before, **my_before = NULL, *lease; 1339 struct file_lock *fl, **before, **my_before = NULL, *lease;
1345 struct dentry *dentry = filp->f_path.dentry; 1340 struct dentry *dentry = filp->f_path.dentry;
1346 struct inode *inode = dentry->d_inode; 1341 struct inode *inode = dentry->d_inode;
1347 int error, rdlease_count = 0, wrlease_count = 0; 1342 int error, rdlease_count = 0, wrlease_count = 0;
1348 1343
1344 if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
1345 return -EACCES;
1346 if (!S_ISREG(inode->i_mode))
1347 return -EINVAL;
1348 error = security_file_lock(filp, arg);
1349 if (error)
1350 return error;
1351
1349 time_out_leases(inode); 1352 time_out_leases(inode);
1350 1353
1351 error = -EINVAL; 1354 BUG_ON(!(*flp)->fl_lmops->fl_break);
1352 if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break)
1353 goto out;
1354 1355
1355 lease = *flp; 1356 lease = *flp;
1356 1357
@@ -1418,39 +1419,49 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
1418out: 1419out:
1419 return error; 1420 return error;
1420} 1421}
1422EXPORT_SYMBOL(setlease);
1421 1423
1422 /** 1424 /**
1423 * setlease - sets a lease on an open file 1425 * vfs_setlease - sets a lease on an open file
1424 * @filp: file pointer 1426 * @filp: file pointer
1425 * @arg: type of lease to obtain 1427 * @arg: type of lease to obtain
1426 * @lease: file_lock to use 1428 * @lease: file_lock to use
1427 * 1429 *
1428 * Call this to establish a lease on the file. 1430 * Call this to establish a lease on the file.
1429 * The fl_lmops fl_break function is required by break_lease 1431 * The (*lease)->fl_lmops->fl_break operation must be set; if not,
1432 * break_lease will oops!
1433 *
1434 * This will call the filesystem's setlease file method, if
1435 * defined. Note that there is no getlease method; instead, the
1436 * filesystem setlease method should call back to setlease() to
1437 * add a lease to the inode's lease list, where fcntl_getlease() can
1438 * find it. Since fcntl_getlease() only reports whether the current
1439 * task holds a lease, a cluster filesystem need only do this for
1440 * leases held by processes on this node.
1441 *
1442 * There is also no break_lease method; filesystems that
 1443 * handle their own leases should break leases themselves from the
1444 * filesystem's open, create, and (on truncate) setattr methods.
1445 *
1446 * Warning: the only current setlease methods exist only to disable
1447 * leases in certain cases. More vfs changes may be required to
1448 * allow a full filesystem lease implementation.
1430 */ 1449 */
1431 1450
1432int setlease(struct file *filp, long arg, struct file_lock **lease) 1451int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1433{ 1452{
1434 struct dentry *dentry = filp->f_path.dentry;
1435 struct inode *inode = dentry->d_inode;
1436 int error; 1453 int error;
1437 1454
1438 if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
1439 return -EACCES;
1440 if (!S_ISREG(inode->i_mode))
1441 return -EINVAL;
1442 error = security_file_lock(filp, arg);
1443 if (error)
1444 return error;
1445
1446 lock_kernel(); 1455 lock_kernel();
1447 error = __setlease(filp, arg, lease); 1456 if (filp->f_op && filp->f_op->setlease)
1457 error = filp->f_op->setlease(filp, arg, lease);
1458 else
1459 error = setlease(filp, arg, lease);
1448 unlock_kernel(); 1460 unlock_kernel();
1449 1461
1450 return error; 1462 return error;
1451} 1463}
1452 1464EXPORT_SYMBOL_GPL(vfs_setlease);
1453EXPORT_SYMBOL(setlease);
1454 1465
1455/** 1466/**
1456 * fcntl_setlease - sets a lease on an open file 1467 * fcntl_setlease - sets a lease on an open file
@@ -1469,14 +1480,6 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1469 struct inode *inode = dentry->d_inode; 1480 struct inode *inode = dentry->d_inode;
1470 int error; 1481 int error;
1471 1482
1472 if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
1473 return -EACCES;
1474 if (!S_ISREG(inode->i_mode))
1475 return -EINVAL;
1476 error = security_file_lock(filp, arg);
1477 if (error)
1478 return error;
1479
1480 locks_init_lock(&fl); 1483 locks_init_lock(&fl);
1481 error = lease_init(filp, arg, &fl); 1484 error = lease_init(filp, arg, &fl);
1482 if (error) 1485 if (error)
@@ -1484,15 +1487,15 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1484 1487
1485 lock_kernel(); 1488 lock_kernel();
1486 1489
1487 error = __setlease(filp, arg, &flp); 1490 error = vfs_setlease(filp, arg, &flp);
1488 if (error || arg == F_UNLCK) 1491 if (error || arg == F_UNLCK)
1489 goto out_unlock; 1492 goto out_unlock;
1490 1493
1491 error = fasync_helper(fd, filp, 1, &flp->fl_fasync); 1494 error = fasync_helper(fd, filp, 1, &flp->fl_fasync);
1492 if (error < 0) { 1495 if (error < 0) {
1493 /* remove lease just inserted by __setlease */ 1496 /* remove lease just inserted by setlease */
1494 flp->fl_type = F_UNLCK | F_INPROGRESS; 1497 flp->fl_type = F_UNLCK | F_INPROGRESS;
1495 flp->fl_break_time = jiffies- 10; 1498 flp->fl_break_time = jiffies - 10;
1496 time_out_leases(inode); 1499 time_out_leases(inode);
1497 goto out_unlock; 1500 goto out_unlock;
1498 } 1501 }
@@ -1597,8 +1600,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1597/** 1600/**
1598 * vfs_test_lock - test file byte range lock 1601 * vfs_test_lock - test file byte range lock
1599 * @filp: The file to test lock for 1602 * @filp: The file to test lock for
1600 * @fl: The lock to test 1603 * @fl: The lock to test; also used to hold result
1601 * @conf: Place to return a copy of the conflicting lock, if found
1602 * 1604 *
1603 * Returns -ERRNO on failure. Indicates presence of conflicting lock by 1605 * Returns -ERRNO on failure. Indicates presence of conflicting lock by
1604 * setting conf->fl_type to something other than F_UNLCK. 1606 * setting conf->fl_type to something other than F_UNLCK.
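
The long comment on vfs_setlease() spells out the contract for a filesystem ->setlease method: either refuse leases it cannot keep coherent, or call back into the generic setlease() so the lease lands on the inode's list where fcntl_getlease() can see it. A sketch of such a method with illustrative myfs_ names (gfs2_setlease earlier in this series is the real instance of the refuse-unless-local case):

	static int myfs_locking_is_local(struct file *file);	/* assumed */

	static int myfs_setlease(struct file *file, long arg,
				 struct file_lock **flp)
	{
		/* leases cannot be enforced cluster-wide; allow them
		 * only when the admin asked for purely local locking */
		if (!myfs_locking_is_local(file))
			return -EINVAL;

		return setlease(file, arg, flp);	/* generic insert */
	}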
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8689b736fdd9..c87dc713b5d7 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -53,6 +53,7 @@ static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
53static int nfs_check_flags(int flags); 53static int nfs_check_flags(int flags);
54static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); 54static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
55static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); 55static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
56static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
56 57
57const struct file_operations nfs_file_operations = { 58const struct file_operations nfs_file_operations = {
58 .llseek = nfs_file_llseek, 59 .llseek = nfs_file_llseek,
@@ -69,6 +70,7 @@ const struct file_operations nfs_file_operations = {
69 .flock = nfs_flock, 70 .flock = nfs_flock,
70 .splice_read = nfs_file_splice_read, 71 .splice_read = nfs_file_splice_read,
71 .check_flags = nfs_check_flags, 72 .check_flags = nfs_check_flags,
73 .setlease = nfs_setlease,
72}; 74};
73 75
74const struct inode_operations nfs_file_inode_operations = { 76const struct inode_operations nfs_file_inode_operations = {
@@ -400,7 +402,9 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
400 402
401 lock_kernel(); 403 lock_kernel();
402 /* Try local locking first */ 404 /* Try local locking first */
403 if (posix_test_lock(filp, fl)) { 405 posix_test_lock(filp, fl);
406 if (fl->fl_type != F_UNLCK) {
407 /* found a conflict */
404 goto out; 408 goto out;
405 } 409 }
406 410
@@ -558,3 +562,13 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
558 return do_unlk(filp, cmd, fl); 562 return do_unlk(filp, cmd, fl);
559 return do_setlk(filp, cmd, fl); 563 return do_setlk(filp, cmd, fl);
560} 564}
565
566static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
567{
568 /*
569 * There is no protocol support for leases, so we have no way
570 * to implement them correctly in the face of opens by other
571 * clients.
572 */
573 return -EINVAL;
574}
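
posix_test_lock() now returns void and reports a conflict by rewriting *fl in place, so callers such as do_getlk() above test fl->fl_type rather than a return value. The new convention in isolation, as a small illustrative helper:

	#include <linux/fs.h>

	static int my_has_conflict(struct file *filp, struct file_lock *fl)
	{
		posix_test_lock(filp, fl);	/* fills *fl on conflict */
		return fl->fl_type != F_UNLCK;	/* F_UNLCK: none found */
	}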
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e4a4c87ec8c6..6284807bd37e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -256,7 +256,7 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
256 /* The following nfsd_close may not actually close the file, 256 /* The following nfsd_close may not actually close the file,
257 * but we want to remove the lease in any case. */ 257 * but we want to remove the lease in any case. */
258 if (dp->dl_flock) 258 if (dp->dl_flock)
259 setlease(filp, F_UNLCK, &dp->dl_flock); 259 vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
260 nfsd_close(filp); 260 nfsd_close(filp);
261} 261}
262 262
@@ -1402,7 +1402,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
1402/* 1402/*
1403 * Set the delegation file_lock back pointer. 1403 * Set the delegation file_lock back pointer.
1404 * 1404 *
1405 * Called from __setlease() with lock_kernel() held. 1405 * Called from setlease() with lock_kernel() held.
1406 */ 1406 */
1407static 1407static
1408void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) 1408void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
@@ -1416,7 +1416,7 @@ void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
1416} 1416}
1417 1417
1418/* 1418/*
1419 * Called from __setlease() with lock_kernel() held 1419 * Called from setlease() with lock_kernel() held
1420 */ 1420 */
1421static 1421static
1422int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) 1422int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
@@ -1716,10 +1716,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1716 fl.fl_file = stp->st_vfs_file; 1716 fl.fl_file = stp->st_vfs_file;
1717 fl.fl_pid = current->tgid; 1717 fl.fl_pid = current->tgid;
1718 1718
1719 /* setlease checks to see if delegation should be handed out. 1719 /* vfs_setlease checks to see if delegation should be handed out.
1720 * the lock_manager callbacks fl_mylease and fl_change are used 1720 * the lock_manager callbacks fl_mylease and fl_change are used
1721 */ 1721 */
1722 if ((status = setlease(stp->st_vfs_file, 1722 if ((status = vfs_setlease(stp->st_vfs_file,
1723 flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { 1723 flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
1724 dprintk("NFSD: setlease failed [%d], no delegation\n", status); 1724 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
1725 unhash_delegation(dp); 1725 unhash_delegation(dp);
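
nfsd hands out a delegation by building an FL_LEASE file_lock and pushing it through the new vfs_setlease() entry point; the one hard requirement setlease() now enforces with a BUG_ON is a non-NULL fl_break. A hedged sketch of that call shape, mirroring the fl setup in nfs4_open_delegation() with illustrative my_ names:

	#include <linux/fs.h>
	#include <linux/sched.h>

	static void my_break(struct file_lock *fl)
	{
		/* recall the delegation; called under the kernel lock */
	}

	static struct lock_manager_operations my_lease_ops = {
		.fl_break = my_break,		/* mandatory */
	};

	static int my_grant_read_delegation(struct file *filp)
	{
		struct file_lock fl, *flp = &fl;

		locks_init_lock(&fl);
		fl.fl_lmops = &my_lease_ops;
		fl.fl_flags = FL_LEASE;
		fl.fl_type  = F_RDLCK;
		fl.fl_end   = OFFSET_MAX;
		fl.fl_owner = (fl_owner_t) filp;
		fl.fl_file  = filp;
		fl.fl_pid   = current->tgid;

		return vfs_setlease(filp, F_RDLCK, &flp);
	}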
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index aee966c44aac..048e6054c2fd 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -361,20 +361,20 @@ static struct dentry_operations sysfs_dentry_ops = {
361struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 361struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
362{ 362{
363 char *dup_name = NULL; 363 char *dup_name = NULL;
364 struct sysfs_dirent *sd = NULL; 364 struct sysfs_dirent *sd;
365 365
366 if (type & SYSFS_COPY_NAME) { 366 if (type & SYSFS_COPY_NAME) {
367 name = dup_name = kstrdup(name, GFP_KERNEL); 367 name = dup_name = kstrdup(name, GFP_KERNEL);
368 if (!name) 368 if (!name)
369 goto err_out; 369 return NULL;
370 } 370 }
371 371
372 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 372 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
373 if (!sd) 373 if (!sd)
374 goto err_out; 374 goto err_out1;
375 375
376 if (sysfs_alloc_ino(&sd->s_ino)) 376 if (sysfs_alloc_ino(&sd->s_ino))
377 goto err_out; 377 goto err_out2;
378 378
379 atomic_set(&sd->s_count, 1); 379 atomic_set(&sd->s_count, 1);
380 atomic_set(&sd->s_active, 0); 380 atomic_set(&sd->s_active, 0);
@@ -386,9 +386,10 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
386 386
387 return sd; 387 return sd;
388 388
389 err_out: 389 err_out2:
390 kfree(dup_name);
391 kmem_cache_free(sysfs_dir_cachep, sd); 390 kmem_cache_free(sysfs_dir_cachep, sd);
391 err_out1:
392 kfree(dup_name);
392 return NULL; 393 return NULL;
393} 394}
394 395
@@ -698,17 +699,19 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
698 699
699 /* link in */ 700 /* link in */
700 sysfs_addrm_start(&acxt, parent_sd); 701 sysfs_addrm_start(&acxt, parent_sd);
702
701 if (!sysfs_find_dirent(parent_sd, name)) { 703 if (!sysfs_find_dirent(parent_sd, name)) {
702 sysfs_add_one(&acxt, sd); 704 sysfs_add_one(&acxt, sd);
703 sysfs_link_sibling(sd); 705 sysfs_link_sibling(sd);
704 } 706 }
705 if (sysfs_addrm_finish(&acxt)) { 707
706 *p_sd = sd; 708 if (!sysfs_addrm_finish(&acxt)) {
707 return 0; 709 sysfs_put(sd);
710 return -EEXIST;
708 } 711 }
709 712
710 sysfs_put(sd); 713 *p_sd = sd;
711 return -EEXIST; 714 return 0;
712} 715}
713 716
714int sysfs_create_subdir(struct kobject *kobj, const char *name, 717int sysfs_create_subdir(struct kobject *kobj, const char *name,
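
The sysfs hunks in dir.c, file.c and symlink.c all restructure one idiom: after sysfs_addrm_finish(), a zero return means the new dirent was never linked in, so the failure path drops the caller's reference, and the straight-line path is now success. The idiom in isolation, as a sketch with an illustrative wrapper name:

	static int my_add_dirent(struct sysfs_dirent *parent_sd,
				 struct sysfs_dirent *sd, const char *name)
	{
		struct sysfs_addrm_cxt acxt;

		sysfs_addrm_start(&acxt, parent_sd);

		if (!sysfs_find_dirent(parent_sd, name)) {
			sysfs_add_one(&acxt, sd);
			sysfs_link_sibling(sd);
		}

		if (!sysfs_addrm_finish(&acxt)) {
			sysfs_put(sd);		/* never linked: drop ref */
			return -EEXIST;
		}

		return 0;			/* sd is live in the tree */
	}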
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index cc497994b2a8..3e1cc062a740 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -410,11 +410,12 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
 		sysfs_link_sibling(sd);
 	}
 
-	if (sysfs_addrm_finish(&acxt))
-		return 0;
+	if (!sysfs_addrm_finish(&acxt)) {
+		sysfs_put(sd);
+		return -EEXIST;
+	}
 
-	sysfs_put(sd);
-	return -EEXIST;
+	return 0;
 }
 
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 3756e152285a..10d1b52899f1 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,7 +133,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
  */
 static struct lock_class_key sysfs_inode_imutex_key;
 
-void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
+static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
 	inode->i_blocks = 0;
 	inode->i_mapping->a_ops = &sysfs_aops;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 402cc356203c..60714d075c2f 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -43,19 +43,19 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sysfs_sb = sb;
 
-	inode = new_inode(sysfs_sb);
+	/* get root inode, initialize and unlock it */
+	inode = sysfs_get_inode(&sysfs_root);
 	if (!inode) {
 		pr_debug("sysfs: could not get root inode\n");
 		return -ENOMEM;
 	}
 
-	sysfs_init_inode(&sysfs_root, inode);
-
 	inode->i_op = &sysfs_dir_inode_operations;
 	inode->i_fop = &sysfs_dir_operations;
-	/* directory inodes start off with i_nlink == 2 (for "." entry) */
-	inc_nlink(inode);
+	inc_nlink(inode); /* directory, account for "." */
+	unlock_new_inode(inode);
 
+	/* instantiate and link root dentry */
 	root = d_alloc_root(inode);
 	if (!root) {
 		pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
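The unlock_new_inode() call added here suggests sysfs_get_inode() obtains the inode with iget_locked(), which returns it locked and flagged I_NEW on first lookup; the caller finishes initialization and then unlocks it before anyone else may use it. A generic sketch of that idiom (my_iget and my_fill_inode are hypothetical, the VFS helpers are real):

	static struct inode *my_iget(struct super_block *sb, unsigned long ino)
	{
		struct inode *inode = iget_locked(sb, ino);

		if (inode && (inode->i_state & I_NEW)) {
			my_fill_inode(inode);		/* first user fills it in */
			unlock_new_inode(inode);	/* wake concurrent lookups */
		}
		return inode;		/* NULL on allocation failure */
	}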
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 2f86e0422290..4ce687f0b5d0 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -86,7 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 	sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
 	if (!sd)
 		goto out_put;
+
 	sd->s_elem.symlink.target_sd = target_sd;
+	target_sd = NULL;	/* reference is now owned by the symlink */
 
 	sysfs_addrm_start(&acxt, parent_sd);
 
@@ -95,11 +97,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 		sysfs_link_sibling(sd);
 	}
 
-	if (sysfs_addrm_finish(&acxt))
-		return 0;
+	if (!sysfs_addrm_finish(&acxt)) {
+		error = -EEXIST;
+		goto out_put;
+	}
+
+	return 0;
 
-	error = -EEXIST;
-	/* fall through */
  out_put:
 	sysfs_put(target_sd);
 	sysfs_put(sd);
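Clearing target_sd right after storing it is what lets the success and error paths share the single sysfs_put(target_sd) at out_put: once the symlink dirent owns the reference, the local pointer is NULL and the extra put becomes a no-op (sysfs_put() of NULL is assumed safe here). The same ownership handoff in miniature, with hypothetical names and NULL-safe puts assumed:

	int err = -ENOMEM;
	struct object *obj = get_object(kobj);	/* we hold a reference */
	struct holder *holder = alloc_holder();

	if (!obj || !holder)
		goto out_put;

	holder->obj = obj;
	obj = NULL;			/* reference now owned by holder */

	err = install(holder);
	if (err)
		goto out_put;		/* dropping holder also drops obj */

	return 0;

 out_put:
	put_object(obj);		/* no-op once ownership has moved */
	put_holder(holder);
	return err;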
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6a37f2386a8d..6b8c8d76d308 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -71,7 +71,6 @@ extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
 extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 
 extern void sysfs_delete_inode(struct inode *inode);
-extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
 extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
 extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
 
diff --git a/include/linux/device.h b/include/linux/device.h
index be2debed70d2..d9f0a57f5a2f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -572,6 +572,16 @@ dev_dbg(struct device * dev, const char * fmt, ...)
 }
 #endif
 
+#ifdef VERBOSE_DEBUG
+#define dev_vdbg	dev_dbg
+#else
+static inline int __attribute__ ((format (printf, 2, 3)))
+dev_vdbg(struct device * dev, const char * fmt, ...)
+{
+	return 0;
+}
+#endif
+
 #define dev_err(dev, format, arg...)		\
 	dev_printk(KERN_ERR , dev , format , ## arg)
 #define dev_info(dev, format, arg...)		\
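The stub keeps the printf format attribute, so arguments to dev_vdbg() are type-checked even when the call compiles away to nothing. To actually see the messages, a driver defines VERBOSE_DEBUG before including the header (and DEBUG as well, since dev_vdbg then aliases dev_dbg, which is itself conditional on DEBUG). A usage sketch with a hypothetical helper:

	/* per-file switches, before any includes */
	#define DEBUG
	#define VERBOSE_DEBUG
	#include <linux/device.h>

	static void my_dump_reg(struct device *dev, int reg, u32 val)
	{
		/* compiles to a no-op unless VERBOSE_DEBUG is defined above */
		dev_vdbg(dev, "reg %#x = %#010x\n", reg, val);
	}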
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b806c5e32eb..9562a59b3703 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -862,7 +862,7 @@ extern void locks_init_lock(struct file_lock *);
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
-extern int posix_test_lock(struct file *, struct file_lock *);
+extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
 extern int posix_unblock_lock(struct file *, struct file_lock *);
@@ -873,6 +873,7 @@ extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int setlease(struct file *, long, struct file_lock **);
+extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
@@ -1122,6 +1123,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+	int (*setlease)(struct file *, long, struct file_lock **);
 };
 
 struct inode_operations {
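The new ->setlease hook complements the vfs_setlease() export above: presumably vfs_setlease() dispatches to the filesystem's method when one is set and falls back to the generic setlease() otherwise, which is how a filesystem can veto or augment local leases (the gfs2 change elsewhere in this merge looks like such a user). A hypothetical implementation that simply refuses them:

	/* local leases are meaningless across cluster nodes */
	static int myfs_setlease(struct file *file, long arg,
				 struct file_lock **flp)
	{
		return -EINVAL;
	}

	static const struct file_operations myfs_file_ops = {
		.read		= do_sync_read,
		.write		= do_sync_write,
		.setlease	= myfs_setlease,
	};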
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 06cbf41d32d2..aa2fe22b1baa 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -36,15 +36,24 @@ extern char uevent_helper[];
 /* counter to tag the uevent, read only except for the kobject core */
 extern u64 uevent_seqnum;
 
-/* the actions here must match the proper string in lib/kobject_uevent.c */
-typedef int __bitwise kobject_action_t;
+/*
+ * The actions here must match the index to the string array
+ * in lib/kobject_uevent.c
+ *
+ * Do not add new actions here without checking with the driver-core
+ * maintainers. Action strings are not meant to express subsystem
+ * or device specific properties. In most cases you want to send a
+ * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event
+ * specific variables added to the event environment.
+ */
 enum kobject_action {
-	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* exclusive to core */
-	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* exclusive to core */
-	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* device state change */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* device offline */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* device online */
-	KOBJ_MOVE	= (__force kobject_action_t) 0x06,	/* device move */
+	KOBJ_ADD,
+	KOBJ_REMOVE,
+	KOBJ_CHANGE,
+	KOBJ_MOVE,
+	KOBJ_ONLINE,
+	KOBJ_OFFLINE,
+	KOBJ_MAX
 };
 
 struct kobject {
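Per the new comment, subsystems should prefer KOBJ_CHANGE plus event-specific environment variables over inventing new action verbs. kobject_uevent_env() takes a NULL-terminated array of "KEY=value" strings; a sketch with made-up variables:

	char *envp[] = { "ERROR=1", "REASON=firmware_crash", NULL };

	/* report a state change, with the details in the environment */
	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);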
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 273781c82e4d..2735b7cadd20 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -284,8 +284,6 @@ extern int device_prepare_suspend(pm_message_t state);
 #define device_may_wakeup(dev) \
 	(device_can_wakeup(dev) && (dev)->power.should_wakeup)
 
-extern int dpm_runtime_suspend(struct device *, pm_message_t);
-extern void dpm_runtime_resume(struct device *);
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
 #define suspend_report_result(fn, ret)					\
@@ -317,15 +315,6 @@ static inline int device_suspend(pm_message_t state)
 #define device_set_wakeup_enable(dev,val)	do{}while(0)
 #define device_may_wakeup(dev)			(0)
 
-static inline int dpm_runtime_suspend(struct device * dev, pm_message_t state)
-{
-	return 0;
-}
-
-static inline void dpm_runtime_resume(struct device * dev)
-{
-}
-
 #define suspend_report_result(fn, ret)		do { } while (0)
 
 static inline int call_platform_enable_wakeup(struct device *dev, int is_on)
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
new file mode 100644
index 000000000000..44c28e94df50
--- /dev/null
+++ b/include/linux/uio_driver.h
@@ -0,0 +1,91 @@
+/*
+ * include/linux/uio_driver.h
+ *
+ * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de>
+ * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de>
+ * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * Userspace IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_DRIVER_H_
+#define _UIO_DRIVER_H_
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+
+/**
+ * struct uio_mem - description of a UIO memory region
+ * @kobj:		kobject for this mapping
+ * @addr:		address of the device's memory
+ * @size:		size of IO
+ * @memtype:		type of memory addr points to
+ * @internal_addr:	ioremap-ped version of addr, for driver internal use
+ */
+struct uio_mem {
+	struct kobject		kobj;
+	unsigned long		addr;
+	unsigned long		size;
+	int			memtype;
+	void __iomem		*internal_addr;
+};
+
+#define MAX_UIO_MAPS	5
+
+struct uio_device;
+
+/**
+ * struct uio_info - UIO device capabilities
+ * @uio_dev:		the UIO device this info belongs to
+ * @name:		device name
+ * @version:		device driver version
+ * @mem:		list of mappable memory regions, size==0 for end of list
+ * @irq:		interrupt number or UIO_IRQ_CUSTOM
+ * @irq_flags:		flags for request_irq()
+ * @priv:		optional private data
+ * @handler:		the device's irq handler
+ * @mmap:		mmap operation for this uio device
+ * @open:		open operation for this uio device
+ * @release:		release operation for this uio device
+ */
+struct uio_info {
+	struct uio_device	*uio_dev;
+	char			*name;
+	char			*version;
+	struct uio_mem		mem[MAX_UIO_MAPS];
+	long			irq;
+	unsigned long		irq_flags;
+	void			*priv;
+	irqreturn_t (*handler)(int irq, struct uio_info *dev_info);
+	int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);
+	int (*open)(struct uio_info *info, struct inode *inode);
+	int (*release)(struct uio_info *info, struct inode *inode);
+};
+
+extern int __must_check
+	__uio_register_device(struct module *owner,
+			      struct device *parent,
+			      struct uio_info *info);
+static inline int __must_check
+	uio_register_device(struct device *parent, struct uio_info *info)
+{
+	return __uio_register_device(THIS_MODULE, parent, info);
+}
+extern void uio_unregister_device(struct uio_info *info);
+extern void uio_event_notify(struct uio_info *info);
+
+/* defines for uio_device->irq */
+#define UIO_IRQ_CUSTOM	-1
+#define UIO_IRQ_NONE	-2
+
+/* defines for uio_device->memtype */
+#define UIO_MEM_NONE	0
+#define UIO_MEM_PHYS	1
+#define UIO_MEM_LOGICAL	2
+#define UIO_MEM_VIRTUAL	3
+
+#endif /* _LINUX_UIO_DRIVER_H_ */
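To illustrate how the pieces of this header fit together, a minimal hypothetical driver: one physical memory region, a handler that acknowledges the device's interrupt, and registration against a parent device obtained from the driver's bus probe routine. All my_* names and the addresses are invented, and a real handler must also silence the device's interrupt before returning:

	#include <linux/uio_driver.h>

	static irqreturn_t my_handler(int irq, struct uio_info *info)
	{
		/* ack/disable the device interrupt here; the UIO core then
		 * wakes up any userspace readers blocked on the device */
		return IRQ_HANDLED;
	}

	static struct uio_info my_uio_info = {
		.name		= "my_uio",
		.version	= "0.1",
		.irq		= 17,			/* or UIO_IRQ_NONE */
		.irq_flags	= IRQF_SHARED,
		.handler	= my_handler,
		.mem		= { {
			.addr		= 0xd0000000,	/* BAR as seen on the bus */
			.size		= 0x10000,
			.memtype	= UIO_MEM_PHYS,
		} },	/* remaining entries stay size == 0: end of list */
	};

	static int my_probe(struct device *parent)
	{
		/* the inline wrapper passes THIS_MODULE as owner */
		return uio_register_device(parent, &my_uio_info);
	}

	static void my_remove(struct device *parent)
	{
		uio_unregister_device(&my_uio_info);
	}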
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 495b7d4dd330..73328476761c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -65,18 +65,6 @@ config PM_TRACE
 	CAUTION: this option will cause your machine's real-time clock to be
 	set to an invalid time after a resume.
 
-config PM_SYSFS_DEPRECATED
-	bool "Driver model /sys/devices/.../power/state files (DEPRECATED)"
-	depends on PM && SYSFS
-	default n
-	help
-	  The driver model started out with a sysfs file intended to provide
-	  a userspace hook for device power management. This feature has never
-	  worked very well, except for limited testing purposes, and so it will
-	  be removed. It's not clear that a generic mechanism could really
-	  handle the wide variability of device power states; any replacements
-	  are likely to be bus or driver specific.
-
 config SOFTWARE_SUSPEND
 	bool "Software Suspend (Hibernation)"
 	depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index bd5ecbbafab1..6a80c784a8fb 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -33,25 +33,15 @@ static DEFINE_SPINLOCK(sequence_lock);
 static struct sock *uevent_sock;
 #endif
 
-static char *action_to_string(enum kobject_action action)
-{
-	switch (action) {
-	case KOBJ_ADD:
-		return "add";
-	case KOBJ_REMOVE:
-		return "remove";
-	case KOBJ_CHANGE:
-		return "change";
-	case KOBJ_OFFLINE:
-		return "offline";
-	case KOBJ_ONLINE:
-		return "online";
-	case KOBJ_MOVE:
-		return "move";
-	default:
-		return NULL;
-	}
-}
+/* the strings here must match the enum in include/linux/kobject.h */
+const char *kobject_actions[] = {
+	"add",
+	"remove",
+	"change",
+	"move",
+	"online",
+	"offline",
+};
 
 /**
  * kobject_uevent_env - send an uevent with environmental data
@@ -83,7 +73,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 
 	pr_debug("%s\n", __FUNCTION__);
 
-	action_string = action_to_string(action);
+	action_string = kobject_actions[action];
 	if (!action_string) {
 		pr_debug("kobject attempted to send uevent without action_string!\n");
 		return -EINVAL;
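One subtlety of the table version: the old switch returned NULL for an unknown action, while an array index trusts the caller, and none of the table's strings are NULL, so the !action_string test above can realistically only trip on an out-of-range read. If that mattered, a bounds check against the KOBJ_MAX sentinel now defined in the header would restore the old behaviour (an illustrative sketch, not part of this patch):

	static const char *action_to_string(enum kobject_action action)
	{
		if (action >= KOBJ_MAX)
			return NULL;	/* feeds the existing -EINVAL path */
		return kobject_actions[action];
	}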