diff options
Diffstat (limited to 'arch/sparc/lib')
32 files changed, 7186 insertions, 0 deletions
diff --git a/arch/sparc/lib/COPYING.LIB b/arch/sparc/lib/COPYING.LIB new file mode 100644 index 000000000000..eb685a5ec981 --- /dev/null +++ b/arch/sparc/lib/COPYING.LIB | |||
@@ -0,0 +1,481 @@ | |||
1 | GNU LIBRARY GENERAL PUBLIC LICENSE | ||
2 | Version 2, June 1991 | ||
3 | |||
4 | Copyright (C) 1991 Free Software Foundation, Inc. | ||
5 | 675 Mass Ave, Cambridge, MA 02139, USA | ||
6 | Everyone is permitted to copy and distribute verbatim copies | ||
7 | of this license document, but changing it is not allowed. | ||
8 | |||
9 | [This is the first released version of the library GPL. It is | ||
10 | numbered 2 because it goes with version 2 of the ordinary GPL.] | ||
11 | |||
12 | Preamble | ||
13 | |||
14 | The licenses for most software are designed to take away your | ||
15 | freedom to share and change it. By contrast, the GNU General Public | ||
16 | Licenses are intended to guarantee your freedom to share and change | ||
17 | free software--to make sure the software is free for all its users. | ||
18 | |||
19 | This license, the Library General Public License, applies to some | ||
20 | specially designated Free Software Foundation software, and to any | ||
21 | other libraries whose authors decide to use it. You can use it for | ||
22 | your libraries, too. | ||
23 | |||
24 | When we speak of free software, we are referring to freedom, not | ||
25 | price. Our General Public Licenses are designed to make sure that you | ||
26 | have the freedom to distribute copies of free software (and charge for | ||
27 | this service if you wish), that you receive source code or can get it | ||
28 | if you want it, that you can change the software or use pieces of it | ||
29 | in new free programs; and that you know you can do these things. | ||
30 | |||
31 | To protect your rights, we need to make restrictions that forbid | ||
32 | anyone to deny you these rights or to ask you to surrender the rights. | ||
33 | These restrictions translate to certain responsibilities for you if | ||
34 | you distribute copies of the library, or if you modify it. | ||
35 | |||
36 | For example, if you distribute copies of the library, whether gratis | ||
37 | or for a fee, you must give the recipients all the rights that we gave | ||
38 | you. You must make sure that they, too, receive or can get the source | ||
39 | code. If you link a program with the library, you must provide | ||
40 | complete object files to the recipients so that they can relink them | ||
41 | with the library, after making changes to the library and recompiling | ||
42 | it. And you must show them these terms so they know their rights. | ||
43 | |||
44 | Our method of protecting your rights has two steps: (1) copyright | ||
45 | the library, and (2) offer you this license which gives you legal | ||
46 | permission to copy, distribute and/or modify the library. | ||
47 | |||
48 | Also, for each distributor's protection, we want to make certain | ||
49 | that everyone understands that there is no warranty for this free | ||
50 | library. If the library is modified by someone else and passed on, we | ||
51 | want its recipients to know that what they have is not the original | ||
52 | version, so that any problems introduced by others will not reflect on | ||
53 | the original authors' reputations. | ||
54 | |||
55 | Finally, any free program is threatened constantly by software | ||
56 | patents. We wish to avoid the danger that companies distributing free | ||
57 | software will individually obtain patent licenses, thus in effect | ||
58 | transforming the program into proprietary software. To prevent this, | ||
59 | we have made it clear that any patent must be licensed for everyone's | ||
60 | free use or not licensed at all. | ||
61 | |||
62 | Most GNU software, including some libraries, is covered by the ordinary | ||
63 | GNU General Public License, which was designed for utility programs. This | ||
64 | license, the GNU Library General Public License, applies to certain | ||
65 | designated libraries. This license is quite different from the ordinary | ||
66 | one; be sure to read it in full, and don't assume that anything in it is | ||
67 | the same as in the ordinary license. | ||
68 | |||
69 | The reason we have a separate public license for some libraries is that | ||
70 | they blur the distinction we usually make between modifying or adding to a | ||
71 | program and simply using it. Linking a program with a library, without | ||
72 | changing the library, is in some sense simply using the library, and is | ||
73 | analogous to running a utility program or application program. However, in | ||
74 | a textual and legal sense, the linked executable is a combined work, a | ||
75 | derivative of the original library, and the ordinary General Public License | ||
76 | treats it as such. | ||
77 | |||
78 | Because of this blurred distinction, using the ordinary General | ||
79 | Public License for libraries did not effectively promote software | ||
80 | sharing, because most developers did not use the libraries. We | ||
81 | concluded that weaker conditions might promote sharing better. | ||
82 | |||
83 | However, unrestricted linking of non-free programs would deprive the | ||
84 | users of those programs of all benefit from the free status of the | ||
85 | libraries themselves. This Library General Public License is intended to | ||
86 | permit developers of non-free programs to use free libraries, while | ||
87 | preserving your freedom as a user of such programs to change the free | ||
88 | libraries that are incorporated in them. (We have not seen how to achieve | ||
89 | this as regards changes in header files, but we have achieved it as regards | ||
90 | changes in the actual functions of the Library.) The hope is that this | ||
91 | will lead to faster development of free libraries. | ||
92 | |||
93 | The precise terms and conditions for copying, distribution and | ||
94 | modification follow. Pay close attention to the difference between a | ||
95 | "work based on the library" and a "work that uses the library". The | ||
96 | former contains code derived from the library, while the latter only | ||
97 | works together with the library. | ||
98 | |||
99 | Note that it is possible for a library to be covered by the ordinary | ||
100 | General Public License rather than by this special one. | ||
101 | |||
102 | GNU LIBRARY GENERAL PUBLIC LICENSE | ||
103 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | ||
104 | |||
105 | 0. This License Agreement applies to any software library which | ||
106 | contains a notice placed by the copyright holder or other authorized | ||
107 | party saying it may be distributed under the terms of this Library | ||
108 | General Public License (also called "this License"). Each licensee is | ||
109 | addressed as "you". | ||
110 | |||
111 | A "library" means a collection of software functions and/or data | ||
112 | prepared so as to be conveniently linked with application programs | ||
113 | (which use some of those functions and data) to form executables. | ||
114 | |||
115 | The "Library", below, refers to any such software library or work | ||
116 | which has been distributed under these terms. A "work based on the | ||
117 | Library" means either the Library or any derivative work under | ||
118 | copyright law: that is to say, a work containing the Library or a | ||
119 | portion of it, either verbatim or with modifications and/or translated | ||
120 | straightforwardly into another language. (Hereinafter, translation is | ||
121 | included without limitation in the term "modification".) | ||
122 | |||
123 | "Source code" for a work means the preferred form of the work for | ||
124 | making modifications to it. For a library, complete source code means | ||
125 | all the source code for all modules it contains, plus any associated | ||
126 | interface definition files, plus the scripts used to control compilation | ||
127 | and installation of the library. | ||
128 | |||
129 | Activities other than copying, distribution and modification are not | ||
130 | covered by this License; they are outside its scope. The act of | ||
131 | running a program using the Library is not restricted, and output from | ||
132 | such a program is covered only if its contents constitute a work based | ||
133 | on the Library (independent of the use of the Library in a tool for | ||
134 | writing it). Whether that is true depends on what the Library does | ||
135 | and what the program that uses the Library does. | ||
136 | |||
137 | 1. You may copy and distribute verbatim copies of the Library's | ||
138 | complete source code as you receive it, in any medium, provided that | ||
139 | you conspicuously and appropriately publish on each copy an | ||
140 | appropriate copyright notice and disclaimer of warranty; keep intact | ||
141 | all the notices that refer to this License and to the absence of any | ||
142 | warranty; and distribute a copy of this License along with the | ||
143 | Library. | ||
144 | |||
145 | You may charge a fee for the physical act of transferring a copy, | ||
146 | and you may at your option offer warranty protection in exchange for a | ||
147 | fee. | ||
148 | |||
149 | 2. You may modify your copy or copies of the Library or any portion | ||
150 | of it, thus forming a work based on the Library, and copy and | ||
151 | distribute such modifications or work under the terms of Section 1 | ||
152 | above, provided that you also meet all of these conditions: | ||
153 | |||
154 | a) The modified work must itself be a software library. | ||
155 | |||
156 | b) You must cause the files modified to carry prominent notices | ||
157 | stating that you changed the files and the date of any change. | ||
158 | |||
159 | c) You must cause the whole of the work to be licensed at no | ||
160 | charge to all third parties under the terms of this License. | ||
161 | |||
162 | d) If a facility in the modified Library refers to a function or a | ||
163 | table of data to be supplied by an application program that uses | ||
164 | the facility, other than as an argument passed when the facility | ||
165 | is invoked, then you must make a good faith effort to ensure that, | ||
166 | in the event an application does not supply such function or | ||
167 | table, the facility still operates, and performs whatever part of | ||
168 | its purpose remains meaningful. | ||
169 | |||
170 | (For example, a function in a library to compute square roots has | ||
171 | a purpose that is entirely well-defined independent of the | ||
172 | application. Therefore, Subsection 2d requires that any | ||
173 | application-supplied function or table used by this function must | ||
174 | be optional: if the application does not supply it, the square | ||
175 | root function must still compute square roots.) | ||
176 | |||
177 | These requirements apply to the modified work as a whole. If | ||
178 | identifiable sections of that work are not derived from the Library, | ||
179 | and can be reasonably considered independent and separate works in | ||
180 | themselves, then this License, and its terms, do not apply to those | ||
181 | sections when you distribute them as separate works. But when you | ||
182 | distribute the same sections as part of a whole which is a work based | ||
183 | on the Library, the distribution of the whole must be on the terms of | ||
184 | this License, whose permissions for other licensees extend to the | ||
185 | entire whole, and thus to each and every part regardless of who wrote | ||
186 | it. | ||
187 | |||
188 | Thus, it is not the intent of this section to claim rights or contest | ||
189 | your rights to work written entirely by you; rather, the intent is to | ||
190 | exercise the right to control the distribution of derivative or | ||
191 | collective works based on the Library. | ||
192 | |||
193 | In addition, mere aggregation of another work not based on the Library | ||
194 | with the Library (or with a work based on the Library) on a volume of | ||
195 | a storage or distribution medium does not bring the other work under | ||
196 | the scope of this License. | ||
197 | |||
198 | 3. You may opt to apply the terms of the ordinary GNU General Public | ||
199 | License instead of this License to a given copy of the Library. To do | ||
200 | this, you must alter all the notices that refer to this License, so | ||
201 | that they refer to the ordinary GNU General Public License, version 2, | ||
202 | instead of to this License. (If a newer version than version 2 of the | ||
203 | ordinary GNU General Public License has appeared, then you can specify | ||
204 | that version instead if you wish.) Do not make any other change in | ||
205 | these notices. | ||
206 | |||
207 | Once this change is made in a given copy, it is irreversible for | ||
208 | that copy, so the ordinary GNU General Public License applies to all | ||
209 | subsequent copies and derivative works made from that copy. | ||
210 | |||
211 | This option is useful when you wish to copy part of the code of | ||
212 | the Library into a program that is not a library. | ||
213 | |||
214 | 4. You may copy and distribute the Library (or a portion or | ||
215 | derivative of it, under Section 2) in object code or executable form | ||
216 | under the terms of Sections 1 and 2 above provided that you accompany | ||
217 | it with the complete corresponding machine-readable source code, which | ||
218 | must be distributed under the terms of Sections 1 and 2 above on a | ||
219 | medium customarily used for software interchange. | ||
220 | |||
221 | If distribution of object code is made by offering access to copy | ||
222 | from a designated place, then offering equivalent access to copy the | ||
223 | source code from the same place satisfies the requirement to | ||
224 | distribute the source code, even though third parties are not | ||
225 | compelled to copy the source along with the object code. | ||
226 | |||
227 | 5. A program that contains no derivative of any portion of the | ||
228 | Library, but is designed to work with the Library by being compiled or | ||
229 | linked with it, is called a "work that uses the Library". Such a | ||
230 | work, in isolation, is not a derivative work of the Library, and | ||
231 | therefore falls outside the scope of this License. | ||
232 | |||
233 | However, linking a "work that uses the Library" with the Library | ||
234 | creates an executable that is a derivative of the Library (because it | ||
235 | contains portions of the Library), rather than a "work that uses the | ||
236 | library". The executable is therefore covered by this License. | ||
237 | Section 6 states terms for distribution of such executables. | ||
238 | |||
239 | When a "work that uses the Library" uses material from a header file | ||
240 | that is part of the Library, the object code for the work may be a | ||
241 | derivative work of the Library even though the source code is not. | ||
242 | Whether this is true is especially significant if the work can be | ||
243 | linked without the Library, or if the work is itself a library. The | ||
244 | threshold for this to be true is not precisely defined by law. | ||
245 | |||
246 | If such an object file uses only numerical parameters, data | ||
247 | structure layouts and accessors, and small macros and small inline | ||
248 | functions (ten lines or less in length), then the use of the object | ||
249 | file is unrestricted, regardless of whether it is legally a derivative | ||
250 | work. (Executables containing this object code plus portions of the | ||
251 | Library will still fall under Section 6.) | ||
252 | |||
253 | Otherwise, if the work is a derivative of the Library, you may | ||
254 | distribute the object code for the work under the terms of Section 6. | ||
255 | Any executables containing that work also fall under Section 6, | ||
256 | whether or not they are linked directly with the Library itself. | ||
257 | |||
258 | 6. As an exception to the Sections above, you may also compile or | ||
259 | link a "work that uses the Library" with the Library to produce a | ||
260 | work containing portions of the Library, and distribute that work | ||
261 | under terms of your choice, provided that the terms permit | ||
262 | modification of the work for the customer's own use and reverse | ||
263 | engineering for debugging such modifications. | ||
264 | |||
265 | You must give prominent notice with each copy of the work that the | ||
266 | Library is used in it and that the Library and its use are covered by | ||
267 | this License. You must supply a copy of this License. If the work | ||
268 | during execution displays copyright notices, you must include the | ||
269 | copyright notice for the Library among them, as well as a reference | ||
270 | directing the user to the copy of this License. Also, you must do one | ||
271 | of these things: | ||
272 | |||
273 | a) Accompany the work with the complete corresponding | ||
274 | machine-readable source code for the Library including whatever | ||
275 | changes were used in the work (which must be distributed under | ||
276 | Sections 1 and 2 above); and, if the work is an executable linked | ||
277 | with the Library, with the complete machine-readable "work that | ||
278 | uses the Library", as object code and/or source code, so that the | ||
279 | user can modify the Library and then relink to produce a modified | ||
280 | executable containing the modified Library. (It is understood | ||
281 | that the user who changes the contents of definitions files in the | ||
282 | Library will not necessarily be able to recompile the application | ||
283 | to use the modified definitions.) | ||
284 | |||
285 | b) Accompany the work with a written offer, valid for at | ||
286 | least three years, to give the same user the materials | ||
287 | specified in Subsection 6a, above, for a charge no more | ||
288 | than the cost of performing this distribution. | ||
289 | |||
290 | c) If distribution of the work is made by offering access to copy | ||
291 | from a designated place, offer equivalent access to copy the above | ||
292 | specified materials from the same place. | ||
293 | |||
294 | d) Verify that the user has already received a copy of these | ||
295 | materials or that you have already sent this user a copy. | ||
296 | |||
297 | For an executable, the required form of the "work that uses the | ||
298 | Library" must include any data and utility programs needed for | ||
299 | reproducing the executable from it. However, as a special exception, | ||
300 | the source code distributed need not include anything that is normally | ||
301 | distributed (in either source or binary form) with the major | ||
302 | components (compiler, kernel, and so on) of the operating system on | ||
303 | which the executable runs, unless that component itself accompanies | ||
304 | the executable. | ||
305 | |||
306 | It may happen that this requirement contradicts the license | ||
307 | restrictions of other proprietary libraries that do not normally | ||
308 | accompany the operating system. Such a contradiction means you cannot | ||
309 | use both them and the Library together in an executable that you | ||
310 | distribute. | ||
311 | |||
312 | 7. You may place library facilities that are a work based on the | ||
313 | Library side-by-side in a single library together with other library | ||
314 | facilities not covered by this License, and distribute such a combined | ||
315 | library, provided that the separate distribution of the work based on | ||
316 | the Library and of the other library facilities is otherwise | ||
317 | permitted, and provided that you do these two things: | ||
318 | |||
319 | a) Accompany the combined library with a copy of the same work | ||
320 | based on the Library, uncombined with any other library | ||
321 | facilities. This must be distributed under the terms of the | ||
322 | Sections above. | ||
323 | |||
324 | b) Give prominent notice with the combined library of the fact | ||
325 | that part of it is a work based on the Library, and explaining | ||
326 | where to find the accompanying uncombined form of the same work. | ||
327 | |||
328 | 8. You may not copy, modify, sublicense, link with, or distribute | ||
329 | the Library except as expressly provided under this License. Any | ||
330 | attempt otherwise to copy, modify, sublicense, link with, or | ||
331 | distribute the Library is void, and will automatically terminate your | ||
332 | rights under this License. However, parties who have received copies, | ||
333 | or rights, from you under this License will not have their licenses | ||
334 | terminated so long as such parties remain in full compliance. | ||
335 | |||
336 | 9. You are not required to accept this License, since you have not | ||
337 | signed it. However, nothing else grants you permission to modify or | ||
338 | distribute the Library or its derivative works. These actions are | ||
339 | prohibited by law if you do not accept this License. Therefore, by | ||
340 | modifying or distributing the Library (or any work based on the | ||
341 | Library), you indicate your acceptance of this License to do so, and | ||
342 | all its terms and conditions for copying, distributing or modifying | ||
343 | the Library or works based on it. | ||
344 | |||
345 | 10. Each time you redistribute the Library (or any work based on the | ||
346 | Library), the recipient automatically receives a license from the | ||
347 | original licensor to copy, distribute, link with or modify the Library | ||
348 | subject to these terms and conditions. You may not impose any further | ||
349 | restrictions on the recipients' exercise of the rights granted herein. | ||
350 | You are not responsible for enforcing compliance by third parties to | ||
351 | this License. | ||
352 | |||
353 | 11. If, as a consequence of a court judgment or allegation of patent | ||
354 | infringement or for any other reason (not limited to patent issues), | ||
355 | conditions are imposed on you (whether by court order, agreement or | ||
356 | otherwise) that contradict the conditions of this License, they do not | ||
357 | excuse you from the conditions of this License. If you cannot | ||
358 | distribute so as to satisfy simultaneously your obligations under this | ||
359 | License and any other pertinent obligations, then as a consequence you | ||
360 | may not distribute the Library at all. For example, if a patent | ||
361 | license would not permit royalty-free redistribution of the Library by | ||
362 | all those who receive copies directly or indirectly through you, then | ||
363 | the only way you could satisfy both it and this License would be to | ||
364 | refrain entirely from distribution of the Library. | ||
365 | |||
366 | If any portion of this section is held invalid or unenforceable under any | ||
367 | particular circumstance, the balance of the section is intended to apply, | ||
368 | and the section as a whole is intended to apply in other circumstances. | ||
369 | |||
370 | It is not the purpose of this section to induce you to infringe any | ||
371 | patents or other property right claims or to contest validity of any | ||
372 | such claims; this section has the sole purpose of protecting the | ||
373 | integrity of the free software distribution system which is | ||
374 | implemented by public license practices. Many people have made | ||
375 | generous contributions to the wide range of software distributed | ||
376 | through that system in reliance on consistent application of that | ||
377 | system; it is up to the author/donor to decide if he or she is willing | ||
378 | to distribute software through any other system and a licensee cannot | ||
379 | impose that choice. | ||
380 | |||
381 | This section is intended to make thoroughly clear what is believed to | ||
382 | be a consequence of the rest of this License. | ||
383 | |||
384 | 12. If the distribution and/or use of the Library is restricted in | ||
385 | certain countries either by patents or by copyrighted interfaces, the | ||
386 | original copyright holder who places the Library under this License may add | ||
387 | an explicit geographical distribution limitation excluding those countries, | ||
388 | so that distribution is permitted only in or among countries not thus | ||
389 | excluded. In such case, this License incorporates the limitation as if | ||
390 | written in the body of this License. | ||
391 | |||
392 | 13. The Free Software Foundation may publish revised and/or new | ||
393 | versions of the Library General Public License from time to time. | ||
394 | Such new versions will be similar in spirit to the present version, | ||
395 | but may differ in detail to address new problems or concerns. | ||
396 | |||
397 | Each version is given a distinguishing version number. If the Library | ||
398 | specifies a version number of this License which applies to it and | ||
399 | "any later version", you have the option of following the terms and | ||
400 | conditions either of that version or of any later version published by | ||
401 | the Free Software Foundation. If the Library does not specify a | ||
402 | license version number, you may choose any version ever published by | ||
403 | the Free Software Foundation. | ||
404 | |||
405 | 14. If you wish to incorporate parts of the Library into other free | ||
406 | programs whose distribution conditions are incompatible with these, | ||
407 | write to the author to ask for permission. For software which is | ||
408 | copyrighted by the Free Software Foundation, write to the Free | ||
409 | Software Foundation; we sometimes make exceptions for this. Our | ||
410 | decision will be guided by the two goals of preserving the free status | ||
411 | of all derivatives of our free software and of promoting the sharing | ||
412 | and reuse of software generally. | ||
413 | |||
414 | NO WARRANTY | ||
415 | |||
416 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO | ||
417 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. | ||
418 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR | ||
419 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY | ||
420 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE | ||
421 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
422 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE | ||
423 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME | ||
424 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. | ||
425 | |||
426 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN | ||
427 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY | ||
428 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU | ||
429 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR | ||
430 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE | ||
431 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING | ||
432 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A | ||
433 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF | ||
434 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | ||
435 | DAMAGES. | ||
436 | |||
437 | END OF TERMS AND CONDITIONS | ||
438 | |||
439 | Appendix: How to Apply These Terms to Your New Libraries | ||
440 | |||
441 | If you develop a new library, and you want it to be of the greatest | ||
442 | possible use to the public, we recommend making it free software that | ||
443 | everyone can redistribute and change. You can do so by permitting | ||
444 | redistribution under these terms (or, alternatively, under the terms of the | ||
445 | ordinary General Public License). | ||
446 | |||
447 | To apply these terms, attach the following notices to the library. It is | ||
448 | safest to attach them to the start of each source file to most effectively | ||
449 | convey the exclusion of warranty; and each file should have at least the | ||
450 | "copyright" line and a pointer to where the full notice is found. | ||
451 | |||
452 | <one line to give the library's name and a brief idea of what it does.> | ||
453 | Copyright (C) <year> <name of author> | ||
454 | |||
455 | This library is free software; you can redistribute it and/or | ||
456 | modify it under the terms of the GNU Library General Public | ||
457 | License as published by the Free Software Foundation; either | ||
458 | version 2 of the License, or (at your option) any later version. | ||
459 | |||
460 | This library is distributed in the hope that it will be useful, | ||
461 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
462 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
463 | Library General Public License for more details. | ||
464 | |||
465 | You should have received a copy of the GNU Library General Public | ||
466 | License along with this library; if not, write to the Free | ||
467 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
468 | |||
469 | Also add information on how to contact you by electronic and paper mail. | ||
470 | |||
471 | You should also get your employer (if you work as a programmer) or your | ||
472 | school, if any, to sign a "copyright disclaimer" for the library, if | ||
473 | necessary. Here is a sample; alter the names: | ||
474 | |||
475 | Yoyodyne, Inc., hereby disclaims all copyright interest in the | ||
476 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. | ||
477 | |||
478 | <signature of Ty Coon>, 1 April 1990 | ||
479 | Ty Coon, President of Vice | ||
480 | |||
481 | That's all there is to it! | ||
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile new file mode 100644 index 000000000000..2296ff9dc47a --- /dev/null +++ b/arch/sparc/lib/Makefile | |||
@@ -0,0 +1,13 @@ | |||
1 | # $Id: Makefile,v 1.35 2000/12/15 00:41:18 davem Exp $ | ||
2 | # Makefile for Sparc library files. | ||
3 | # | ||
4 | |||
# Extra assembler flags for this directory: -ansi, plus a preprocessor
# definition of ST_DIV0 as 0x02.
# NOTE(review): ST_DIV0 is presumably the divide-by-zero trap number used by
# the division/remainder routines listed below -- confirm in those sources.
5 | EXTRA_AFLAGS := -ansi -DST_DIV0=0x02 | ||
6 | |||
# Objects that are always part of this directory's library.
7 | lib-y := mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o memcpy.o memset.o \ | ||
8 | strlen.o checksum.o blockops.o memscan.o memcmp.o strncmp.o \ | ||
9 | strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \ | ||
10 | copy_user.o locks.o atomic.o atomic32.o bitops.o \ | ||
11 | lshrdi3.o ashldi3.o rwsem.o muldi3.o bitext.o | ||
12 | |||
# debuglocks.o joins the list only when CONFIG_DEBUG_SPINLOCK expands to "y"
# (the variable name then becomes lib-y).
13 | lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o | ||
diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S new file mode 100644 index 000000000000..52418a0cb3dd --- /dev/null +++ b/arch/sparc/lib/ashldi3.S | |||
@@ -0,0 +1,34 @@ | |||
1 | /* $Id: ashldi3.S,v 1.2 1999/11/19 04:11:46 davem Exp $ | ||
2 | * ashldi3.S: GCC emits these for certain drivers playing | ||
3 | * with long longs. | ||
4 | * | ||
5 | * Copyright (C) 1999 David S. Miller (davem@redhat.com) | ||
6 | */ | ||
7 | |||
8 | .text | ||
9 | .align 4 | ||
10 | .globl __ashldi3 | ||
/*
 * __ashldi3: shift a 64-bit value left by a variable count.
 *
 * In:    %o0 = most-significant word
 *        %o1 = least-significant word
 *        %o2 = shift count
 * Out:   %o0:%o1 = shifted value
 * Uses:  %g2, %g3, %o4, %o5 and the integer condition codes
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the branch above it and runs whether or not that branch is taken.
 */
__ashldi3:
	tst	%o2			/* count == 0: value is returned as is */
	be	3f
	 mov	32, %g2

	sub	%g2, %o2, %g2		/* %g2 = 32 - count */
	tst	%g2
	bg	1f			/* positive: count is below 32 */
	 sll	%o0, %o2, %g3		/* %g3 = high << count (only used at 1:) */

	/* count >= 32: the low word, shifted, becomes the high word. */
	sub	%g0, %g2, %g2		/* %g2 = count - 32 */
	mov	0, %o5			/* new low word is zero */
	ba	2f
	 sll	%o1, %g2, %o4		/* new high = low << (count - 32) */
1:
	/* 0 < count < 32: bits leaving the low word enter the high word. */
	sll	%o1, %o2, %o5		/* new low  = low << count */
	srl	%o1, %g2, %g2		/* carry    = low >> (32 - count) */
	or	%g3, %g2, %o4		/* new high = (high << count) | carry */
2:
	or	%g0, %o4, %o0
	or	%g0, %o5, %o1
3:
	jmpl	%o7 + 8, %g0		/* leaf return */
	 nop
diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S new file mode 100644 index 000000000000..2848237598a4 --- /dev/null +++ b/arch/sparc/lib/ashrdi3.S | |||
@@ -0,0 +1,36 @@ | |||
1 | /* $Id: ashrdi3.S,v 1.4 1999/11/19 04:11:49 davem Exp $ | ||
2 | * ashrdi3.S: The filesystem code creates all kinds of references to | ||
3 | * this little routine on the sparc with gcc. | ||
4 | * | ||
5 | * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) | ||
6 | */ | ||
7 | |||
8 | .text | ||
9 | .align 4 | ||
10 | .globl __ashrdi3 | ||
11 | __ashrdi3: /* 64-bit arithmetic right shift: %o0 = high word, %o1 = low word, %o2 = count; result in %o0:%o1 */ | ||
12 | tst %o2 /* count == 0? */ | ||
13 | be 3f /* yes: return the operand unchanged */ | ||
14 | or %g0, 32, %g2 /* delay slot: %g2 = 32 */ | ||
15 | |||
16 | sub %g2, %o2, %g2 /* %g2 = 32 - count */ | ||
17 | |||
18 | tst %g2 | ||
19 | bg 1f /* count < 32 */ | ||
20 | sra %o0, %o2, %o4 /* delay slot: high result = high >> count, sign-propagating */ | ||
21 | |||
22 | sra %o0, 31, %o4 /* count >= 32: high result is the sign bit replicated */ | ||
23 | sub %g0, %g2, %g2 /* %g2 = count - 32 */ | ||
24 | ba 2f | ||
25 | sra %o0, %g2, %o5 /* delay slot: low result = high >> (count - 32) */ | ||
26 | |||
27 | 1: | ||
28 | sll %o0, %g2, %g3 /* %g3 = high-word bits that drop into the low word */ | ||
29 | srl %o1, %o2, %g2 /* %g2 = low >> count (logical) */ | ||
30 | or %g2, %g3, %o5 /* low result = both pieces combined */ | ||
31 | 2: | ||
32 | or %g0, %o4, %o0 /* return high word */ | ||
33 | or %g0, %o5, %o1 /* return low word */ | ||
34 | 3: | ||
35 | jmpl %o7 + 8, %g0 /* return to caller */ | ||
36 | nop | ||
diff --git a/arch/sparc/lib/atomic.S b/arch/sparc/lib/atomic.S new file mode 100644 index 000000000000..f48ad0c4dadb --- /dev/null +++ b/arch/sparc/lib/atomic.S | |||
@@ -0,0 +1,100 @@ | |||
1 | /* atomic.S: Move this stuff here for better ICACHE hit rates. | ||
2 | * | ||
3 | * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu) | ||
4 | */ | ||
5 | |||
6 | #include <linux/config.h> | ||
7 | #include <asm/ptrace.h> | ||
8 | #include <asm/psr.h> | ||
9 | |||
10 | .text | ||
11 | .align 4 | ||
12 | |||
13 | .globl __atomic_begin | ||
14 | __atomic_begin: | ||
15 | |||
16 | #ifndef CONFIG_SMP | ||
17 | .globl ___xchg32_sun4c | ||
18 | ___xchg32_sun4c: /* exchange %g2 with the word at [%g1]; old value returned in %g2; %g3/%g7 are scratch; %g4 is copied back to %o7 */ | ||
19 | rd %psr, %g3 /* %g3 = PSR on entry */ | ||
20 | andcc %g3, PSR_PIL, %g0 /* any PIL bit already set? (PSR_PIL comes from asm/psr.h - presumably the PIL field mask) */ | ||
21 | bne 1f /* yes: leave %psr alone */ | ||
22 | nop | ||
23 | wr %g3, PSR_PIL, %psr /* wr XORs its operands; the PIL bits were all zero, so this sets every one */ | ||
24 | nop; nop; nop /* let the %psr write take effect */ | ||
25 | 1: | ||
26 | andcc %g3, PSR_PIL, %g0 /* re-test the entry PSR for the branch below */ | ||
27 | ld [%g1], %g7 /* %g7 = old memory value */ | ||
28 | bne 1f /* PIL was non-zero on entry: nothing to restore */ | ||
29 | st %g2, [%g1] /* delay slot (always executed): store the new value */ | ||
30 | wr %g3, 0x0, %psr /* put back the entry PSR */ | ||
31 | nop; nop; nop | ||
32 | 1: | ||
33 | mov %g7, %g2 /* hand the old value back in %g2 */ | ||
34 | jmpl %o7 + 8, %g0 /* return */ | ||
35 | mov %g4, %o7 /* delay slot: restore %o7 from %g4 */ | ||
36 | |||
37 | .globl ___xchg32_sun4md | ||
38 | ___xchg32_sun4md: /* same register contract as ___xchg32_sun4c, done with a single swap instruction */ | ||
39 | swap [%g1], %g2 /* exchange %g2 with the word at [%g1] */ | ||
40 | jmpl %o7 + 8, %g0 /* return */ | ||
41 | mov %g4, %o7 /* delay slot: restore %o7 from %g4 */ | ||
42 | #endif | ||
43 | |||
44 | /* Read asm-sparc/atomic.h carefully to understand how this works for SMP. | ||
45 | * Really, some things here for SMP are overly clever, go read the header. | ||
46 | * In: %g1 = address of the counter, %g2 = addend, %g4 = value to put back in %o7. Out: %g2 = new value. Scratch: %g3, %g7. */ | ||
47 | .globl ___atomic24_add | ||
48 | ___atomic24_add: | ||
49 | rd %psr, %g3 ! Save the entry %psr; it is written back before returning | ||
50 | nop; nop; nop; ! Let the bits set | ||
51 | or %g3, PSR_PIL, %g7 ! Set every PIL bit: disable interrupts | ||
52 | wr %g7, 0x0, %psr ! Set %psr | ||
53 | nop; nop; nop; ! Let the bits set | ||
54 | #ifdef CONFIG_SMP | ||
55 | 1: ldstub [%g1 + 3], %g7 ! Spin on the byte lock for SMP. | ||
56 | orcc %g7, 0x0, %g0 ! Did we get it? | ||
57 | bne 1b ! Nope... | ||
58 | ld [%g1], %g7 ! Delay slot: load locked atomic24_t (redone on every spin) | ||
59 | sra %g7, 8, %g7 ! Get signed 24-bit integer | ||
60 | add %g7, %g2, %g2 ! Add in argument | ||
61 | sll %g2, 8, %g7 ! Transpose back to atomic24_t (low byte becomes zero) | ||
62 | st %g7, [%g1] ! Clever: the zero low byte releases the lock as well. | ||
63 | #else | ||
64 | ld [%g1], %g7 ! Load the counter word (UP: no byte lock, no shifting) | ||
65 | add %g7, %g2, %g2 ! Add in argument | ||
66 | st %g2, [%g1] ! Store it back | ||
67 | #endif | ||
68 | wr %g3, 0x0, %psr ! Restore original PSR_PIL | ||
69 | nop; nop; nop; ! Let the bits set | ||
70 | jmpl %o7, %g0 ! NOTE: not + 8, see callers in atomic.h | ||
71 | mov %g4, %o7 ! Restore %o7 | ||
72 | |||
73 | .globl ___atomic24_sub | ||
74 | ___atomic24_sub: /* In: %g1 = address of the counter, %g2 = amount to subtract, %g4 = value to put back in %o7. Out: %g2 = new value. Scratch: %g3, %g7. */ | ||
75 | rd %psr, %g3 ! Save the entry %psr; it is written back before returning | ||
76 | nop; nop; nop; ! Let the bits set | ||
77 | or %g3, PSR_PIL, %g7 ! Set every PIL bit: disable interrupts | ||
78 | wr %g7, 0x0, %psr ! Set %psr | ||
79 | nop; nop; nop; ! Let the bits set | ||
80 | #ifdef CONFIG_SMP | ||
81 | 1: ldstub [%g1 + 3], %g7 ! Spin on the byte lock for SMP. | ||
82 | orcc %g7, 0x0, %g0 ! Did we get it? | ||
83 | bne 1b ! Nope... | ||
84 | ld [%g1], %g7 ! Delay slot: load locked atomic24_t (redone on every spin) | ||
85 | sra %g7, 8, %g7 ! Get signed 24-bit integer | ||
86 | sub %g7, %g2, %g2 ! Subtract argument | ||
87 | sll %g2, 8, %g7 ! Transpose back to atomic24_t (low byte becomes zero) | ||
88 | st %g7, [%g1] ! Clever: the zero low byte releases the lock as well | ||
89 | #else | ||
90 | ld [%g1], %g7 ! Load the counter word (UP: no byte lock, no shifting) | ||
91 | sub %g7, %g2, %g2 ! Subtract argument | ||
92 | st %g2, [%g1] ! Store it back | ||
93 | #endif | ||
94 | wr %g3, 0x0, %psr ! Restore original PSR_PIL | ||
95 | nop; nop; nop; ! Let the bits set | ||
96 | jmpl %o7, %g0 ! NOTE: not + 8, see callers in atomic.h | ||
97 | mov %g4, %o7 ! Restore %o7 | ||
98 | |||
99 | .globl __atomic_end | ||
100 | __atomic_end: | ||
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c new file mode 100644 index 000000000000..19724c5800a7 --- /dev/null +++ b/arch/sparc/lib/atomic32.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * atomic32.c: 32-bit atomic_t implementation | ||
3 | * | ||
4 | * Copyright (C) 2004 Keith M Wesolowski | ||
5 | * | ||
6 | * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf | ||
7 | */ | ||
8 | |||
9 | #include <asm/atomic.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #ifdef CONFIG_SMP | ||
14 | #define ATOMIC_HASH_SIZE 4 | ||
15 | #define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)(a))>>8) & (ATOMIC_HASH_SIZE-1)]) | ||
16 | /* SMP: the address shifted right by 8, masked to ATOMIC_HASH_SIZE-1, selects the lock for an atomic_t. */ | ||
17 | spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { | ||
18 | [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED | ||
19 | }; | ||
20 | |||
21 | #else /* SMP */ | ||
22 | /* UP: every atomic_t maps to the one lock below; the macro argument is ignored. */ | ||
23 | static spinlock_t dummy = SPIN_LOCK_UNLOCKED; | ||
24 | #define ATOMIC_HASH_SIZE 1 | ||
25 | #define ATOMIC_HASH(a) (&dummy) | ||
26 | |||
27 | #endif /* SMP */ | ||
28 | |||
29 | int __atomic_add_return(int i, atomic_t *v) | ||
30 | { | ||
31 | spinlock_t *lock = ATOMIC_HASH(v); /* lock chosen for this address */ | ||
32 | unsigned long irq_state; | ||
33 | int sum; | ||
34 | /* Add i to v->counter with the lock held and interrupts saved; return the new value. */ | ||
35 | spin_lock_irqsave(lock, irq_state); | ||
36 | sum = (v->counter += i); | ||
37 | spin_unlock_irqrestore(lock, irq_state); | ||
38 | return sum; | ||
39 | } | ||
40 | |||
41 | void atomic_set(atomic_t *v, int i) | ||
42 | { | ||
43 | spinlock_t *lock = ATOMIC_HASH(v); /* lock chosen for this address */ | ||
44 | unsigned long irq_state; | ||
45 | /* Overwrite v->counter with i while holding the lock, interrupts saved. */ | ||
46 | spin_lock_irqsave(lock, irq_state); | ||
47 | v->counter = i; | ||
48 | spin_unlock_irqrestore(lock, irq_state); | ||
49 | } | ||
50 | |||
51 | EXPORT_SYMBOL(__atomic_add_return); | ||
52 | EXPORT_SYMBOL(atomic_set); | ||
53 | |||
diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c new file mode 100644 index 000000000000..94b05e8c906c --- /dev/null +++ b/arch/sparc/lib/bitext.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * bitext.c: kernel little helper (of bit shuffling variety). | ||
3 | * | ||
4 | * Copyright (C) 2002 Pete Zaitcev <zaitcev@yahoo.com> | ||
5 | * | ||
6 | * The algorithm to search a zero bit string is geared towards its application. | ||
7 | * We expect a couple of fixed sizes of requests, so a rotating counter, reset | ||
8 | * by align size, should provide fast enough search while maintaining low | ||
9 | * fragmentation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/bitops.h> | ||
14 | |||
15 | #include <asm/bitext.h> | ||
16 | |||
17 | /** | ||
18 | * bit_map_string_get - find and set a bit string in bit map. | ||
19 | * @t: the bit map. | ||
20 | * @len: requested string length | ||
21 | * @align: requested alignment (a power of two; 0 is treated as 1), or the page color when @t->num_colors is set | ||
22 | * | ||
23 | * Returns offset in the map or -1 if out of space. | ||
24 | * | ||
25 | * Not safe to call from an interrupt (uses spin_lock). | ||
26 | */ | ||
27 | int bit_map_string_get(struct bit_map *t, int len, int align) | ||
28 | { | ||
29 | int offset, count; /* siamese twins */ | ||
30 | int off_new; | ||
31 | int align1; | ||
32 | int i, color; | ||
33 | |||
34 | if (t->num_colors) { | ||
35 | /* align is overloaded to be the page color */ | ||
36 | color = align; | ||
37 | align = t->num_colors; | ||
38 | } else { | ||
39 | color = 0; | ||
40 | if (align == 0) | ||
41 | align = 1; | ||
42 | } | ||
43 | align1 = align - 1; /* mask of the bits below the alignment */ | ||
44 | if ((align & align1) != 0) /* alignment must be a power of two */ | ||
45 | BUG(); | ||
46 | if (align < 0 || align >= t->size) | ||
47 | BUG(); | ||
48 | if (len <= 0 || len > t->size) | ||
49 | BUG(); | ||
50 | color &= align1; /* keep the color inside one alignment unit */ | ||
51 | |||
52 | spin_lock(&t->lock); | ||
53 | if (len < t->last_size) /* shorter than the previous request: start at first_free */ | ||
54 | offset = t->first_free; | ||
55 | else /* otherwise continue from last_off, rounded down to the alignment */ | ||
56 | offset = t->last_off & ~align1; | ||
57 | count = 0; /* bits stepped over so far; bounds the search */ | ||
58 | for (;;) { | ||
59 | off_new = find_next_zero_bit(t->map, t->size, offset); | ||
60 | off_new = ((off_new + align1) & ~align1) + color; /* round up to the alignment, then add the color */ | ||
61 | count += off_new - offset; | ||
62 | offset = off_new; | ||
63 | if (offset >= t->size) /* ran past the end: wrap to bit 0 */ | ||
64 | offset = 0; | ||
65 | if (count + len > t->size) { /* whole map covered without finding room */ | ||
66 | spin_unlock(&t->lock); | ||
67 | /* P3 */ printk(KERN_ERR | ||
68 | "bitmap out: size %d used %d off %d len %d align %d count %d\n", | ||
69 | t->size, t->used, offset, len, align, count); | ||
70 | return -1; | ||
71 | } | ||
72 | |||
73 | if (offset + len > t->size) { /* string would overrun the map: restart from bit 0 */ | ||
74 | count += t->size - offset; | ||
75 | offset = 0; | ||
76 | continue; | ||
77 | } | ||
78 | |||
79 | i = 0; | ||
80 | while (test_bit(offset + i, t->map) == 0) { /* count consecutive clear bits */ | ||
81 | i++; | ||
82 | if (i == len) { /* found len clear bits: claim them */ | ||
83 | for (i = 0; i < len; i++) | ||
84 | __set_bit(offset + i, t->map); | ||
85 | if (offset == t->first_free) /* the first free bit was consumed: look for the next one */ | ||
86 | t->first_free = find_next_zero_bit | ||
87 | (t->map, t->size, | ||
88 | t->first_free + len); | ||
89 | if ((t->last_off = offset + len) >= t->size) | ||
90 | t->last_off = 0; | ||
91 | t->used += len; | ||
92 | t->last_size = len; | ||
93 | spin_unlock(&t->lock); | ||
94 | return offset; | ||
95 | } | ||
96 | } | ||
97 | count += i + 1; /* step over the clear run and the set bit that ended it */ | ||
98 | if ((offset += i + 1) >= t->size) | ||
99 | offset = 0; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | void bit_map_clear(struct bit_map *t, int offset, int len) | ||
104 | { | ||
105 | int bit, end = offset + len; /* end is one past the last bit to release */ | ||
106 | /* Release len bits starting at offset; every one of them must currently be set. */ | ||
107 | if (len > t->used) | ||
108 | BUG(); /* Much too late to do any good, but alas... */ | ||
109 | spin_lock(&t->lock); | ||
110 | for (bit = offset; bit < end; bit++) { | ||
111 | if (!test_bit(bit, t->map)) | ||
112 | BUG(); | ||
113 | __clear_bit(bit, t->map); | ||
114 | } | ||
115 | t->used -= len; | ||
116 | if (t->first_free > offset) | ||
117 | t->first_free = offset; | ||
118 | spin_unlock(&t->lock); | ||
119 | } | ||
120 | |||
121 | void bit_map_init(struct bit_map *t, unsigned long *map, int size) | ||
122 | { | ||
123 | /* Zero the size-bit array at map and set up t to manage it. */ | ||
124 | if (size & 0x7) | ||
125 | BUG(); /* size must be a whole number of bytes */ | ||
126 | memset(t, 0, sizeof(*t)); | ||
127 | memset(map, 0, size >> 3); | ||
128 | |||
129 | t->size = size; | ||
130 | t->map = map; | ||
131 | spin_lock_init(&t->lock); | ||
132 | } | ||
diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S new file mode 100644 index 000000000000..3e9399769075 --- /dev/null +++ b/arch/sparc/lib/bitops.S | |||
@@ -0,0 +1,110 @@ | |||
1 | /* bitops.S: Low level assembler bit operations. | ||
2 | * | ||
3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
4 | */ | ||
5 | |||
6 | #include <linux/config.h> | ||
7 | #include <asm/ptrace.h> | ||
8 | #include <asm/psr.h> | ||
9 | |||
10 | .text | ||
11 | .align 4 | ||
12 | |||
13 | .globl __bitops_begin | ||
14 | __bitops_begin: | ||
15 | |||
16 | /* Take bits in %g2 and set them in word at %g1, | ||
17 | * return whether bits were set in original value | ||
18 | * in %g2. %g4 holds value to restore into %o7 | ||
19 | * in delay slot of jmpl return, %g3 + %g5 + %g7 are | ||
20 | * used as temporaries and thus must be considered | ||
21 | * clobbered by all callers. | ||
22 | */ | ||
23 | .globl ___set_bit | ||
24 | ___set_bit: | ||
25 | rd %psr, %g3 /* %g3 = PSR on entry, written back before returning */ | ||
26 | nop; nop; nop; | ||
27 | or %g3, PSR_PIL, %g5 /* set every PIL bit */ | ||
28 | wr %g5, 0x0, %psr | ||
29 | nop; nop; nop /* let the %psr write take effect */ | ||
30 | #ifdef CONFIG_SMP | ||
31 | set bitops_spinlock, %g5 /* %g5 = address of the byte lock */ | ||
32 | 2: ldstub [%g5], %g7 ! Spin on the byte lock for SMP. | ||
33 | orcc %g7, 0x0, %g0 ! Did we get it? | ||
34 | bne 2b ! Nope... | ||
35 | #endif | ||
36 | ld [%g1], %g7 /* %g7 = old word (on SMP this sits in the delay slot of the bne above) */ | ||
37 | or %g7, %g2, %g5 /* %g5 = old word with the mask bits set */ | ||
38 | and %g7, %g2, %g2 /* %g2 = mask bits that were already set */ | ||
39 | #ifdef CONFIG_SMP | ||
40 | st %g5, [%g1] /* write the new word */ | ||
41 | set bitops_spinlock, %g5 | ||
42 | stb %g0, [%g5] /* release the byte lock */ | ||
43 | #else | ||
44 | st %g5, [%g1] /* write the new word */ | ||
45 | #endif | ||
46 | wr %g3, 0x0, %psr /* put back the entry PSR */ | ||
47 | nop; nop; nop | ||
48 | jmpl %o7, %g0 /* return: note %o7 without the usual + 8 */ | ||
49 | mov %g4, %o7 /* delay slot: restore %o7 from %g4 */ | ||
50 | |||
51 | /* Clear the bits given in %g2 in the word at %g1; %g2 returns the mask bits that were set before. %g4 is copied to %o7 on return; %g3, %g5, %g7 are clobbered. */ | ||
52 | .globl ___clear_bit | ||
53 | ___clear_bit: | ||
54 | rd %psr, %g3 /* %g3 = PSR on entry, written back before returning */ | ||
55 | nop; nop; nop | ||
56 | or %g3, PSR_PIL, %g5 /* set every PIL bit */ | ||
57 | wr %g5, 0x0, %psr | ||
58 | nop; nop; nop /* let the %psr write take effect */ | ||
59 | #ifdef CONFIG_SMP | ||
60 | set bitops_spinlock, %g5 /* %g5 = address of the byte lock */ | ||
61 | 2: ldstub [%g5], %g7 ! Spin on the byte lock for SMP. | ||
62 | orcc %g7, 0x0, %g0 ! Did we get it? | ||
63 | bne 2b ! Nope... | ||
64 | #endif | ||
65 | ld [%g1], %g7 /* %g7 = old word (on SMP this sits in the delay slot of the bne above) */ | ||
66 | andn %g7, %g2, %g5 /* %g5 = old word with the mask bits cleared */ | ||
67 | and %g7, %g2, %g2 /* %g2 = mask bits that were set before */ | ||
68 | #ifdef CONFIG_SMP | ||
69 | st %g5, [%g1] /* write the new word */ | ||
70 | set bitops_spinlock, %g5 | ||
71 | stb %g0, [%g5] /* release the byte lock */ | ||
72 | #else | ||
73 | st %g5, [%g1] /* write the new word */ | ||
74 | #endif | ||
75 | wr %g3, 0x0, %psr /* put back the entry PSR */ | ||
76 | nop; nop; nop | ||
77 | jmpl %o7, %g0 /* return: note %o7 without the usual + 8 */ | ||
78 | mov %g4, %o7 /* delay slot: restore %o7 from %g4 */ | ||
79 | |||
80 | /* Toggle the bits given in %g2 in the word at %g1; %g2 returns the mask bits that were set before. %g4 is copied to %o7 on return; %g3, %g5, %g7 are clobbered. */ | ||
81 | .globl ___change_bit | ||
82 | ___change_bit: | ||
83 | rd %psr, %g3 /* %g3 = PSR on entry, written back before returning */ | ||
84 | nop; nop; nop | ||
85 | or %g3, PSR_PIL, %g5 /* set every PIL bit */ | ||
86 | wr %g5, 0x0, %psr | ||
87 | nop; nop; nop /* let the %psr write take effect */ | ||
88 | #ifdef CONFIG_SMP | ||
89 | set bitops_spinlock, %g5 /* %g5 = address of the byte lock */ | ||
90 | 2: ldstub [%g5], %g7 ! Spin on the byte lock for SMP. | ||
91 | orcc %g7, 0x0, %g0 ! Did we get it? | ||
92 | bne 2b ! Nope... | ||
93 | #endif | ||
94 | ld [%g1], %g7 /* %g7 = old word (on SMP this sits in the delay slot of the bne above) */ | ||
95 | xor %g7, %g2, %g5 /* %g5 = old word with the mask bits inverted */ | ||
96 | and %g7, %g2, %g2 /* %g2 = mask bits that were set before */ | ||
97 | #ifdef CONFIG_SMP | ||
98 | st %g5, [%g1] /* write the new word */ | ||
99 | set bitops_spinlock, %g5 | ||
100 | stb %g0, [%g5] /* release the byte lock */ | ||
101 | #else | ||
102 | st %g5, [%g1] /* write the new word */ | ||
103 | #endif | ||
104 | wr %g3, 0x0, %psr /* put back the entry PSR */ | ||
105 | nop; nop; nop | ||
106 | jmpl %o7, %g0 /* return: note %o7 without the usual + 8 */ | ||
107 | mov %g4, %o7 /* delay slot: restore %o7 from %g4 */ | ||
108 | |||
109 | .globl __bitops_end | ||
110 | __bitops_end: | ||
diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S new file mode 100644 index 000000000000..a7c7ffaa4a94 --- /dev/null +++ b/arch/sparc/lib/blockops.S | |||
@@ -0,0 +1,89 @@ | |||
1 | /* $Id: blockops.S,v 1.8 1998/01/30 10:58:44 jj Exp $ | ||
2 | * blockops.S: Common block zero optimized routines. | ||
3 | * | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | */ | ||
6 | |||
7 | #include <asm/page.h> | ||
8 | |||
9 | /* Zero out 64 bytes of memory at (buf + offset) using eight | ||
10 | * doubleword stores of the %g0/%g1 pair, highest address first. Assumes %g1 contains zero. | ||
11 | */ | ||
12 | #define BLAST_BLOCK(buf, offset) \ | ||
13 | std %g0, [buf + offset + 0x38]; \ | ||
14 | std %g0, [buf + offset + 0x30]; \ | ||
15 | std %g0, [buf + offset + 0x28]; \ | ||
16 | std %g0, [buf + offset + 0x20]; \ | ||
17 | std %g0, [buf + offset + 0x18]; \ | ||
18 | std %g0, [buf + offset + 0x10]; \ | ||
19 | std %g0, [buf + offset + 0x08]; \ | ||
20 | std %g0, [buf + offset + 0x00]; | ||
21 | |||
22 | /* Copy 32 bytes of memory at (src + offset) to | ||
23 | * (dst + offset): four ldd loads, then four std stores. Only t0/t2/t4/t6 are named in the body; t1/t3/t5/t7 are the other halves of those register pairs. | ||
24 | */ | ||
25 | #define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
26 | ldd [src + offset + 0x18], t0; \ | ||
27 | ldd [src + offset + 0x10], t2; \ | ||
28 | ldd [src + offset + 0x08], t4; \ | ||
29 | ldd [src + offset + 0x00], t6; \ | ||
30 | std t0, [dst + offset + 0x18]; \ | ||
31 | std t2, [dst + offset + 0x10]; \ | ||
32 | std t4, [dst + offset + 0x08]; \ | ||
33 | std t6, [dst + offset + 0x00]; | ||
34 | |||
35 | /* Profiling evidence indicates that memset() is | ||
36 | * commonly called for blocks of size PAGE_SIZE, | ||
37 | * and (2 * PAGE_SIZE) (for kernel stacks) | ||
38 | * and with a second arg of zero. We assume in | ||
39 | * all of these cases that the buffer is aligned | ||
40 | * on at least an 8 byte boundary. | ||
41 | * | ||
42 | * Therefore we special case them to make them | ||
43 | * as fast as possible. | ||
44 | */ | ||
45 | |||
46 | .text | ||
47 | .align 4 | ||
48 | .globl bzero_1page, __copy_1page | ||
49 | |||
50 | bzero_1page: | ||
51 | /* NOTE: If you change the number of insns of this routine, please check | ||
52 | * arch/sparc/mm/hypersparc.S */ | ||
53 | /* %o0 = buf; zeroes PAGE_SIZE bytes, 0x100 per loop pass */ | ||
54 | or %g0, %g0, %g1 /* %g1 = 0, as BLAST_BLOCK requires */ | ||
55 | or %o0, %g0, %o1 /* %o1 = copy of buf (not read again in this routine) */ | ||
56 | or %g0, (PAGE_SIZE >> 8), %g2 /* %g2 = number of 256-byte passes */ | ||
57 | 1: | ||
58 | BLAST_BLOCK(%o0, 0x00) | ||
59 | BLAST_BLOCK(%o0, 0x40) | ||
60 | BLAST_BLOCK(%o0, 0x80) | ||
61 | BLAST_BLOCK(%o0, 0xc0) | ||
62 | subcc %g2, 1, %g2 /* one pass done */ | ||
63 | bne 1b /* loop until the count reaches zero */ | ||
64 | add %o0, 0x100, %o0 /* delay slot: advance buf by 256 bytes */ | ||
65 | |||
66 | retl | ||
67 | nop | ||
68 | |||
69 | __copy_1page: | ||
70 | /* NOTE: If you change the number of insns of this routine, please check | ||
71 | * arch/sparc/mm/hypersparc.S */ | ||
72 | /* %o0 = dst, %o1 = src; copies PAGE_SIZE bytes, 0x100 per loop pass; uses %o2-%o5 and %g1-%g5 */ | ||
73 | or %g0, (PAGE_SIZE >> 8), %g1 /* %g1 = number of 256-byte passes */ | ||
74 | 1: | ||
75 | MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
76 | MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
77 | MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
78 | MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
79 | MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
80 | MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
81 | MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
82 | MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5) | ||
83 | subcc %g1, 1, %g1 /* one pass done */ | ||
84 | add %o0, 0x100, %o0 /* advance dst by 256 bytes (add leaves the flags alone) */ | ||
85 | bne 1b /* loop until the count reaches zero */ | ||
86 | add %o1, 0x100, %o1 /* delay slot: advance src by 256 bytes */ | ||
87 | |||
88 | retl | ||
89 | nop | ||
diff --git a/arch/sparc/lib/checksum.S b/arch/sparc/lib/checksum.S new file mode 100644 index 000000000000..77f228533d47 --- /dev/null +++ b/arch/sparc/lib/checksum.S | |||
@@ -0,0 +1,583 @@ | |||
1 | /* checksum.S: Sparc optimized checksum code. | ||
2 | * | ||
3 | * Copyright(C) 1995 Linus Torvalds | ||
4 | * Copyright(C) 1995 Miguel de Icaza | ||
5 | * Copyright(C) 1996 David S. Miller | ||
6 | * Copyright(C) 1997 Jakub Jelinek | ||
7 | * | ||
8 | * derived from: | ||
9 | * Linux/Alpha checksum c-code | ||
10 | * Linux/ix86 inline checksum assembly | ||
11 | * RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code) | ||
12 | * David Mosberger-Tang for optimized reference c-code | ||
13 | * BSD4.4 portable checksum routine | ||
14 | */ | ||
15 | |||
16 | #include <asm/errno.h> | ||
17 | |||
18 | #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) /* add the 32 bytes at buf+offset into sum; every add takes carry-in, so the carry flag must be valid on entry */ \ | ||
19 | ldd [buf + offset + 0x00], t0; \ | ||
20 | ldd [buf + offset + 0x08], t2; \ | ||
21 | addxcc t0, sum, sum; \ | ||
22 | addxcc t1, sum, sum; \ | ||
23 | ldd [buf + offset + 0x10], t4; \ | ||
24 | addxcc t2, sum, sum; \ | ||
25 | addxcc t3, sum, sum; \ | ||
26 | ldd [buf + offset + 0x18], t0; /* t0/t1 are reused for the last doubleword */ \ | ||
27 | addxcc t4, sum, sum; \ | ||
28 | addxcc t5, sum, sum; \ | ||
29 | addxcc t0, sum, sum; \ | ||
30 | addxcc t1, sum, sum; | ||
31 | |||
32 | #define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3) /* add the 16 bytes at buf-offset-8 into sum; 6 insns, carry chained in and out */ \ | ||
33 | ldd [buf - offset - 0x08], t0; \ | ||
34 | ldd [buf - offset - 0x00], t2; \ | ||
35 | addxcc t0, sum, sum; \ | ||
36 | addxcc t1, sum, sum; \ | ||
37 | addxcc t2, sum, sum; \ | ||
38 | addxcc t3, sum, sum; | ||
39 | |||
40 | /* Do end cruft out of band to get better cache patterns. Entered with flags from (%o1 & 8); %o0 = buf, %o1 = len, %o2 = sum; returns the sum in %o0. */ | ||
41 | csum_partial_end_cruft: | ||
42 | be 1f ! caller asks %o1 & 0x8 | ||
43 | andcc %o1, 4, %g0 ! nope, check for word remaining | ||
44 | ldd [%o0], %g2 ! load two | ||
45 | addcc %g2, %o2, %o2 ! add first word to sum | ||
46 | addxcc %g3, %o2, %o2 ! add second word as well | ||
47 | add %o0, 8, %o0 ! advance buf ptr | ||
48 | addx %g0, %o2, %o2 ! add in final carry | ||
49 | andcc %o1, 4, %g0 ! check again for word remaining | ||
50 | 1: be 1f ! nope, skip this code | ||
51 | andcc %o1, 3, %o1 ! check for trailing bytes | ||
52 | ld [%o0], %g2 ! load it | ||
53 | addcc %g2, %o2, %o2 ! add to sum | ||
54 | add %o0, 4, %o0 ! advance buf ptr | ||
55 | addx %g0, %o2, %o2 ! add in final carry | ||
56 | andcc %o1, 3, %g0 ! check again for trailing bytes | ||
57 | 1: be 1f ! no trailing bytes, return | ||
58 | addcc %o1, -1, %g0 ! only one byte remains? | ||
59 | bne 2f ! at least two bytes more | ||
60 | subcc %o1, 2, %o1 ! only two bytes more? | ||
61 | b 4f ! only one byte remains | ||
62 | or %g0, %g0, %o4 ! clear fake hword value | ||
63 | 2: lduh [%o0], %o4 ! get hword | ||
64 | be 6f ! jmp if only hword remains | ||
65 | add %o0, 2, %o0 ! advance buf ptr either way | ||
66 | sll %o4, 16, %o4 ! create upper hword | ||
67 | 4: ldub [%o0], %o5 ! get final byte | ||
68 | sll %o5, 8, %o5 ! put into place | ||
69 | or %o5, %o4, %o4 ! coalesce with hword (if any) | ||
70 | 6: addcc %o4, %o2, %o2 ! add to sum | ||
71 | 1: retl ! get outta here | ||
72 | addx %g0, %o2, %o0 ! add final carry into retval | ||
73 | |||
74 | /* Also do alignment out of band to get better cache patterns. %o0 = buf, %o1 = len, %o2 = sum; consumes a leading halfword and/or word, then rejoins csum_partial at cpa. */ | ||
75 | csum_partial_fix_alignment: | ||
76 | cmp %o1, 6 /* fewer than 6 bytes? */ | ||
77 | bl cpte - 0x4 /* yes: go to the insn just before cpte, which retests len & 0xf */ | ||
78 | andcc %o0, 0x2, %g0 /* delay slot: is bit 1 of buf set? */ | ||
79 | be 1f /* no: skip the halfword step (bit 0 of buf is never examined here) */ | ||
80 | andcc %o0, 0x4, %g0 /* delay slot: is bit 2 of buf set? */ | ||
81 | lduh [%o0 + 0x00], %g2 /* load the leading halfword */ | ||
82 | sub %o1, 2, %o1 /* len -= 2 */ | ||
83 | add %o0, 2, %o0 /* buf += 2 */ | ||
84 | sll %g2, 16, %g2 /* place it in the upper 16 bits */ | ||
85 | addcc %g2, %o2, %o2 /* add to sum, carry-out in the C flag */ | ||
86 | srl %o2, 16, %g3 /* %g3 = upper half of the sum */ | ||
87 | addx %g0, %g3, %g2 /* %g2 = upper half plus the carry-out */ | ||
88 | sll %o2, 16, %o2 /* with the srl below: keep only the low half of the sum */ | ||
89 | sll %g2, 16, %g3 /* corrected upper half back to bits 31..16 */ | ||
90 | srl %o2, 16, %o2 | ||
91 | andcc %o0, 0x4, %g0 /* retest bit 2 now that buf has advanced */ | ||
92 | or %g3, %o2, %o2 /* reassemble the sum (or leaves the flags alone) */ | ||
93 | 1: be cpa /* bit 2 clear: rejoin the main path */ | ||
94 | andcc %o1, 0xffffff80, %o3 /* delay slot: %o3 = len & ~0x7f, flags for the be at cpa */ | ||
95 | ld [%o0 + 0x00], %g2 /* load the leading word */ | ||
96 | sub %o1, 4, %o1 /* len -= 4 */ | ||
97 | addcc %g2, %o2, %o2 /* add to sum */ | ||
98 | add %o0, 4, %o0 /* buf += 4 */ | ||
99 | addx %g0, %o2, %o2 /* fold the carry back in */ | ||
100 | b cpa /* rejoin the main path */ | ||
101 | andcc %o1, 0xffffff80, %o3 /* delay slot: %o3 = len & ~0x7f, flags for the be at cpa */ | ||
102 | |||
103 | /* The common case is to get called with a nicely aligned | ||
104 | * buffer of size 0x20. Follow the code path for that case. | ||
105 | * Phases: 128-byte unrolled loop, jump table for the remaining 16-byte chunks, then csum_partial_end_cruft for the last 0-15 bytes. */ | ||
106 | .globl csum_partial | ||
107 | csum_partial: /* %o0=buf, %o1=len, %o2=sum */ | ||
108 | andcc %o0, 0x7, %g0 ! alignment problems? | ||
109 | bne csum_partial_fix_alignment ! yep, handle it | ||
110 | sethi %hi(cpte - 8), %g7 ! prepare table jmp ptr | ||
111 | andcc %o1, 0xffffff80, %o3 ! %o3 = len rounded down to a multiple of 128 | ||
112 | cpa: be 3f ! none to do | ||
113 | andcc %o1, 0x70, %g1 ! clears carry flag too | ||
114 | 5: CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5) | ||
115 | CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5) | ||
116 | CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5) | ||
117 | CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5) | ||
118 | addx %g0, %o2, %o2 ! sink in final carry | ||
119 | subcc %o3, 128, %o3 ! 128 fewer bytes left for the loop | ||
120 | bne 5b ! more to do | ||
121 | add %o0, 128, %o0 ! advance buf ptr | ||
122 | andcc %o1, 0x70, %g1 ! clears carry flag too | ||
123 | 3: be cpte ! no 16-byte chunks left, skip the table | ||
124 | andcc %o1, 0xf, %g0 ! anything left at all? | ||
125 | srl %g1, 1, %o4 ! %o4 = %g1/2: the table has 24 insn bytes per 16 data bytes | ||
126 | sub %g7, %g1, %g7 ! adjust jmp ptr | ||
127 | sub %g7, %o4, %g7 ! final jmp ptr adjust | ||
128 | jmp %g7 + %lo(cpte - 8) ! enter the table | ||
129 | add %o0, %g1, %o0 ! advance buf ptr | ||
130 | cptbl: CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5) | ||
131 | CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5) | ||
132 | CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5) | ||
133 | CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5) | ||
134 | CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5) | ||
135 | CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5) | ||
136 | CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5) | ||
137 | addx %g0, %o2, %o2 ! fetch final carry | ||
138 | andcc %o1, 0xf, %g0 ! anything left at all? | ||
139 | cpte: bne csum_partial_end_cruft ! yep, handle it | ||
140 | andcc %o1, 8, %g0 ! check how much | ||
141 | cpout: retl ! get outta here | ||
142 | mov %o2, %o0 ! return computed csum | ||
143 | |||
144 | .globl __csum_partial_copy_start, __csum_partial_copy_end | ||
145 | __csum_partial_copy_start: | ||
146 | |||
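147 | /* Work around cpp -rob. EX(x,y,a,b): emit "x,y" plus an __ex_table entry whose .fixup stub runs "a, b, %o3" in the delay slot of a branch to 30f. EX2/EX3: the table entry points straight at 30f/96f. EXT: range entry {start, 0, end, handler}. */ | ||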
148 | #define ALLOC #alloc | ||
149 | #define EXECINSTR #execinstr | ||
150 | #define EX(x,y,a,b) \ | ||
151 | 98: x,y; \ | ||
152 | .section .fixup,ALLOC,EXECINSTR; \ | ||
153 | .align 4; \ | ||
154 | 99: ba 30f; \ | ||
155 | a, b, %o3; \ | ||
156 | .section __ex_table,ALLOC; \ | ||
157 | .align 4; \ | ||
158 | .word 98b, 99b; \ | ||
159 | .text; \ | ||
160 | .align 4 | ||
161 | |||
162 | #define EX2(x,y) \ | ||
163 | 98: x,y; \ | ||
164 | .section __ex_table,ALLOC; \ | ||
165 | .align 4; \ | ||
166 | .word 98b, 30f; \ | ||
167 | .text; \ | ||
168 | .align 4 | ||
169 | |||
170 | #define EX3(x,y) \ | ||
171 | 98: x,y; \ | ||
172 | .section __ex_table,ALLOC; \ | ||
173 | .align 4; \ | ||
174 | .word 98b, 96f; \ | ||
175 | .text; \ | ||
176 | .align 4 | ||
177 | |||
178 | #define EXT(start,end,handler) \ | ||
179 | .section __ex_table,ALLOC; \ | ||
180 | .align 4; \ | ||
181 | .word start, 0, end, handler; \ | ||
182 | .text; \ | ||
183 | .align 4 | ||
184 | |||
185 | /* This aligned version executes typically in 8.5 superscalar cycles, this | ||
186 | * is the best I can do. I say 8.5 because the final add will pair with | ||
187 | * the next ldd in the main unrolled loop. Thus the pipe is always full. | ||
188 | * If you change these macros (including order of instructions), | ||
189 | * please check the fixup code below as well. | ||
190 | */ | ||
191 | #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
192 | ldd [src + off + 0x00], t0; \ | ||
193 | ldd [src + off + 0x08], t2; \ | ||
194 | addxcc t0, sum, sum; \ | ||
195 | ldd [src + off + 0x10], t4; \ | ||
196 | addxcc t1, sum, sum; \ | ||
197 | ldd [src + off + 0x18], t6; \ | ||
198 | addxcc t2, sum, sum; \ | ||
199 | std t0, [dst + off + 0x00]; \ | ||
200 | addxcc t3, sum, sum; \ | ||
201 | std t2, [dst + off + 0x08]; \ | ||
202 | addxcc t4, sum, sum; \ | ||
203 | std t4, [dst + off + 0x10]; \ | ||
204 | addxcc t5, sum, sum; \ | ||
205 | std t6, [dst + off + 0x18]; \ | ||
206 | addxcc t6, sum, sum; \ | ||
207 | addxcc t7, sum, sum; | ||
208 | |||
209 | /* 12 superscalar cycles seems to be the limit for this case, | ||
210 | * because of this we thus do all the ldd's together to get | ||
211 | * Viking MXCC into streaming mode. Ho hum... | ||
212 | */ | ||
213 | #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
214 | ldd [src + off + 0x00], t0; \ | ||
215 | ldd [src + off + 0x08], t2; \ | ||
216 | ldd [src + off + 0x10], t4; \ | ||
217 | ldd [src + off + 0x18], t6; \ | ||
218 | st t0, [dst + off + 0x00]; \ | ||
219 | addxcc t0, sum, sum; \ | ||
220 | st t1, [dst + off + 0x04]; \ | ||
221 | addxcc t1, sum, sum; \ | ||
222 | st t2, [dst + off + 0x08]; \ | ||
223 | addxcc t2, sum, sum; \ | ||
224 | st t3, [dst + off + 0x0c]; \ | ||
225 | addxcc t3, sum, sum; \ | ||
226 | st t4, [dst + off + 0x10]; \ | ||
227 | addxcc t4, sum, sum; \ | ||
228 | st t5, [dst + off + 0x14]; \ | ||
229 | addxcc t5, sum, sum; \ | ||
230 | st t6, [dst + off + 0x18]; \ | ||
231 | addxcc t6, sum, sum; \ | ||
232 | st t7, [dst + off + 0x1c]; \ | ||
233 | addxcc t7, sum, sum; | ||
234 | |||
235 | /* Yuck, 6 superscalar cycles... */ | ||
236 | #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \ | ||
237 | ldd [src - off - 0x08], t0; \ | ||
238 | ldd [src - off - 0x00], t2; \ | ||
239 | addxcc t0, sum, sum; \ | ||
240 | st t0, [dst - off - 0x08]; \ | ||
241 | addxcc t1, sum, sum; \ | ||
242 | st t1, [dst - off - 0x04]; \ | ||
243 | addxcc t2, sum, sum; \ | ||
244 | st t2, [dst - off - 0x00]; \ | ||
245 | addxcc t3, sum, sum; \ | ||
246 | st t3, [dst - off + 0x04]; | ||
247 | |||
248 | /* Handle the end cruft code out of band for better cache patterns. Entered with flags from (%o3 & 8); %o0 = src, %o1 = dst, %o3 = low bits of len, %g7 = sum; returns the sum in %o0. */ | ||
249 | cc_end_cruft: | ||
250 | be 1f /* no doubleword left */ | ||
251 | andcc %o3, 4, %g0 /* delay slot: is a word left? */ | ||
252 | EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf) /* load 8 bytes */ | ||
253 | add %o1, 8, %o1 /* dst += 8 */ | ||
254 | addcc %g2, %g7, %g7 /* add first word to sum */ | ||
255 | add %o0, 8, %o0 /* src += 8 */ | ||
256 | addxcc %g3, %g7, %g7 /* add second word with carry */ | ||
257 | EX2(st %g2, [%o1 - 0x08]) /* copy first word */ | ||
258 | addx %g0, %g7, %g7 /* fold the carry back in */ | ||
259 | andcc %o3, 4, %g0 /* retest for a word left */ | ||
260 | EX2(st %g3, [%o1 - 0x04]) /* copy second word */ | ||
261 | 1: be 1f /* no word left */ | ||
262 | andcc %o3, 3, %o3 /* delay slot: %o3 = trailing byte count */ | ||
263 | EX(ld [%o0 + 0x00], %g2, add %o3, 4) /* load 4 bytes */ | ||
264 | add %o1, 4, %o1 /* dst += 4 */ | ||
265 | addcc %g2, %g7, %g7 /* add to sum */ | ||
266 | EX2(st %g2, [%o1 - 0x04]) /* copy the word */ | ||
267 | addx %g0, %g7, %g7 /* fold the carry back in */ | ||
268 | andcc %o3, 3, %g0 /* retest for trailing bytes */ | ||
269 | add %o0, 4, %o0 /* src += 4 (add leaves the flags alone) */ | ||
270 | 1: be 1f /* no trailing bytes: return */ | ||
271 | addcc %o3, -1, %g0 /* delay slot: exactly one byte left? */ | ||
272 | bne 2f /* no: at least two */ | ||
273 | subcc %o3, 2, %o3 /* delay slot: exactly two bytes left? */ | ||
274 | b 4f /* one byte only */ | ||
275 | or %g0, %g0, %o4 /* delay slot: no halfword part */ | ||
276 | 2: EX(lduh [%o0 + 0x00], %o4, add %o3, 2) /* load the halfword */ | ||
277 | add %o0, 2, %o0 /* src += 2 */ | ||
278 | EX2(sth %o4, [%o1 + 0x00]) /* copy the halfword */ | ||
279 | be 6f /* flags still from the subcc above: only the halfword remained */ | ||
280 | add %o1, 2, %o1 /* delay slot: dst += 2 */ | ||
281 | sll %o4, 16, %o4 /* halfword becomes the upper 16 bits */ | ||
282 | 4: EX(ldub [%o0 + 0x00], %o5, add %g0, 1) /* load the final byte */ | ||
283 | EX2(stb %o5, [%o1 + 0x00]) /* copy the final byte */ | ||
284 | sll %o5, 8, %o5 /* byte goes to bits 15..8 */ | ||
285 | or %o5, %o4, %o4 /* merge with the halfword, if any */ | ||
286 | 6: addcc %o4, %g7, %g7 /* add the tail to the sum */ | ||
287 | 1: retl | ||
288 | addx %g0, %g7, %o0 /* delay slot: return sum plus final carry */ | ||
289 | |||
290 | /* Also, handle the alignment code out of band. %o0 = src, %o1 = dst, %g1 = len, %g7 = sum; copies a leading halfword and/or word, then continues at label 3 of the main routine. */ | ||
291 | cc_dword_align: | ||
292 | cmp %g1, 6 /* fewer than 6 bytes? */ | ||
293 | bl,a ccte /* yes: go straight to the tail check */ | ||
294 | andcc %g1, 0xf, %o3 /* annulled delay slot (taken branch only): %o3 = len & 0xf */ | ||
295 | andcc %o0, 0x1, %g0 /* odd source address? */ | ||
296 | bne ccslow /* yes: use ccslow */ | ||
297 | andcc %o0, 0x2, %g0 /* delay slot: is bit 1 of src set? */ | ||
298 | be 1f /* no: skip the halfword step */ | ||
299 | andcc %o0, 0x4, %g0 /* delay slot: is bit 2 of src set? */ | ||
300 | EX(lduh [%o0 + 0x00], %g4, add %g1, 0) /* load the leading halfword */ | ||
301 | sub %g1, 2, %g1 /* len -= 2 */ | ||
302 | EX2(sth %g4, [%o1 + 0x00]) /* copy it */ | ||
303 | add %o0, 2, %o0 /* src += 2 */ | ||
304 | sll %g4, 16, %g4 /* place it in the upper 16 bits */ | ||
305 | addcc %g4, %g7, %g7 /* add to sum, carry-out in the C flag */ | ||
306 | add %o1, 2, %o1 /* dst += 2 (add leaves the flags alone) */ | ||
307 | srl %g7, 16, %g3 /* %g3 = upper half of the sum */ | ||
308 | addx %g0, %g3, %g4 /* %g4 = upper half plus the carry-out */ | ||
309 | sll %g7, 16, %g7 /* with the srl below: keep only the low half of the sum */ | ||
310 | sll %g4, 16, %g3 /* corrected upper half back to bits 31..16 */ | ||
311 | srl %g7, 16, %g7 | ||
312 | andcc %o0, 0x4, %g0 /* retest bit 2 now that src has advanced */ | ||
313 | or %g3, %g7, %g7 /* reassemble the sum */ | ||
314 | 1: be 3f /* bit 2 clear: continue in the main routine */ | ||
315 | andcc %g1, 0xffffff80, %g0 /* delay slot: flags for the be at 3: (any full 128-byte block?) */ | ||
316 | EX(ld [%o0 + 0x00], %g4, add %g1, 0) /* load the leading word */ | ||
317 | sub %g1, 4, %g1 /* len -= 4 */ | ||
318 | EX2(st %g4, [%o1 + 0x00]) /* copy it */ | ||
319 | add %o0, 4, %o0 /* src += 4 */ | ||
320 | addcc %g4, %g7, %g7 /* add to sum */ | ||
321 | add %o1, 4, %o1 /* dst += 4 */ | ||
322 | addx %g0, %g7, %g7 /* fold the carry back in */ | ||
323 | b 3f /* continue in the main routine */ | ||
324 | andcc %g1, 0xffffff80, %g0 /* delay slot: flags for the be at 3: */ | ||
325 | |||
326 | /* Sun, you just can't beat me, you just can't. Stop trying, | ||
327 | * give up. I'm serious, I am going to kick the living shit | ||
328 | * out of you, game over, lights out. | ||
329 | */ | ||
330 | .align 8 | ||
331 | .globl __csum_partial_copy_sparc_generic | ||
332 | __csum_partial_copy_sparc_generic: | ||
333 | /* %o0=src, %o1=dest, %g1=len, %g7=sum */ | ||
334 | xor %o0, %o1, %o4 ! get changing bits | ||
335 | andcc %o4, 3, %g0 ! check for mismatched alignment | ||
336 | bne ccslow ! better this than unaligned/fixups | ||
337 | andcc %o0, 7, %g0 ! need to align things? | ||
338 | bne cc_dword_align ! yes, we check for short lengths there | ||
339 | andcc %g1, 0xffffff80, %g0 ! can we use unrolled loop? | ||
340 | 3: be 3f ! nope, less than one loop remains | ||
341 | andcc %o1, 4, %g0 ! dest aligned on 4 or 8 byte boundary? | ||
342 | be ccdbl + 4 ! 8 byte aligned, kick ass | ||
343 | 5: CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
344 | CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
345 | CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
346 | CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
347 | 10: EXT(5b, 10b, 20f) ! note for exception handling | ||
348 | sub %g1, 128, %g1 ! detract from length | ||
349 | addx %g0, %g7, %g7 ! add in last carry bit | ||
350 | andcc %g1, 0xffffff80, %g0 ! more to csum? | ||
351 | add %o0, 128, %o0 ! advance src ptr | ||
352 | bne 5b ! we did not go negative, continue looping | ||
353 | add %o1, 128, %o1 ! advance dest ptr | ||
354 | 3: andcc %g1, 0x70, %o2 ! can use table? | ||
355 | ccmerge:be ccte ! nope, go and check for end cruft | ||
356 | andcc %g1, 0xf, %o3 ! get low bits of length (clears carry btw) | ||
357 | srl %o2, 1, %o4 ! begin negative offset computation | ||
358 | sethi %hi(12f), %o5 ! set up table ptr end | ||
359 | add %o0, %o2, %o0 ! advance src ptr | ||
360 | sub %o5, %o4, %o5 ! continue table calculation | ||
361 | sll %o2, 1, %g2 ! constant multiplies are fun... | ||
362 | sub %o5, %g2, %o5 ! some more adjustments | ||
363 | jmp %o5 + %lo(12f) ! jump into it, duff style, wheee... | ||
364 | add %o1, %o2, %o1 ! advance dest ptr (carry is clear btw) | ||
365 | cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5) | ||
366 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5) | ||
367 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5) | ||
368 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5) | ||
369 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5) | ||
370 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5) | ||
371 | CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5) | ||
372 | 12: EXT(cctbl, 12b, 22f) ! note for exception table handling | ||
373 | addx %g0, %g7, %g7 | ||
374 | andcc %o3, 0xf, %g0 ! check for low bits set | ||
375 | ccte: bne cc_end_cruft ! something left, handle it out of band | ||
376 | andcc %o3, 8, %g0 ! begin checks for that code | ||
377 | retl ! return | ||
378 | mov %g7, %o0 ! give em the computed checksum | ||
379 | ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
380 | CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
381 | CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
382 | CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) | ||
383 | 11: EXT(ccdbl, 11b, 21f) ! note for exception table handling | ||
384 | sub %g1, 128, %g1 ! detract from length | ||
385 | addx %g0, %g7, %g7 ! add in last carry bit | ||
386 | andcc %g1, 0xffffff80, %g0 ! more to csum? | ||
387 | add %o0, 128, %o0 ! advance src ptr | ||
388 | bne ccdbl ! we did not go negative, continue looping | ||
389 | add %o1, 128, %o1 ! advance dest ptr | ||
390 | b ccmerge ! finish it off, above | ||
391 | andcc %g1, 0x70, %o2 ! can use table? (clears carry btw) | ||
392 | /* ccslow: path taken from the entry check when (src ^ dest) & 3 != 0. Copies with byte stores to dest while summing into %g5 (leading byte, leading halfword, words, trailing halfword, trailing byte), folds %g5 to 16 bits, swaps its two bytes if src started on an odd address, then adds it to %g7 with carry and returns the result in %o0. Register roles per the routine header: %o0=src, %o1=dest, %g1=len, %g7=sum. */ |||
393 | ccslow: cmp %g1, 0 /* any bytes to process? */ | ||
394 | mov 0, %g5 /* %g5 = partial sum for this path */ | ||
395 | bleu 4f /* len == 0: nothing to add, go return */ | ||
396 | andcc %o0, 1, %o5 /* (delay slot) %o5 = src & 1, kept for the final byte swap */ | ||
397 | be,a 1f /* src even: no leading byte */ | ||
398 | srl %g1, 1, %g4 /* (delay slot, only if taken) %g4 = len / 2 */ | ||
399 | sub %g1, 1, %g1 /* src odd: consume one leading byte */ | ||
400 | EX(ldub [%o0], %g5, add %g1, 1) /* leading byte seeds the partial sum */ | ||
401 | add %o0, 1, %o0 | ||
402 | EX2(stb %g5, [%o1]) | ||
403 | srl %g1, 1, %g4 /* %g4 = remaining len / 2 */ | ||
404 | add %o1, 1, %o1 | ||
405 | 1: cmp %g4, 0 /* any halfwords left? */ | ||
406 | be,a 3f /* none: go check for a single trailing byte */ | ||
407 | andcc %g1, 1, %g0 | ||
408 | andcc %o0, 2, %g0 /* src on a 4 byte boundary? */ | ||
409 | be,a 1f /* yes: skip the leading halfword */ | ||
410 | srl %g4, 1, %g4 /* (delay slot, only if taken) %g4 = number of words */ | ||
411 | EX(lduh [%o0], %o4, add %g1, 0) /* leading halfword */ | ||
412 | sub %g1, 2, %g1 | ||
413 | srl %o4, 8, %g2 /* %g2 = high byte of the halfword */ | ||
414 | sub %g4, 1, %g4 | ||
415 | EX2(stb %g2, [%o1]) /* dest is written one byte at a time on this path */ | ||
416 | add %o4, %g5, %g5 /* accumulate the halfword */ | ||
417 | EX2(stb %o4, [%o1 + 1]) | ||
418 | add %o0, 2, %o0 | ||
419 | srl %g4, 1, %g4 /* %g4 = number of words */ | ||
420 | add %o1, 2, %o1 | ||
421 | 1: cmp %g4, 0 /* any whole words? */ | ||
422 | be,a 2f /* none: go check for a trailing halfword */ | ||
423 | andcc %g1, 2, %g0 | ||
424 | EX3(ld [%o0], %o4) /* first word for the loop */ | ||
425 | 5: srl %o4, 24, %g2 /* word loop: store the four bytes of %o4 individually */ | ||
426 | srl %o4, 16, %g3 | ||
427 | EX2(stb %g2, [%o1]) | ||
428 | srl %o4, 8, %g2 | ||
429 | EX2(stb %g3, [%o1 + 1]) | ||
430 | add %o0, 4, %o0 | ||
431 | EX2(stb %g2, [%o1 + 2]) | ||
432 | addcc %o4, %g5, %g5 /* accumulate the word, carry folded in by the addx below */ | ||
433 | EX2(stb %o4, [%o1 + 3]) | ||
434 | addx %g5, %g0, %g5 ! I am now too lazy to optimize this (question it | ||
435 | add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl | ||
436 | subcc %g4, 1, %g4 ! tricks | ||
437 | bne,a 5b | ||
438 | EX3(ld [%o0], %o4) /* (delay slot, only if looping) load the next word */ | ||
439 | sll %g5, 16, %g2 /* fold the 32 bit sum: %g5 = (%g5 & 0xffff) + (%g5 >> 16) */ | ||
440 | srl %g5, 16, %g5 | ||
441 | srl %g2, 16, %g2 | ||
442 | andcc %g1, 2, %g0 /* trailing halfword? (tested at label 2) */ | ||
443 | add %g2, %g5, %g5 | ||
444 | 2: be,a 3f /* no trailing halfword */ | ||
445 | andcc %g1, 1, %g0 | ||
446 | EX(lduh [%o0], %o4, and %g1, 3) /* trailing halfword */ | ||
447 | andcc %g1, 1, %g0 /* trailing byte? (tested at label 3) */ | ||
448 | srl %o4, 8, %g2 | ||
449 | add %o0, 2, %o0 | ||
450 | EX2(stb %g2, [%o1]) | ||
451 | add %g5, %o4, %g5 | ||
452 | EX2(stb %o4, [%o1 + 1]) | ||
453 | add %o1, 2, %o1 | ||
454 | 3: be,a 1f /* no trailing byte */ | ||
455 | sll %g5, 16, %o4 /* (delay slot, only if taken) %o4 = %g5 << 16 */ | ||
456 | EX(ldub [%o0], %g2, add %g0, 1) /* trailing byte */ | ||
457 | sll %g2, 8, %o4 /* summed as the high byte of a halfword */ | ||
458 | EX2(stb %g2, [%o1]) | ||
459 | add %g5, %o4, %g5 | ||
460 | sll %g5, 16, %o4 | ||
461 | 1: addcc %o4, %g5, %g5 /* (%g5 << 16) + %g5: upper half becomes low + high */ | ||
462 | srl %g5, 16, %o4 | ||
463 | addx %g0, %o4, %g5 /* %g5 = that upper half plus the carry out of the addcc */ | ||
464 | orcc %o5, %g0, %g0 /* did src start on an odd address? */ | ||
465 | be 4f /* no: skip the byte swap */ | ||
466 | srl %g5, 8, %o4 /* (delay slot) */ | ||
467 | and %g5, 0xff, %g2 /* swap the two low bytes of %g5 */ | ||
468 | and %o4, 0xff, %o4 | ||
469 | sll %g2, 8, %g2 | ||
470 | or %g2, %o4, %g5 | ||
471 | 4: addcc %g7, %g5, %g7 /* add the partial sum into the incoming sum */ | ||
472 | retl | ||
473 | addx %g0, %g7, %o0 /* (delay slot) return sum plus carry in %o0 */ | ||
474 | __csum_partial_copy_end: | ||
475 | |||
476 | /* We do these strange calculations for the csum_*_from_user case only, ie. | ||
477 | * we only bother with faults on loads... */ | ||
478 | |||
479 | /* o2 = ((g2%20)&3)*8 | ||
480 | * o3 = g1 - (g2/20)*32 - o2 */ | ||
481 | 20: | ||
482 | cmp %g2, 20 | ||
483 | blu,a 1f | ||
484 | and %g2, 3, %o2 | ||
485 | sub %g1, 32, %g1 | ||
486 | b 20b | ||
487 | sub %g2, 20, %g2 | ||
488 | 1: | ||
489 | sll %o2, 3, %o2 | ||
490 | b 31f | ||
491 | sub %g1, %o2, %o3 | ||
492 | |||
493 | /* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8) | ||
494 | * o3 = g1 - (g2/16)*32 - o2 */ | ||
495 | 21: | ||
496 | andcc %g2, 15, %o3 | ||
497 | srl %g2, 4, %g2 | ||
498 | be,a 1f | ||
499 | clr %o2 | ||
500 | add %o3, 1, %o3 | ||
501 | and %o3, 14, %o3 | ||
502 | sll %o3, 3, %o2 | ||
503 | 1: | ||
504 | sll %g2, 5, %g2 | ||
505 | sub %g1, %g2, %o3 | ||
506 | b 31f | ||
507 | sub %o3, %o2, %o3 | ||
508 | |||
509 | /* o0 += (g2/10)*16 - 0x70 | ||
510 | * o1 += (g2/10)*16 - 0x70 | ||
511 | * o2 = (g2 % 10) ? 8 : 0 | ||
512 | * o3 += 0x70 - (g2/10)*16 - o2 */ | ||
513 | 22: | ||
514 | cmp %g2, 10 | ||
515 | blu,a 1f | ||
516 | sub %o0, 0x70, %o0 | ||
517 | add %o0, 16, %o0 | ||
518 | add %o1, 16, %o1 | ||
519 | sub %o3, 16, %o3 | ||
520 | b 22b | ||
521 | sub %g2, 10, %g2 | ||
522 | 1: | ||
523 | sub %o1, 0x70, %o1 | ||
524 | add %o3, 0x70, %o3 | ||
525 | clr %o2 | ||
526 | tst %g2 | ||
527 | bne,a 1f | ||
528 | mov 8, %o2 | ||
529 | 1: | ||
530 | b 31f | ||
531 | sub %o3, %o2, %o3 | ||
532 | 96: /* %o3 = (%g1 & 3) + %g4 * 4, then fall through into 30 */ | ||
533 | and %g1, 3, %g1 | ||
534 | sll %g4, 2, %g4 | ||
535 | add %g1, %g4, %o3 | ||
536 | 30: | ||
537 | /* %o1 is dst | ||
538 | * %o3 is # bytes to zero out | ||
539 | * %o4 is faulting address | ||
540 | * %o5 is %pc where fault occurred */ | ||
541 | clr %o2 /* nothing to copy on this entry, only zeroing */ | ||
542 | 31: | ||
543 | /* %o0 is src | ||
544 | * %o1 is dst | ||
545 | * %o2 is # of bytes to copy from src to dst | ||
546 | * %o3 is # bytes to zero out | ||
547 | * %o4 is faulting address | ||
548 | * %o5 is %pc where fault occurred */ | ||
549 | save %sp, -104, %sp /* new register window: the %oN listed above are %iN from here on */ | ||
550 | mov %i5, %o0 /* arg 0: %pc where the fault occurred */ | ||
551 | mov %i7, %o1 /* arg 1: %o7 of the faulting code */ | ||
552 | mov %i4, %o2 /* arg 2: faulting address */ | ||
553 | call lookup_fault | ||
554 | mov %g7, %i4 /* (delay slot) stash %g7 in %i4 */ | ||
555 | cmp %o0, 2 /* any result other than 2 skips the copy/zero step */ | ||
556 | bne 1f | ||
557 | add %g0, -EFAULT, %i5 /* (delay slot) %i5 = -EFAULT, stored at label 1 below */ | ||
558 | tst %i2 /* any bytes to copy first? */ | ||
559 | be 2f /* no: go straight to zeroing */ | ||
560 | mov %i0, %o1 /* (delay slot) __memcpy source = src */ | ||
561 | mov %i1, %o0 /* __memcpy destination = dst */ | ||
562 | 5: | ||
563 | call __memcpy | ||
564 | mov %i2, %o2 /* (delay slot) length = # of bytes to copy */ | ||
565 | tst %o0 | ||
566 | bne,a 2f /* nonzero return: zero the copy region as well */ | ||
567 | add %i3, %i2, %i3 /* (delay slot, only if taken) zero length += copy length */ | ||
568 | add %i1, %i2, %i1 /* zero return: start zeroing after the copied bytes */ | ||
569 | 2: | ||
570 | mov %i1, %o0 | ||
571 | 6: | ||
572 | call __bzero | ||
573 | mov %i3, %o1 /* (delay slot) __bzero(dst, # bytes to zero) */ | ||
574 | 1: | ||
575 | ld [%sp + 168], %o2 ! struct_ptr of parent | ||
576 | st %i5, [%o2] /* store -EFAULT through that pointer */ | ||
577 | ret | ||
578 | restore | ||
579 | |||
580 | .section __ex_table,#alloc | ||
581 | .align 4 | ||
582 | .word 5b,2 /* table entries for the __memcpy (5) and __bzero (6) call sites; NOTE(review): the 2 presumably matches the lookup_fault result tested above - confirm */ | ||
583 | .word 6b,2 | ||
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S new file mode 100644 index 000000000000..577505b692ae --- /dev/null +++ b/arch/sparc/lib/copy_user.S | |||
@@ -0,0 +1,492 @@ | |||
1 | /* copy_user.S: Sparc optimized copy_from_user and copy_to_user code. | ||
2 | * | ||
3 | * Copyright(C) 1995 Linus Torvalds | ||
4 | * Copyright(C) 1996 David S. Miller | ||
5 | * Copyright(C) 1996 Eddie C. Dost | ||
6 | * Copyright(C) 1996,1998 Jakub Jelinek | ||
7 | * | ||
8 | * derived from: | ||
9 | * e-mail between David and Eddie. | ||
10 | * | ||
11 | * Returns 0 if successful, otherwise count of bytes not copied yet | ||
12 | */ | ||
13 | |||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/asmmacro.h> | ||
16 | #include <asm/page.h> | ||
17 | /* EX(x,y,a,b): emit the instruction "x,y" at local label 98, a .fixup stub at label 99 that branches to fixupretl with "a, b, %g3" in its delay slot, and an __ex_table entry pairing 98 with 99. fixupretl returns %g3 in %o0. */ |||
18 | /* Work around cpp -rob */ | ||
19 | #define ALLOC #alloc | ||
20 | #define EXECINSTR #execinstr | ||
21 | #define EX(x,y,a,b) \ | ||
22 | 98: x,y; \ | ||
23 | .section .fixup,ALLOC,EXECINSTR; \ | ||
24 | .align 4; \ | ||
25 | 99: ba fixupretl; \ | ||
26 | a, b, %g3; \ | ||
27 | .section __ex_table,ALLOC; \ | ||
28 | .align 4; \ | ||
29 | .word 98b, 99b; \ | ||
30 | .text; \ | ||
31 | .align 4 | ||
32 | /* EX2(x,y,c,d,e,a,b): same layout as the EX macro, except that the .fixup stub first executes "c, d, e" and only then branches to fixupretl with "a, b, %g3" in the delay slot. */ |||
33 | #define EX2(x,y,c,d,e,a,b) \ | ||
34 | 98: x,y; \ | ||
35 | .section .fixup,ALLOC,EXECINSTR; \ | ||
36 | .align 4; \ | ||
37 | 99: c, d, e; \ | ||
38 | ba fixupretl; \ | ||
39 | a, b, %g3; \ | ||
40 | .section __ex_table,ALLOC; \ | ||
41 | .align 4; \ | ||
42 | .word 98b, 99b; \ | ||
43 | .text; \ | ||
44 | .align 4 | ||
45 | /* EXO2(x,y): emit "x, y" at local label 98 with an __ex_table entry whose handler is label 97 (97f); no per-use stub is generated. Label 97 in the .fixup section of this file sets %g3 = %o2 and falls into fixupretl. */ |||
46 | #define EXO2(x,y) \ | ||
47 | 98: x, y; \ | ||
48 | .section __ex_table,ALLOC; \ | ||
49 | .align 4; \ | ||
50 | .word 98b, 97f; \ | ||
51 | .text; \ | ||
52 | .align 4 | ||
53 | /* EXT(start,end,handler): emit a four-word __ex_table entry: start, 0, end, handler. NOTE(review): presumably a range entry covering the instructions from start to end - confirm against the exception table lookup code. */ |||
54 | #define EXT(start,end,handler) \ | ||
55 | .section __ex_table,ALLOC; \ | ||
56 | .align 4; \ | ||
57 | .word start, 0, end, handler; \ | ||
58 | .text; \ | ||
59 | .align 4 | ||
60 | |||
61 | /* Please do not change following macros unless you change logic used | ||
62 | * in .fixup at the end of this file as well | ||
63 | */ | ||
64 | /* MOVE_BIGCHUNK: copy 32 bytes from [src + offset] to [dst + offset] using four ldd loads into t0..t7 followed by eight word (st) stores. */ |||
65 | /* Both these macros have to start with exactly the same insn */ | ||
66 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
67 | ldd [%src + (offset) + 0x00], %t0; \ | ||
68 | ldd [%src + (offset) + 0x08], %t2; \ | ||
69 | ldd [%src + (offset) + 0x10], %t4; \ | ||
70 | ldd [%src + (offset) + 0x18], %t6; \ | ||
71 | st %t0, [%dst + (offset) + 0x00]; \ | ||
72 | st %t1, [%dst + (offset) + 0x04]; \ | ||
73 | st %t2, [%dst + (offset) + 0x08]; \ | ||
74 | st %t3, [%dst + (offset) + 0x0c]; \ | ||
75 | st %t4, [%dst + (offset) + 0x10]; \ | ||
76 | st %t5, [%dst + (offset) + 0x14]; \ | ||
77 | st %t6, [%dst + (offset) + 0x18]; \ | ||
78 | st %t7, [%dst + (offset) + 0x1c]; | ||
79 | /* MOVE_BIGALIGNCHUNK: the same 32-byte copy as MOVE_BIGCHUNK, but the stores are four doubleword (std) instructions instead of eight st. */ |||
80 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
81 | ldd [%src + (offset) + 0x00], %t0; \ | ||
82 | ldd [%src + (offset) + 0x08], %t2; \ | ||
83 | ldd [%src + (offset) + 0x10], %t4; \ | ||
84 | ldd [%src + (offset) + 0x18], %t6; \ | ||
85 | std %t0, [%dst + (offset) + 0x00]; \ | ||
86 | std %t2, [%dst + (offset) + 0x08]; \ | ||
87 | std %t4, [%dst + (offset) + 0x10]; \ | ||
88 | std %t6, [%dst + (offset) + 0x18]; | ||
89 | /* MOVE_LASTCHUNK: copy the 16 bytes starting at [src - offset - 0x10] to [dst - offset - 0x10] with two ldd loads and four st stores (six instructions in total). */ |||
90 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | ||
91 | ldd [%src - (offset) - 0x10], %t0; \ | ||
92 | ldd [%src - (offset) - 0x08], %t2; \ | ||
93 | st %t0, [%dst - (offset) - 0x10]; \ | ||
94 | st %t1, [%dst - (offset) - 0x0c]; \ | ||
95 | st %t2, [%dst - (offset) - 0x08]; \ | ||
96 | st %t3, [%dst - (offset) - 0x04]; | ||
97 | /* MOVE_HALFCHUNK: copy 8 bytes from [src + offset] to [dst + offset] as four halfwords (lduh loads, sth stores). */ |||
98 | #define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ | ||
99 | lduh [%src + (offset) + 0x00], %t0; \ | ||
100 | lduh [%src + (offset) + 0x02], %t1; \ | ||
101 | lduh [%src + (offset) + 0x04], %t2; \ | ||
102 | lduh [%src + (offset) + 0x06], %t3; \ | ||
103 | sth %t0, [%dst + (offset) + 0x00]; \ | ||
104 | sth %t1, [%dst + (offset) + 0x02]; \ | ||
105 | sth %t2, [%dst + (offset) + 0x04]; \ | ||
106 | sth %t3, [%dst + (offset) + 0x06]; | ||
107 | /* MOVE_SHORTCHUNK: copy the 2 bytes starting at [src - offset - 2] to [dst - offset - 2] with byte loads and stores (four instructions in total). */ |||
108 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ | ||
109 | ldub [%src - (offset) - 0x02], %t0; \ | ||
110 | ldub [%src - (offset) - 0x01], %t1; \ | ||
111 | stb %t0, [%dst - (offset) - 0x02]; \ | ||
112 | stb %t1, [%dst - (offset) - 0x01]; | ||
113 | |||
114 | .text | ||
115 | .align 4 | ||
116 | |||
117 | .globl __copy_user_begin | ||
118 | __copy_user_begin: | ||
119 | /* dword_align: reached from __copy_user when src (%o1) is not 4 byte aligned; on that path the low two bits of src and dst are equal. Copies one byte and/or one halfword so that src reaches a 4 byte boundary, decrementing %o2 accordingly, then continues at label 3 in __copy_user. */ |||
120 | .globl __copy_user | ||
121 | dword_align: | ||
122 | andcc %o1, 1, %g0 /* src odd? */ | ||
123 | be 4f /* no: a single halfword is enough */ | ||
124 | andcc %o1, 2, %g0 /* (delay slot) test bit 1 of src for the bne below */ | ||
125 | |||
126 | EXO2(ldub [%o1], %g2) /* copy one byte */ | ||
127 | add %o1, 1, %o1 | ||
128 | EXO2(stb %g2, [%o0]) | ||
129 | sub %o2, 1, %o2 | ||
130 | bne 3f /* bit 1 was set: src is 4 byte aligned after this byte */ | ||
131 | add %o0, 1, %o0 | ||
132 | |||
133 | EXO2(lduh [%o1], %g2) /* otherwise a halfword is still needed */ | ||
134 | add %o1, 2, %o1 | ||
135 | EXO2(sth %g2, [%o0]) | ||
136 | sub %o2, 2, %o2 | ||
137 | b 3f | ||
138 | add %o0, 2, %o0 | ||
139 | 4: | ||
140 | EXO2(lduh [%o1], %g2) /* src even but not 4 byte aligned: copy one halfword */ | ||
141 | add %o1, 2, %o1 | ||
142 | EXO2(sth %g2, [%o0]) | ||
143 | sub %o2, 2, %o2 | ||
144 | b 3f | ||
145 | add %o0, 2, %o0 | ||
146 | /* __copy_user: entry point and word-aligned fast path. Dispatches to cannot_optimize when the low two bits of src and dst differ, to short_aligned_end when len <= 15, and to dword_align when src is not 4 byte aligned. Otherwise it copies 128-byte blocks, then 16-byte chunks through a computed jump into copy_user_table, then the remaining 8/4/2/1 bytes, and returns 0 in %o0. Faults are handled through the __ex_table entries emitted by the EX/EX2/EXO2/EXT macros. */ |||
147 | __copy_user: /* %o0=dst %o1=src %o2=len */ | ||
148 | xor %o0, %o1, %o4 | ||
149 | 1: | ||
150 | andcc %o4, 3, %o5 /* %o5 = (dst ^ src) & 3 */ | ||
151 | 2: | ||
152 | bne cannot_optimize /* low two bits differ */ | ||
153 | cmp %o2, 15 /* (delay slot) flags for the bleu here and in cannot_optimize */ | ||
154 | |||
155 | bleu short_aligned_end /* len <= 15 */ | ||
156 | andcc %o1, 3, %g0 /* (delay slot) src 4 byte aligned? */ | ||
157 | |||
158 | bne dword_align /* no: align first; dword_align comes back to label 3 */ | ||
159 | 3: | ||
160 | andcc %o1, 4, %g0 /* (also the delay slot of the bne above) src 8 byte aligned? */ | ||
161 | |||
162 | be 2f | ||
163 | mov %o2, %g1 /* (delay slot) %g1 = remaining length */ | ||
164 | |||
165 | EXO2(ld [%o1], %o4) /* copy one word so that src becomes 8 byte aligned */ | ||
166 | sub %g1, 4, %g1 | ||
167 | EXO2(st %o4, [%o0]) | ||
168 | add %o1, 4, %o1 | ||
169 | add %o0, 4, %o0 | ||
170 | 2: | ||
171 | andcc %g1, 0xffffff80, %g7 /* %g7 = bytes to copy in whole 128-byte blocks */ | ||
172 | be 3f /* none */ | ||
173 | andcc %o0, 4, %g0 /* (delay slot) dst 8 byte aligned too? */ | ||
174 | |||
175 | be ldd_std + 4 /* yes: use the std loop; its first ldd is skipped because the identical ldd at 5 below runs as this delay slot */ | ||
176 | 5: | ||
177 | MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
178 | MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
179 | MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
180 | MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
181 | 80: | ||
182 | EXT(5b, 80b, 50f) | ||
183 | subcc %g7, 128, %g7 | ||
184 | add %o1, 128, %o1 | ||
185 | bne 5b | ||
186 | add %o0, 128, %o0 | ||
187 | 3: | ||
188 | andcc %g1, 0x70, %g7 /* %g7 = bytes to copy in 16-byte chunks */ | ||
189 | be copy_user_table_end /* none */ | ||
190 | andcc %g1, 8, %g0 /* (delay slot) flags for the be at copy_user_table_end */ | ||
191 | |||
192 | sethi %hi(copy_user_table_end), %o5 | ||
193 | srl %g7, 1, %o4 | ||
194 | add %g7, %o4, %o4 /* %o4 = %g7 * 1.5: each 16-byte MOVE_LASTCHUNK is 6 instructions (24 bytes of code) */ | ||
195 | add %o1, %g7, %o1 /* advance src; the table uses negative offsets */ | ||
196 | sub %o5, %o4, %o5 | ||
197 | jmpl %o5 + %lo(copy_user_table_end), %g0 /* enter the table %g7 / 16 chunks before its end */ | ||
198 | add %o0, %g7, %o0 /* (delay slot) advance dst */ | ||
199 | |||
200 | copy_user_table: | ||
201 | MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | ||
202 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | ||
203 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | ||
204 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | ||
205 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | ||
206 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | ||
207 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | ||
208 | copy_user_table_end: | ||
209 | EXT(copy_user_table, copy_user_table_end, 51f) | ||
210 | be copy_user_last7 /* (len & 8) == 0: no doubleword left */ | ||
211 | andcc %g1, 4, %g0 /* (delay slot) flags for the be at copy_user_last7 */ | ||
212 | |||
213 | EX(ldd [%o1], %g2, and %g1, 0xf) /* copy 8 bytes */ | ||
214 | add %o0, 8, %o0 | ||
215 | add %o1, 8, %o1 | ||
216 | EX(st %g2, [%o0 - 0x08], and %g1, 0xf) | ||
217 | EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4) | ||
218 | copy_user_last7: | ||
219 | be 1f /* (len & 4) == 0: no word left */ | ||
220 | andcc %g1, 2, %g0 /* (delay slot) */ | ||
221 | |||
222 | EX(ld [%o1], %g2, and %g1, 7) /* copy 4 bytes */ | ||
223 | add %o1, 4, %o1 | ||
224 | EX(st %g2, [%o0], and %g1, 7) | ||
225 | add %o0, 4, %o0 | ||
226 | 1: | ||
227 | be 1f /* (len & 2) == 0: no halfword left */ | ||
228 | andcc %g1, 1, %g0 /* (delay slot) */ | ||
229 | |||
230 | EX(lduh [%o1], %g2, and %g1, 3) /* copy 2 bytes */ | ||
231 | add %o1, 2, %o1 | ||
232 | EX(sth %g2, [%o0], and %g1, 3) | ||
233 | add %o0, 2, %o0 | ||
234 | 1: | ||
235 | be 1f /* (len & 1) == 0: done */ | ||
236 | nop | ||
237 | |||
238 | EX(ldub [%o1], %g2, add %g0, 1) /* copy the last byte */ | ||
239 | EX(stb %g2, [%o0], add %g0, 1) | ||
240 | 1: | ||
241 | retl | ||
242 | clr %o0 /* (delay slot) return 0 */ | ||
243 | /* ldd_std: 128-byte block loop that stores with std; entered at ldd_std + 4 from __copy_user when dst is 8 byte aligned. When the blocks are done it repeats the 16-byte chunk dispatch into copy_user_table. */ |||
244 | ldd_std: | ||
245 | MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
246 | MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
247 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
248 | MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
249 | 81: | ||
250 | EXT(ldd_std, 81b, 52f) | ||
251 | subcc %g7, 128, %g7 /* %g7 = block bytes still to copy */ | ||
252 | add %o1, 128, %o1 | ||
253 | bne ldd_std | ||
254 | add %o0, 128, %o0 | ||
255 | |||
256 | andcc %g1, 0x70, %g7 /* %g7 = bytes to copy in 16-byte chunks */ | ||
257 | be copy_user_table_end /* none */ | ||
258 | andcc %g1, 8, %g0 /* (delay slot) flags for the be at copy_user_table_end */ | ||
259 | |||
260 | sethi %hi(copy_user_table_end), %o5 | ||
261 | srl %g7, 1, %o4 | ||
262 | add %g7, %o4, %o4 /* %o4 = %g7 * 1.5 = code bytes to back up in the table */ | ||
263 | add %o1, %g7, %o1 | ||
264 | sub %o5, %o4, %o5 | ||
265 | jmpl %o5 + %lo(copy_user_table_end), %g0 | ||
266 | add %o0, %g7, %o0 | ||
267 | /* cannot_optimize: reached when (dst ^ src) & 3 != 0, with %o5 holding that value and the flags from "cmp %o2, 15". Goes to short_end for len <= 15 and to byte_chunk unless %o5 == 2; for %o5 == 2 it copies 16 bytes per iteration as halfwords, after one leading byte if src is odd. */ |||
268 | cannot_optimize: | ||
269 | bleu short_end /* len <= 15 */ | ||
270 | cmp %o5, 2 /* (delay slot) */ | ||
271 | |||
272 | bne byte_chunk /* low bits differ by something other than 2: byte loop */ | ||
273 | and %o2, 0xfffffff0, %o3 /* (delay slot) %o3 = bytes to copy in 16-byte rounds */ | ||
274 | |||
275 | andcc %o1, 1, %g0 /* src odd? */ | ||
276 | be 10f | ||
277 | nop | ||
278 | |||
279 | EXO2(ldub [%o1], %g2) /* copy one byte to make src even */ | ||
280 | add %o1, 1, %o1 | ||
281 | EXO2(stb %g2, [%o0]) | ||
282 | sub %o2, 1, %o2 | ||
283 | andcc %o2, 0xfffffff0, %o3 /* recompute the 16-byte round count */ | ||
284 | be short_end /* fewer than 16 bytes left */ | ||
285 | add %o0, 1, %o0 | ||
286 | 10: | ||
287 | MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | ||
288 | MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5) | ||
289 | 82: | ||
290 | EXT(10b, 82b, 53f) | ||
291 | subcc %o3, 0x10, %o3 | ||
292 | add %o1, 0x10, %o1 | ||
293 | bne 10b | ||
294 | add %o0, 0x10, %o0 | ||
295 | b 2f /* finish through the table dispatch at label 2 in short_end */ | ||
296 | and %o2, 0xe, %o3 /* (delay slot) %o3 = remaining even byte count */ | ||
297 | /* byte_chunk: copy 16 bytes per iteration with byte loads and stores; the negative offset arguments make MOVE_SHORTCHUNK address [src + 0] through [src + 0xf]. %o3 counts the bytes left for this loop; control then falls through to short_end. */ |||
298 | byte_chunk: | ||
299 | MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3) | ||
300 | MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3) | ||
301 | MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3) | ||
302 | MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3) | ||
303 | MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3) | ||
304 | MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3) | ||
305 | MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3) | ||
306 | MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3) | ||
307 | 83: | ||
308 | EXT(byte_chunk, 83b, 54f) | ||
309 | subcc %o3, 0x10, %o3 | ||
310 | add %o1, 0x10, %o1 | ||
311 | bne byte_chunk | ||
312 | add %o0, 0x10, %o0 | ||
313 | /* short_end: copy the last (len & 0xe) bytes through a computed jump into a table of 2-byte MOVE_SHORTCHUNK copies, then one final byte if len is odd, and return 0 in %o0. */ |||
314 | short_end: | ||
315 | and %o2, 0xe, %o3 /* %o3 = even number of bytes left (0..14) */ | ||
316 | 2: | ||
317 | sethi %hi(short_table_end), %o5 | ||
318 | sll %o3, 3, %o4 /* %o4 = %o3 * 8: each 2-byte MOVE_SHORTCHUNK is 4 instructions (16 bytes of code) */ | ||
319 | add %o0, %o3, %o0 /* advance dst; the table uses negative offsets */ | ||
320 | sub %o5, %o4, %o5 | ||
321 | add %o1, %o3, %o1 /* advance src */ | ||
322 | jmpl %o5 + %lo(short_table_end), %g0 /* enter the table %o3 / 2 chunks before its end */ | ||
323 | andcc %o2, 1, %g0 /* (delay slot) odd byte left? tested at short_table_end */ | ||
324 | 84: | ||
325 | MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | ||
326 | MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | ||
327 | MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | ||
328 | MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | ||
329 | MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | ||
330 | MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | ||
331 | MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | ||
332 | short_table_end: | ||
333 | EXT(84b, short_table_end, 55f) | ||
334 | be 1f /* len even: done */ | ||
335 | nop | ||
336 | EX(ldub [%o1], %g2, add %g0, 1) /* copy the last byte */ | ||
337 | EX(stb %g2, [%o0], add %g0, 1) | ||
338 | 1: | ||
339 | retl | ||
340 | clr %o0 /* (delay slot) return 0 */ | ||
341 | /* short_aligned_end: len <= 15 and the low two bits of src and dst are equal. If src is not 4 byte aligned, use short_end; otherwise copy 8 bytes when (len & 8) is set and finish in copy_user_last7 with %g1 = len. */ |||
342 | short_aligned_end: | ||
343 | bne short_end /* flags from "andcc %o1, 3" at the call site: src not 4 byte aligned */ | ||
344 | andcc %o2, 8, %g0 /* (delay slot) */ | ||
345 | |||
346 | be 1f /* (len & 8) == 0: no 8-byte step */ | ||
347 | andcc %o2, 4, %g0 /* (delay slot) flags for the be at copy_user_last7 */ | ||
348 | |||
349 | EXO2(ld [%o1 + 0x00], %g2) /* copy 8 bytes as two words */ | ||
350 | EXO2(ld [%o1 + 0x04], %g3) | ||
351 | add %o1, 8, %o1 | ||
352 | EXO2(st %g2, [%o0 + 0x00]) | ||
353 | EX(st %g3, [%o0 + 0x04], sub %o2, 4) | ||
354 | add %o0, 8, %o0 | ||
355 | 1: | ||
356 | b copy_user_last7 | ||
357 | mov %o2, %g1 /* (delay slot) copy_user_last7 tests the length bits in %g1 */ | ||
358 | /* 97 / fixupretl: common exit for the fault fixups. On entry to fixupretl %g3 holds the value to return. If %o0 >= %hi(PAGE_OFFSET) and %o1 < %hi(PAGE_OFFSET), __bzero(%o0, %g3) is called first. NOTE(review): presumably this is the copy-from-user case, zeroing the part of the kernel destination that was not filled - confirm. */ |||
359 | .section .fixup,#alloc,#execinstr | ||
360 | .align 4 | ||
361 | 97: | ||
362 | mov %o2, %g3 /* handler for EXO2 sites: report %o2 as the count */ | ||
363 | fixupretl: | ||
364 | sethi %hi(PAGE_OFFSET), %g1 | ||
365 | cmp %o0, %g1 | ||
366 | blu 1f /* %o0 below %hi(PAGE_OFFSET): skip the zeroing */ | ||
367 | cmp %o1, %g1 /* (delay slot) */ | ||
368 | bgeu 1f /* %o1 at or above it: skip the zeroing */ | ||
369 | nop | ||
370 | save %sp, -64, %sp | ||
371 | mov %i0, %o0 /* first argument: %o0 as it was before the save */ | ||
372 | call __bzero | ||
373 | mov %g3, %o1 /* (delay slot) second argument: %g3 */ | ||
374 | restore | ||
375 | 1: retl | ||
376 | mov %g3, %o0 /* (delay slot) return %g3 */ | ||
377 | |||
378 | /* exception routine sets %g2 to (broken_insn - first_insn)>>2 */ | ||
379 | 50: | ||
380 | /* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK | ||
381 | * happens. This is derived from the amount ldd reads, st stores, etc. | ||
382 | * x = g2 % 12; | ||
383 | * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4); | ||
384 | * o0 += (g2 / 12) * 32; | ||
385 | */ | ||
386 | cmp %g2, 12 | ||
387 | add %o0, %g7, %o0 | ||
388 | bcs 1f | ||
389 | cmp %g2, 24 | ||
390 | bcs 2f | ||
391 | cmp %g2, 36 | ||
392 | bcs 3f | ||
393 | nop | ||
394 | sub %g2, 12, %g2 | ||
395 | sub %g7, 32, %g7 | ||
396 | 3: sub %g2, 12, %g2 | ||
397 | sub %g7, 32, %g7 | ||
398 | 2: sub %g2, 12, %g2 | ||
399 | sub %g7, 32, %g7 | ||
400 | 1: cmp %g2, 4 | ||
401 | bcs,a 60f | ||
402 | clr %g2 | ||
403 | sub %g2, 4, %g2 | ||
404 | sll %g2, 2, %g2 | ||
405 | 60: and %g1, 0x7f, %g3 | ||
406 | sub %o0, %g7, %o0 | ||
407 | add %g3, %g7, %g3 | ||
408 | ba fixupretl | ||
409 | sub %g3, %g2, %g3 | ||
410 | 51: | ||
411 | /* i = 41 - g2; j = i % 6; | ||
412 | * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16; | ||
413 | * o0 -= (i / 6) * 16 + 16; | ||
414 | */ | ||
415 | neg %g2 | ||
416 | and %g1, 0xf, %g1 | ||
417 | add %g2, 41, %g2 | ||
418 | add %o0, %g1, %o0 | ||
419 | 1: cmp %g2, 6 | ||
420 | bcs,a 2f | ||
421 | cmp %g2, 4 | ||
422 | add %g1, 16, %g1 | ||
423 | b 1b | ||
424 | sub %g2, 6, %g2 | ||
425 | 2: bcc,a 2f | ||
426 | mov 16, %g2 | ||
427 | inc %g2 | ||
428 | sll %g2, 2, %g2 | ||
429 | 2: add %g1, %g2, %g3 | ||
430 | ba fixupretl | ||
431 | sub %o0, %g3, %o0 | ||
432 | 52: | ||
433 | /* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0; | ||
434 | o0 += (g2 / 8) * 32 */ | ||
435 | andn %g2, 7, %g4 | ||
436 | add %o0, %g7, %o0 | ||
437 | andcc %g2, 4, %g0 | ||
438 | and %g2, 3, %g2 | ||
439 | sll %g4, 2, %g4 | ||
440 | sll %g2, 3, %g2 | ||
441 | bne 60b | ||
442 | sub %g7, %g4, %g7 | ||
443 | ba 60b | ||
444 | clr %g2 | ||
445 | 53: | ||
446 | /* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0; | ||
447 | o0 += (g2 & 8) */ | ||
448 | and %g2, 3, %g4 | ||
449 | andcc %g2, 4, %g0 | ||
450 | and %g2, 8, %g2 | ||
451 | sll %g4, 1, %g4 | ||
452 | be 1f | ||
453 | add %o0, %g2, %o0 | ||
454 | add %g2, %g4, %g2 | ||
455 | 1: and %o2, 0xf, %g3 | ||
456 | add %g3, %o3, %g3 | ||
457 | ba fixupretl | ||
458 | sub %g3, %g2, %g3 | ||
459 | 54: | ||
460 | /* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0; | ||
461 | o0 += (g2 / 4) * 2 */ | ||
462 | srl %g2, 2, %o4 | ||
463 | and %g2, 1, %o5 | ||
464 | srl %g2, 1, %g2 | ||
465 | add %o4, %o4, %o4 | ||
466 | and %o5, %g2, %o5 | ||
467 | and %o2, 0xf, %o2 | ||
468 | add %o0, %o4, %o0 | ||
469 | sub %o3, %o5, %o3 | ||
470 | sub %o2, %o4, %o2 | ||
471 | ba fixupretl | ||
472 | add %o2, %o3, %g3 | ||
473 | 55: | ||
474 | /* i = 27 - g2; | ||
475 | g3 = (o2 & 1) + i / 4 * 2 + !(i & 3); | ||
476 | o0 -= i / 4 * 2 + 1 */ | ||
477 | neg %g2 | ||
478 | and %o2, 1, %o2 | ||
479 | add %g2, 27, %g2 | ||
480 | srl %g2, 2, %o5 | ||
481 | andcc %g2, 3, %g0 | ||
482 | mov 1, %g2 | ||
483 | add %o5, %o5, %o5 | ||
484 | be,a 1f | ||
485 | clr %g2 | ||
486 | 1: add %g2, %o5, %g3 | ||
487 | sub %o0, %g3, %o0 | ||
488 | ba fixupretl | ||
489 | add %g3, %o2, %g3 | ||
490 | |||
491 | .globl __copy_user_end | ||
492 | __copy_user_end: | ||
diff --git a/arch/sparc/lib/debuglocks.c b/arch/sparc/lib/debuglocks.c new file mode 100644 index 000000000000..fb182352782c --- /dev/null +++ b/arch/sparc/lib/debuglocks.c | |||
@@ -0,0 +1,202 @@ | |||
1 | /* $Id: debuglocks.c,v 1.11 2001/09/20 00:35:31 davem Exp $ | ||
2 | * debuglocks.c: Debugging versions of SMP locking primitives. | ||
3 | * | ||
4 | * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) | ||
5 | * Copyright (C) 1998-99 Anton Blanchard (anton@progsoc.uts.edu.au) | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/threads.h> /* For NR_CPUS */ | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <asm/psr.h> | ||
13 | #include <asm/system.h> | ||
14 | |||
15 | #ifdef CONFIG_SMP | ||
16 | |||
17 | /* Some notes on how these debugging routines work. When a lock is acquired | ||
18 | * an extra debugging member lock->owner_pc is set to the caller of the lock | ||
19 | * acquisition routine. Right before releasing a lock, the debugging program | ||
20 | * counter is cleared to zero. | ||
21 | * | ||
22 | * Furthermore, since PC's are 4 byte aligned on Sparc, we stuff the CPU | ||
23 | * number of the owner in the lowest two bits. | ||
24 | */ | ||
25 | /* STORE_CALLER(A): copy the %i7 register into A. The notes above describe owner_pc as being set to the caller of the lock acquisition routine. */ |||
26 | #define STORE_CALLER(A) __asm__ __volatile__("mov %%i7, %0" : "=r" (A)); | ||
27 | /* show: print a "stuck" report for a spinlock: the str prefix, the lock address, the current CPU, the caller PC, and the owner PC and owner CPU decoded from lock->owner_pc. */ |||
28 | static inline void show(char *str, spinlock_t *lock, unsigned long caller) | ||
29 | { | ||
30 | int cpu = smp_processor_id(); | ||
31 | |||
32 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n",str, | ||
33 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); /* owner_pc: PC in the upper bits, CPU number in the low two */ | ||
34 | } | ||
35 | /* show_read: same "stuck" report as show(), for an rwlock_t; used by the read lock and read unlock paths below. */ |||
36 | static inline void show_read(char *str, rwlock_t *lock, unsigned long caller) | ||
37 | { | ||
38 | int cpu = smp_processor_id(); | ||
39 | |||
40 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", str, | ||
41 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); /* owner_pc: PC in the upper bits, CPU number in the low two */ | ||
42 | } | ||
43 | /* show_write: "stuck" report for a writer waiting on an rwlock_t: the same fields as show_read(), followed by the recorded reader PC of every CPU slot, all on one output line. */ |||
44 | static inline void show_write(char *str, rwlock_t *lock, unsigned long caller) | ||
45 | { | ||
46 | int cpu = smp_processor_id(); | ||
47 | int i; | ||
48 | |||
49 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)", str, | ||
50 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); /* no newline yet: the reader list follows */ | ||
51 | |||
52 | for(i = 0; i < NR_CPUS; i++) | ||
53 | printk(" reader[%d]=%08lx", i, lock->reader_pc[i]); | ||
54 | |||
55 | printk("\n"); | ||
56 | } | ||
57 | /* INIT_STUCK: number of wait-loop iterations between "stuck" reports in the lock routines below. */ |||
58 | #undef INIT_STUCK | ||
59 | #define INIT_STUCK 100000000 | ||
60 | /* _do_spin_lock: acquire the spinlock, spinning until it is free. Every INIT_STUCK wait iterations a report is printed via show() with str as prefix. On success the caller PC and CPU number are recorded in lock->owner_pc. */ |||
61 | void _do_spin_lock(spinlock_t *lock, char *str) | ||
62 | { | ||
63 | unsigned long caller; | ||
64 | unsigned long val; | ||
65 | int cpu = smp_processor_id(); | ||
66 | int stuck = INIT_STUCK; | ||
67 | |||
68 | STORE_CALLER(caller); | ||
69 | |||
70 | again: | ||
71 | __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); /* atomic test-and-set: val = old lock byte, the byte becomes 0xff */ | ||
72 | if(val) { | ||
73 | while(lock->lock) { /* already held: wait with plain reads until it looks free */ | ||
74 | if (!--stuck) { | ||
75 | show(str, lock, caller); | ||
76 | stuck = INIT_STUCK; | ||
77 | } | ||
78 | barrier(); | ||
79 | } | ||
80 | goto again; /* retry the atomic acquire */ | ||
81 | } | ||
82 | lock->owner_pc = (cpu & 3) | (caller & ~3); /* caller PC with the CPU number in the low two bits */ | ||
83 | } | ||
84 | /* _spin_trylock: make one attempt to take the spinlock without waiting. Returns 1 if the lock was acquired (owner_pc is then recorded), 0 if it was already held. */ |||
85 | int _spin_trylock(spinlock_t *lock) | ||
86 | { | ||
87 | unsigned long val; | ||
88 | unsigned long caller; | ||
89 | int cpu = smp_processor_id(); | ||
90 | |||
91 | STORE_CALLER(caller); | ||
92 | |||
93 | __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); /* atomic test-and-set: val = old lock byte */ | ||
94 | if(!val) { | ||
95 | /* We got it, record our identity for debugging. */ | ||
96 | lock->owner_pc = (cpu & 3) | (caller & ~3); | ||
97 | } | ||
98 | return val == 0; | ||
99 | } | ||
100 | /* _do_spin_unlock: release the spinlock; the debugging owner_pc is cleared before the lock byte itself. */ |||
101 | void _do_spin_unlock(spinlock_t *lock) | ||
102 | { | ||
103 | lock->owner_pc = 0; | ||
104 | barrier(); /* keep the compiler from reordering the two stores */ | ||
105 | lock->lock = 0; | ||
106 | } | ||
107 | /* _do_read_lock: take the rwlock for reading. Grabs the byte at offset 3 of rw->lock with ldstub (waiting, with periodic show_read() reports, while rw->lock & 0xff is set), records the caller PC in rw->reader_pc[cpu], then increments rw->lock. */ |||
108 | void _do_read_lock(rwlock_t *rw, char *str) | ||
109 | { | ||
110 | unsigned long caller; | ||
111 | unsigned long val; | ||
112 | int cpu = smp_processor_id(); | ||
113 | int stuck = INIT_STUCK; | ||
114 | |||
115 | STORE_CALLER(caller); | ||
116 | |||
117 | wlock_again: | ||
118 | __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); /* atomic test-and-set of byte 3: val = old byte, the byte becomes 0xff */ | ||
119 | if(val) { | ||
120 | while(rw->lock & 0xff) { /* byte is held: wait until it clears */ | ||
121 | if (!--stuck) { | ||
122 | show_read(str, rw, caller); | ||
123 | stuck = INIT_STUCK; | ||
124 | } | ||
125 | barrier(); | ||
126 | } | ||
127 | goto wlock_again; | ||
128 | } | ||
129 | |||
130 | rw->reader_pc[cpu] = caller; | ||
131 | barrier(); | ||
132 | rw->lock++; /* with the low byte at 0xff this clears the byte and carries 1 into the bits above it */ | ||
133 | } | ||
134 | /* _do_read_unlock: drop a read hold on the rwlock. Grabs the byte at offset 3 of rw->lock with ldstub (waiting like _do_read_lock), clears rw->reader_pc[cpu], then subtracts 0x1ff from rw->lock. */ |||
135 | void _do_read_unlock(rwlock_t *rw, char *str) | ||
136 | { | ||
137 | unsigned long caller; | ||
138 | unsigned long val; | ||
139 | int cpu = smp_processor_id(); | ||
140 | int stuck = INIT_STUCK; | ||
141 | |||
142 | STORE_CALLER(caller); | ||
143 | |||
144 | wlock_again: | ||
145 | __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); /* atomic test-and-set of byte 3: val = old byte, the byte becomes 0xff */ | ||
146 | if(val) { | ||
147 | while(rw->lock & 0xff) { /* byte is held: wait until it clears */ | ||
148 | if (!--stuck) { | ||
149 | show_read(str, rw, caller); | ||
150 | stuck = INIT_STUCK; | ||
151 | } | ||
152 | barrier(); | ||
153 | } | ||
154 | goto wlock_again; | ||
155 | } | ||
156 | |||
157 | rw->reader_pc[cpu] = 0; | ||
158 | barrier(); | ||
159 | rw->lock -= 0x1ff; /* 0x1ff = 0x100 + 0xff: removes the 0xff byte and subtracts 1 from the bits above it */ | ||
160 | } | ||
161 | /* _do_write_lock: take the rwlock for writing. Grabs the byte at offset 3 of rw->lock with ldstub; if any bits above the low byte are still set afterwards, the byte is released again and the routine goes back to waiting for rw->lock to become zero. Periodic reports go through show_write(). On success the caller PC and CPU number are recorded in rw->owner_pc. */ |||
162 | void _do_write_lock(rwlock_t *rw, char *str) | ||
163 | { | ||
164 | unsigned long caller; | ||
165 | unsigned long val; | ||
166 | int cpu = smp_processor_id(); | ||
167 | int stuck = INIT_STUCK; | ||
168 | |||
169 | STORE_CALLER(caller); | ||
170 | |||
171 | wlock_again: | ||
172 | __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); /* atomic test-and-set of byte 3: val = old byte, the byte becomes 0xff */ | ||
173 | if(val) { | ||
174 | wlock_wait: | ||
175 | while(rw->lock) { /* wait until the whole word is zero */ | ||
176 | if (!--stuck) { | ||
177 | show_write(str, rw, caller); | ||
178 | stuck = INIT_STUCK; | ||
179 | } | ||
180 | barrier(); | ||
181 | } | ||
182 | goto wlock_again; | ||
183 | } | ||
184 | |||
185 | if (rw->lock & ~0xff) { /* we hold the byte but bits above it are set (the reader paths adjust those bits) */ | ||
186 | *(((unsigned char *)&rw->lock)+3) = 0; /* release the byte again */ | ||
187 | barrier(); | ||
188 | goto wlock_wait; /* jumps into the wait loop above */ | ||
189 | } | ||
190 | |||
191 | barrier(); | ||
192 | rw->owner_pc = (cpu & 3) | (caller & ~3); /* caller PC with the CPU number in the low two bits */ | ||
193 | } | ||
194 | /* _do_write_unlock: release a write hold; the debugging owner_pc is cleared before the whole lock word is set to zero. */ |||
195 | void _do_write_unlock(rwlock_t *rw) | ||
196 | { | ||
197 | rw->owner_pc = 0; | ||
198 | barrier(); /* keep the compiler from reordering the two stores */ | ||
199 | rw->lock = 0; | ||
200 | } | ||
201 | |||
202 | #endif /* SMP */ | ||
diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S new file mode 100644 index 000000000000..681b3683da9e --- /dev/null +++ b/arch/sparc/lib/divdi3.S | |||
@@ -0,0 +1,295 @@ | |||
1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
2 | |||
3 | This file is part of GNU CC. | ||
4 | |||
5 | GNU CC is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2, or (at your option) | ||
8 | any later version. | ||
9 | |||
10 | GNU CC is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNU CC; see the file COPYING. If not, write to | ||
17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. */ | ||
19 | |||
20 | .data | ||
21 | .align 8 | ||
22 | .globl __clz_tab | ||
23 | __clz_tab: /* 256-byte lookup table: entry i is the bit length of i (0 for 0, 1 for 1, 2 for 2-3, 3 for 4-7, ... 8 for 128-255) */ | ||
24 | .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 /* indices 0-31 */ | ||
25 | .byte 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 /* indices 32-63 */ | ||
26 | .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 /* indices 64-95 */ | ||
27 | .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 /* indices 96-127 */ | ||
28 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 /* indices 128-159 */ | ||
29 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 /* indices 160-191 */ | ||
30 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 /* indices 192-223 */ | ||
31 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 /* indices 224-255 */ | ||
32 | .size __clz_tab,256 | ||
33 | .global .udiv /* external routine; __divdi3 calls it on its zero-divisor path */ | ||
34 | |||
35 | .text | ||
36 | .align 4 | ||
37 | .globl __divdi3 | ||
38 | __divdi3: /* Signed 64-bit divide. After the save: %i0:%i1 = dividend (high:low), %i2:%i3 = divisor (high:low). The quotient is left in %i0:%i1, which the final restore hands back as the caller's %o0:%o1. */ | ||
39 | save %sp,-104,%sp | ||
40 | cmp %i0,0 | ||
41 | bge .LL40 /* dividend not negative: skip the negation */ | ||
42 | mov 0,%l4 /* delay slot: %l4 = result-sign flag, 0 means positive */ | ||
43 | mov -1,%l4 /* dividend negative: flag = -1 */ | ||
44 | sub %g0,%i1,%o0 /* 64-bit negate of %i0:%i1 starts here: low word first */ | ||
45 | mov %o0,%o5 | ||
46 | subcc %g0,%o0,%g0 /* carry is set when the negated low word is non-zero */ | ||
47 | sub %g0,%i0,%o0 | ||
48 | subx %o0,0,%o4 /* high word = -high - carry */ | ||
49 | mov %o4,%i0 | ||
50 | mov %o5,%i1 | ||
51 | .LL40: | ||
52 | cmp %i2,0 | ||
53 | bge .LL84 /* divisor not negative: skip the negation */ | ||
54 | mov %i3,%o4 /* delay slot: %o4 = low word of the divisor */ | ||
55 | xnor %g0,%l4,%l4 /* divisor negative: invert the sign flag */ | ||
56 | sub %g0,%i3,%o0 /* 64-bit negate of %i2:%i3, same pattern as above */ | ||
57 | mov %o0,%o3 | ||
58 | subcc %g0,%o0,%g0 | ||
59 | sub %g0,%i2,%o0 | ||
60 | subx %o0,0,%o2 | ||
61 | mov %o2,%i2 | ||
62 | mov %o3,%i3 | ||
63 | mov %i3,%o4 /* %o4 = low word of the negated divisor */ | ||
64 | .LL84: /* from here on the operands are treated as unsigned magnitudes */ | ||
65 | cmp %i2,0 | ||
66 | bne .LL45 /* divisor high word is non-zero */ | ||
67 | mov %i1,%i3 /* delay slot: %i3 = low word of the dividend */ | ||
68 | cmp %o4,%i0 | ||
69 | bleu .LL46 /* divisor <= dividend high word: the quotient needs two words */ | ||
70 | mov %i3,%o1 | ||
71 | mov 32,%g1 /* 32-step shift/subtract divide of %i0:%o1 by %o4; quotient bits collect in %o1, remainder in %i0 */ | ||
72 | subcc %i0,%o4,%g0 | ||
73 | 1: bcs 5f | ||
74 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
75 | sub %i0,%o4,%i0 ! this kills msb of n | ||
76 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
77 | subcc %g1,1,%g1 | ||
78 | 2: bne 1b | ||
79 | subcc %i0,%o4,%g0 | ||
80 | bcs 3f | ||
81 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
82 | b 3f | ||
83 | sub %i0,%o4,%i0 ! this kills msb of n | ||
84 | 4: sub %i0,%o4,%i0 | ||
85 | 5: addxcc %i0,%i0,%i0 | ||
86 | bcc 2b | ||
87 | subcc %g1,1,%g1 | ||
88 | ! Got carry from n. Subtract next step to cancel this carry. | ||
89 | bne 4b | ||
90 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
91 | sub %i0,%o4,%i0 | ||
92 | 3: xnor %o1,0,%o1 /* the bits were shifted in inverted; complementing yields the quotient */ | ||
93 | b .LL50 | ||
94 | mov 0,%o2 /* delay slot: high quotient word = 0 */ | ||
95 | .LL46: | ||
96 | cmp %o4,0 | ||
97 | bne .LL85 | ||
98 | mov %i0,%o2 /* delay slot: %o2 = dividend high word */ | ||
99 | mov 1,%o0 /* divisor is zero: call .udiv with %o0 = 1, %o1 = 0 and use its result as the divisor */ | ||
100 | call .udiv,0 | ||
101 | mov 0,%o1 | ||
102 | mov %o0,%o4 | ||
103 | mov %i0,%o2 | ||
104 | .LL85: | ||
105 | mov 0,%g3 /* first divide: 0:%o2 (dividend high word) by %o4; high quotient word ends up in %o2, remainder in %g3 */ | ||
106 | mov 32,%g1 | ||
107 | subcc %g3,%o4,%g0 | ||
108 | 1: bcs 5f | ||
109 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
110 | sub %g3,%o4,%g3 ! this kills msb of n | ||
111 | addx %g3,%g3,%g3 ! so this cannot give carry | ||
112 | subcc %g1,1,%g1 | ||
113 | 2: bne 1b | ||
114 | subcc %g3,%o4,%g0 | ||
115 | bcs 3f | ||
116 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
117 | b 3f | ||
118 | sub %g3,%o4,%g3 ! this kills msb of n | ||
119 | 4: sub %g3,%o4,%g3 | ||
120 | 5: addxcc %g3,%g3,%g3 | ||
121 | bcc 2b | ||
122 | subcc %g1,1,%g1 | ||
123 | ! Got carry from n. Subtract next step to cancel this carry. | ||
124 | bne 4b | ||
125 | addcc %o2,%o2,%o2 ! shift n1n0 and a 0-bit in lsb | ||
126 | sub %g3,%o4,%g3 | ||
127 | 3: xnor %o2,0,%o2 /* complement the collected bits: %o2 = high quotient word */ | ||
128 | mov %g3,%i0 /* second divide: remainder:%o1 (dividend low word) by %o4; low quotient word ends up in %o1 */ | ||
129 | mov %i3,%o1 | ||
130 | mov 32,%g1 | ||
131 | subcc %i0,%o4,%g0 | ||
132 | 1: bcs 5f | ||
133 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
134 | sub %i0,%o4,%i0 ! this kills msb of n | ||
135 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
136 | subcc %g1,1,%g1 | ||
137 | 2: bne 1b | ||
138 | subcc %i0,%o4,%g0 | ||
139 | bcs 3f | ||
140 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
141 | b 3f | ||
142 | sub %i0,%o4,%i0 ! this kills msb of n | ||
143 | 4: sub %i0,%o4,%i0 | ||
144 | 5: addxcc %i0,%i0,%i0 | ||
145 | bcc 2b | ||
146 | subcc %g1,1,%g1 | ||
147 | ! Got carry from n. Subtract next step to cancel this carry. | ||
148 | bne 4b | ||
149 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
150 | sub %i0,%o4,%i0 | ||
151 | 3: xnor %o1,0,%o1 /* complement the collected bits: %o1 = low quotient word */ | ||
152 | b .LL86 | ||
153 | mov %o1,%l1 /* delay slot: %l1 = low quotient word */ | ||
154 | .LL45: /* divisor high word is non-zero */ | ||
155 | cmp %i2,%i0 | ||
156 | bleu .LL51 | ||
157 | sethi %hi(65535),%o0 /* delay slot: start building the constant 65535 used at .LL51 */ | ||
158 | b .LL78 /* divisor high word > dividend high word: quotient is 0 */ | ||
159 | mov 0,%o1 | ||
160 | .LL51: /* pick the shift (%o2 = 0, 8, 16 or 24) that brings the top non-zero byte of %i2 down to bits 0-7 */ | ||
161 | or %o0,%lo(65535),%o0 | ||
162 | cmp %i2,%o0 | ||
163 | bgu .LL58 | ||
164 | mov %i2,%o1 | ||
165 | cmp %i2,256 | ||
166 | addx %g0,-1,%o0 /* %o0 = 0 when %i2 < 256, otherwise -1 */ | ||
167 | b .LL64 | ||
168 | and %o0,8,%o2 /* delay slot: shift = 0 or 8 */ | ||
169 | .LL58: | ||
170 | sethi %hi(16777215),%o0 | ||
171 | or %o0,%lo(16777215),%o0 | ||
172 | cmp %i2,%o0 | ||
173 | bgu .LL64 | ||
174 | mov 24,%o2 /* delay slot: shift = 24 (replaced by 16 below when the branch is not taken) */ | ||
175 | mov 16,%o2 | ||
176 | .LL64: | ||
177 | srl %o1,%o2,%o0 /* %o0 = top non-zero byte of %i2 */ | ||
178 | sethi %hi(__clz_tab),%o1 | ||
179 | or %o1,%lo(__clz_tab),%o1 | ||
180 | ldub [%o0+%o1],%o0 /* bit length of that byte */ | ||
181 | add %o0,%o2,%o0 /* bit length of %i2 */ | ||
182 | mov 32,%o1 | ||
183 | subcc %o1,%o0,%o3 /* %o3 = number of leading zero bits in %i2 */ | ||
184 | bne,a .LL72 | ||
185 | sub %o1,%o3,%o1 /* annulled delay slot (runs only when taken): %o1 = 32 - %o3 */ | ||
186 | cmp %i0,%i2 /* no leading zeros: the quotient can only be 0 or 1 */ | ||
187 | bgu .LL74 | ||
188 | cmp %i3,%o4 | ||
189 | blu .LL78 /* dividend low word < divisor low word: quotient 0 */ | ||
190 | mov 0,%o1 | ||
191 | .LL74: | ||
192 | b .LL78 | ||
193 | mov 1,%o1 /* delay slot: quotient 1 */ | ||
194 | .LL72: /* normalise: shift divisor (%i2:%o4) and dividend left by %o3 bits; bits shifted out of the dividend land in %o2 */ | ||
195 | sll %i2,%o3,%o2 | ||
196 | srl %o4,%o1,%o0 | ||
197 | or %o2,%o0,%i2 | ||
198 | sll %o4,%o3,%o4 | ||
199 | srl %i0,%o1,%o2 | ||
200 | sll %i0,%o3,%o0 | ||
201 | srl %i3,%o1,%o1 | ||
202 | or %o0,%o1,%i0 | ||
203 | sll %i3,%o3,%i3 | ||
204 | mov %i0,%o1 /* divide %o2:%o1 by %i2 (normalised divisor high word); quotient estimate ends up in %o1, remainder in %o2 */ | ||
205 | mov 32,%g1 | ||
206 | subcc %o2,%i2,%g0 | ||
207 | 1: bcs 5f | ||
208 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
209 | sub %o2,%i2,%o2 ! this kills msb of n | ||
210 | addx %o2,%o2,%o2 ! so this cannot give carry | ||
211 | subcc %g1,1,%g1 | ||
212 | 2: bne 1b | ||
213 | subcc %o2,%i2,%g0 | ||
214 | bcs 3f | ||
215 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
216 | b 3f | ||
217 | sub %o2,%i2,%o2 ! this kills msb of n | ||
218 | 4: sub %o2,%i2,%o2 | ||
219 | 5: addxcc %o2,%o2,%o2 | ||
220 | bcc 2b | ||
221 | subcc %g1,1,%g1 | ||
222 | ! Got carry from n. Subtract next step to cancel this carry. | ||
223 | bne 4b | ||
224 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
225 | sub %o2,%i2,%o2 | ||
226 | 3: xnor %o1,0,%o1 /* complement the collected bits: %o1 = quotient estimate */ | ||
227 | mov %o2,%i0 /* %i0 = remainder of that divide */ | ||
228 | wr %g0,%o1,%y ! SPARC has 0-3 delay insn after a wr | ||
229 | sra %o4,31,%g2 ! Do not move this insn | ||
230 | and %o1,%g2,%g2 ! Do not move this insn | ||
231 | andcc %g0,0,%g1 ! Do not move this insn | ||
232 | mulscc %g1,%o4,%g1 /* 32 multiply steps plus a final shift step: forms the 64-bit product %o1 * %o4 (normalised divisor low word) */ | ||
233 | mulscc %g1,%o4,%g1 | ||
234 | mulscc %g1,%o4,%g1 | ||
235 | mulscc %g1,%o4,%g1 | ||
236 | mulscc %g1,%o4,%g1 | ||
237 | mulscc %g1,%o4,%g1 | ||
238 | mulscc %g1,%o4,%g1 | ||
239 | mulscc %g1,%o4,%g1 | ||
240 | mulscc %g1,%o4,%g1 | ||
241 | mulscc %g1,%o4,%g1 | ||
242 | mulscc %g1,%o4,%g1 | ||
243 | mulscc %g1,%o4,%g1 | ||
244 | mulscc %g1,%o4,%g1 | ||
245 | mulscc %g1,%o4,%g1 | ||
246 | mulscc %g1,%o4,%g1 | ||
247 | mulscc %g1,%o4,%g1 | ||
248 | mulscc %g1,%o4,%g1 | ||
249 | mulscc %g1,%o4,%g1 | ||
250 | mulscc %g1,%o4,%g1 | ||
251 | mulscc %g1,%o4,%g1 | ||
252 | mulscc %g1,%o4,%g1 | ||
253 | mulscc %g1,%o4,%g1 | ||
254 | mulscc %g1,%o4,%g1 | ||
255 | mulscc %g1,%o4,%g1 | ||
256 | mulscc %g1,%o4,%g1 | ||
257 | mulscc %g1,%o4,%g1 | ||
258 | mulscc %g1,%o4,%g1 | ||
259 | mulscc %g1,%o4,%g1 | ||
260 | mulscc %g1,%o4,%g1 | ||
261 | mulscc %g1,%o4,%g1 | ||
262 | mulscc %g1,%o4,%g1 | ||
263 | mulscc %g1,%o4,%g1 | ||
264 | mulscc %g1,0,%g1 | ||
265 | add %g1,%g2,%o0 /* %o0 = high word of the product, with the %g2 term added for an %o4 whose top bit is set */ | ||
266 | rd %y,%o2 /* %o2 = low word of the product */ | ||
267 | cmp %o0,%i0 | ||
268 | bgu,a .LL78 /* product high word > remainder: the estimate was one too large */ | ||
269 | add %o1,-1,%o1 | ||
270 | bne,a .LL50 /* product high word < remainder: the estimate stands */ | ||
271 | mov 0,%o2 | ||
272 | cmp %o2,%i3 /* high words equal: compare product low word with the shifted dividend low word */ | ||
273 | bleu .LL50 | ||
274 | mov 0,%o2 | ||
275 | add %o1,-1,%o1 | ||
276 | .LL78: | ||
277 | mov 0,%o2 /* high quotient word = 0 */ | ||
278 | .LL50: | ||
279 | mov %o1,%l1 | ||
280 | .LL86: /* here %o2 = high quotient word, %l1 = low quotient word */ | ||
281 | mov %o2,%l0 | ||
282 | mov %l0,%i0 | ||
283 | mov %l1,%i1 | ||
284 | cmp %l4,0 | ||
285 | be .LL81 /* sign flag clear: return the magnitude as is */ | ||
286 | sub %g0,%i1,%o0 /* delay slot: begins the 64-bit negate of the quotient */ | ||
287 | mov %o0,%l3 | ||
288 | subcc %g0,%o0,%g0 | ||
289 | sub %g0,%i0,%o0 | ||
290 | subx %o0,0,%l2 | ||
291 | mov %l2,%i0 | ||
292 | mov %l3,%i1 | ||
293 | .LL81: | ||
294 | ret | ||
295 | restore | ||
diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S new file mode 100644 index 000000000000..95fa48424967 --- /dev/null +++ b/arch/sparc/lib/locks.S | |||
@@ -0,0 +1,72 @@ | |||
1 | /* $Id: locks.S,v 1.16 2000/02/26 11:02:47 anton Exp $ | ||
2 | * locks.S: SMP low-level lock primitives on Sparc. | ||
3 | * | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | * Copyright (C) 1998 Anton Blanchard (anton@progsoc.uts.edu.au) | ||
6 | * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) | ||
7 | */ | ||
8 | |||
9 | #include <asm/ptrace.h> | ||
10 | #include <asm/psr.h> | ||
11 | #include <asm/smp.h> | ||
12 | #include <asm/spinlock.h> | ||
13 | |||
14 | .text | ||
15 | .align 4 | ||
16 | |||
17 | /* Read/writer locks, as usual this is overly clever to make it | ||
18 | * as fast as possible. | ||
19 | */ | ||
20 | |||
21 | /* caches... */ | ||
22 | ___rw_read_enter_spin_on_wlock: /* Wait loop for ___rw_read_enter: %g1 = lock address, %g2 = last value read from byte 3 of the lock. */ | ||
23 | orcc %g2, 0x0, %g0 /* set the condition codes from %g2 */ | ||
24 | be,a ___rw_read_enter /* byte read as zero: go retry the acquire */ | ||
25 | ldstub [%g1 + 3], %g2 /* annulled delay slot (runs only when the branch is taken): atomic fetch-and-set-0xff of the byte */ | ||
26 | b ___rw_read_enter_spin_on_wlock | ||
27 | ldub [%g1 + 3], %g2 /* delay slot: plain re-read of the byte for the next pass */ | ||
28 | ___rw_read_exit_spin_on_wlock: /* Wait loop for ___rw_read_exit: %g1 = lock address, %g2 = last value read from byte 3 of the lock. */ | ||
29 | orcc %g2, 0x0, %g0 /* set the condition codes from %g2 */ | ||
30 | be,a ___rw_read_exit /* byte read as zero: go retry the release */ | ||
31 | ldstub [%g1 + 3], %g2 /* annulled delay slot (runs only when the branch is taken): atomic fetch-and-set-0xff of the byte */ | ||
32 | b ___rw_read_exit_spin_on_wlock | ||
33 | ldub [%g1 + 3], %g2 /* delay slot: plain re-read of the byte for the next pass */ | ||
34 | ___rw_write_enter_spin_on_wlock: /* Wait loop for ___rw_write_enter: %g1 = lock address, %g2 = last value read from the lock. */ | ||
35 | orcc %g2, 0x0, %g0 /* set the condition codes from %g2 */ | ||
36 | be,a ___rw_write_enter /* value read as zero: go retry the acquire */ | ||
37 | ldstub [%g1 + 3], %g2 /* annulled delay slot (runs only when the branch is taken): atomic fetch-and-set-0xff of byte 3 */ | ||
38 | b ___rw_write_enter_spin_on_wlock | ||
39 | ld [%g1], %g2 /* delay slot: re-read the whole lock word, so the loop waits for every bit to clear */ | ||
40 | |||
41 | .globl ___rw_read_enter | ||
42 | ___rw_read_enter: /* Take a read hold on the lock at [%g1]. Expects %g2 to hold the result of an ldstub on [%g1 + 3] (the spin helper re-enters that way) and %g4 to hold the value to put back in %o7 on return. */ | ||
43 | orcc %g2, 0x0, %g0 | ||
44 | bne,a ___rw_read_enter_spin_on_wlock /* the byte was already set: go wait for it */ | ||
45 | ldub [%g1 + 3], %g2 /* annulled delay slot (runs only when the branch is taken): re-read the byte */ | ||
46 | ld [%g1], %g2 /* the ldstub succeeded, so the low byte of this word is now 0xff */ | ||
47 | add %g2, 1, %g2 /* +1 carries out of the 0xff byte: clears it and adds one to the field above it */ | ||
48 | st %g2, [%g1] | ||
49 | retl | ||
50 | mov %g4, %o7 /* delay slot: put %g4 back into %o7 */ | ||
51 | |||
52 | .globl ___rw_read_exit | ||
53 | ___rw_read_exit: /* Drop a read hold on the lock at [%g1]. Expects %g2 to hold the result of an ldstub on [%g1 + 3] (the spin helper re-enters that way) and %g4 to hold the value to put back in %o7 on return. */ | ||
54 | orcc %g2, 0x0, %g0 | ||
55 | bne,a ___rw_read_exit_spin_on_wlock /* the byte was already set: go wait for it */ | ||
56 | ldub [%g1 + 3], %g2 /* annulled delay slot (runs only when the branch is taken): re-read the byte */ | ||
57 | ld [%g1], %g2 /* the ldstub succeeded, so the low byte of this word is now 0xff */ | ||
58 | sub %g2, 0x1ff, %g2 /* 0x1ff = 0x100 + 0xff: clears the 0xff byte and subtracts one from the field above it */ | ||
59 | st %g2, [%g1] | ||
60 | retl | ||
61 | mov %g4, %o7 /* delay slot: put %g4 back into %o7 */ | ||
62 | |||
63 | .globl ___rw_write_enter | ||
64 | ___rw_write_enter: /* Take the write hold on the lock at [%g1]. Expects %g2 to hold the result of an ldstub on [%g1 + 3] (the spin helper re-enters that way) and %g4 to hold the value to put back in %o7 on return. Returns with byte 3 still set to 0xff. */ | ||
65 | orcc %g2, 0x0, %g0 | ||
66 | bne ___rw_write_enter_spin_on_wlock /* the byte was already set: go wait */ | ||
67 | ld [%g1], %g2 /* delay slot (not annulled, always runs): read the whole lock word */ | ||
68 | andncc %g2, 0xff, %g0 /* test the bits above the low byte */ | ||
69 | bne,a ___rw_write_enter_spin_on_wlock /* some are set: cannot take the write hold yet */ | ||
70 | stb %g0, [%g1 + 3] /* annulled delay slot (runs only when the branch is taken): clear the byte again before waiting */ | ||
71 | retl | ||
72 | mov %g4, %o7 /* delay slot: put %g4 back into %o7 */ | ||
diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S new file mode 100644 index 000000000000..35abf5b2bd15 --- /dev/null +++ b/arch/sparc/lib/lshrdi3.S | |||
@@ -0,0 +1,27 @@ | |||
1 | /* $Id: lshrdi3.S,v 1.1 1999/03/21 06:37:45 davem Exp $ */ | ||
2 | |||
3 | .globl __lshrdi3 | ||
4 | __lshrdi3: /* 64-bit logical right shift. In: %o0 = high word, %o1 = low word, %o2 = shift count. Out: shifted value in %o0 (high) and %o1 (low). Uses %g2, %g3, %o4, %o5 as scratch. */ | ||
5 | cmp %o2, 0 | ||
6 | be 3f /* count is 0: return the input unchanged */ | ||
7 | mov 0x20, %g2 /* delay slot: %g2 = 32 */ | ||
8 | |||
9 | sub %g2, %o2, %g2 /* %g2 = 32 - count */ | ||
10 | cmp %g2, 0 | ||
11 | bg 1f /* count < 32 */ | ||
12 | srl %o0, %o2, %o4 /* delay slot: %o4 = high >> count, the result high word when count < 32 */ | ||
13 | |||
14 | clr %o4 /* count >= 32: result high word is 0 */ | ||
15 | neg %g2 /* %g2 = count - 32 */ | ||
16 | b 2f | ||
17 | srl %o0, %g2, %o5 /* delay slot: result low word = high >> (count - 32) */ | ||
18 | 1: | ||
19 | sll %o0, %g2, %g3 /* bits of the high word that drop into the low word */ | ||
20 | srl %o1, %o2, %g2 | ||
21 | or %g2, %g3, %o5 /* result low word = (low >> count) OR (high << (32 - count)) */ | ||
22 | 2: | ||
23 | mov %o4, %o0 | ||
24 | mov %o5, %o1 | ||
25 | 3: | ||
26 | retl | ||
27 | nop | ||
diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S new file mode 100644 index 000000000000..cb4bdb0cc2af --- /dev/null +++ b/arch/sparc/lib/memcmp.S | |||
@@ -0,0 +1,312 @@ | |||
1 | .text | ||
2 | .align 4 | ||
3 | .global __memcmp, memcmp | ||
4 | __memcmp: | ||
5 | memcmp: /* Byte-wise compare. In: %o0 = s1, %o1 = s2, %o2 = n. Out: %o0 = 0 if the first n bytes match, otherwise (s1 byte - s2 byte) at the first mismatch, both taken as unsigned. Leaf routine; uses %g2 and %g3 as scratch. */ | ||
6 | #if 1 | ||
7 | cmp %o2, 0 | ||
8 | ble L3 /* n <= 0 (the count is tested as signed): return 0 */ | ||
9 | mov 0, %g3 /* delay slot: %g3 carries the result, initially 0 */ | ||
10 | L5: | ||
11 | ldub [%o0], %g2 /* zero-extended byte from s1 */ | ||
12 | ldub [%o1], %g3 /* zero-extended byte from s2 */ | ||
13 | sub %g2, %g3, %g2 /* difference, always within -255..255 */ | ||
14 | mov %g2, %g3 /* keep the full-width difference as the result */ | ||
15 | sll %g2, 24, %g2 /* within that range the low 8 bits are non-zero exactly when the bytes differ */ | ||
16 | |||
17 | cmp %g2, 0 | ||
18 | bne L3 /* first mismatch: return the difference */ | ||
19 | add %o0, 1, %o0 /* delay slot: advance s1 */ | ||
20 | |||
21 | add %o2, -1, %o2 | ||
22 | |||
23 | cmp %o2, 0 | ||
24 | bg L5 /* bytes remain */ | ||
25 | add %o1, 1, %o1 /* delay slot: advance s2 */ | ||
26 | L3: | ||
27 | mov %g3, %o0 /* return the difference at full width; sign-extending only its low 8 bits reversed the sign whenever the bytes differed by 128 or more */ | ||
28 | |||
29 | |||
30 | retl | ||
31 | nop | ||
32 | #else | ||
33 | save %sp, -104, %sp /* Alternative word-at-a-time body; not assembled while the #if above is 1. After the save: %i0 = s1, %i1 = s2, %i2 = n. */ | ||
34 | mov %i2, %o4 /* %o4 = remaining byte count */ | ||
35 | mov %i0, %o0 /* %o0 = s1 cursor */ | ||
36 | |||
37 | cmp %o4, 15 | ||
38 | ble L72 /* 15 bytes or fewer: plain byte loop */ | ||
39 | mov %i1, %i2 /* delay slot: %i2 = s2 cursor */ | ||
40 | |||
41 | andcc %i2, 3, %g0 | ||
42 | be L161 /* s2 is already word aligned */ | ||
43 | andcc %o0, 3, %g2 /* delay slot: %g2 = s1 & 3 */ | ||
44 | L75: /* compare single bytes until the s2 cursor is word aligned */ | ||
45 | ldub [%o0], %g3 | ||
46 | ldub [%i2], %g2 | ||
47 | add %o0,1, %o0 | ||
48 | |||
49 | subcc %g3, %g2, %i0 /* %i0 = byte difference; returned if non-zero */ | ||
50 | bne L156 | ||
51 | add %i2, 1, %i2 | ||
52 | |||
53 | andcc %i2, 3, %g0 | ||
54 | bne L75 | ||
55 | add %o4, -1, %o4 | ||
56 | |||
57 | andcc %o0, 3, %g2 /* %g2 = s1 & 3 */ | ||
58 | L161: | ||
59 | bne,a L78 /* s1 is not word aligned: take the shift-and-merge path */ | ||
60 | mov %i2, %i1 | ||
61 | |||
62 | mov %o0, %i5 /* both cursors aligned: %i5 and %i3 walk s1 and s2 by words, %i4 = word count */ | ||
63 | mov %i2, %i3 | ||
64 | srl %o4, 2, %i4 | ||
65 | |||
66 | cmp %i4, 0 | ||
67 | bge L93 | ||
68 | mov %i4, %g2 | ||
69 | |||
70 | add %i4, 3, %g2 | ||
71 | L93: | ||
72 | sra %g2, 2, %g2 | ||
73 | sll %g2, 2, %g2 | ||
74 | sub %i4, %g2, %g2 /* %g2 = word count modulo 4 */ | ||
75 | |||
76 | cmp %g2, 1 /* choose the entry point into the 4-way unrolled loop from that remainder */ | ||
77 | be,a L88 | ||
78 | add %o0, 4, %i5 | ||
79 | |||
80 | bg L94 | ||
81 | cmp %g2, 2 | ||
82 | |||
83 | cmp %g2, 0 | ||
84 | be,a L86 | ||
85 | ld [%o0], %g3 | ||
86 | |||
87 | b L162 | ||
88 | ld [%i5], %g3 | ||
89 | L94: | ||
90 | be L81 | ||
91 | cmp %g2, 3 | ||
92 | |||
93 | be,a L83 | ||
94 | add %o0, -4, %i5 | ||
95 | |||
96 | b L162 | ||
97 | ld [%i5], %g3 | ||
98 | L81: | ||
99 | add %o0, -8, %i5 | ||
100 | ld [%o0], %g3 | ||
101 | add %i2, -8, %i3 | ||
102 | ld [%i2], %g2 | ||
103 | |||
104 | b L82 | ||
105 | add %i4, 2, %i4 | ||
106 | L83: | ||
107 | ld [%o0], %g4 | ||
108 | add %i2, -4, %i3 | ||
109 | ld [%i2], %g1 | ||
110 | |||
111 | b L84 | ||
112 | add %i4, 1, %i4 | ||
113 | L86: | ||
114 | b L87 | ||
115 | ld [%i2], %g2 | ||
116 | L88: | ||
117 | add %i2, 4, %i3 | ||
118 | ld [%o0], %g4 | ||
119 | add %i4, -1, %i4 | ||
120 | ld [%i2], %g1 | ||
121 | L95: /* aligned loop: four word compares per pass, alternating the register pairs %g4/%g1 and %g3/%g2 */ | ||
122 | ld [%i5], %g3 | ||
123 | L162: | ||
124 | cmp %g4, %g1 | ||
125 | be L87 | ||
126 | ld [%i3], %g2 | ||
127 | |||
128 | cmp %g4, %g1 | ||
129 | L163: /* a word pair differed; the condition codes still come from the compare that found it */ | ||
130 | bleu L114 /* first operand lower (unsigned): result -1 */ | ||
131 | mov -1, %i0 | ||
132 | |||
133 | b L114 | ||
134 | mov 1, %i0 /* delay slot: otherwise result 1 */ | ||
135 | L87: | ||
136 | ld [%i5 + 4], %g4 | ||
137 | cmp %g3, %g2 | ||
138 | bne L163 | ||
139 | ld [%i3 + 4], %g1 | ||
140 | L84: | ||
141 | ld [%i5 + 8], %g3 | ||
142 | |||
143 | cmp %g4, %g1 | ||
144 | bne L163 | ||
145 | ld [%i3 + 8], %g2 | ||
146 | L82: | ||
147 | ld [%i5 + 12], %g4 | ||
148 | cmp %g3, %g2 | ||
149 | bne L163 | ||
150 | ld [%i3 + 12], %g1 | ||
151 | |||
152 | add %i5, 16, %i5 | ||
153 | |||
154 | addcc %i4, -4, %i4 | ||
155 | bne L95 | ||
156 | add %i3, 16, %i3 | ||
157 | |||
158 | cmp %g4, %g1 | ||
159 | bne L163 | ||
160 | nop | ||
161 | |||
162 | b L114 | ||
163 | mov 0, %i0 /* delay slot: every word matched */ | ||
164 | L78: /* s1 misaligned: read aligned words from s1 and merge neighbouring words with shifts before comparing against s2 words */ | ||
165 | srl %o4, 2, %i0 /* %i0 = word count */ | ||
166 | and %o0, -4, %i3 /* %i3 = s1 cursor rounded down to a word boundary */ | ||
167 | orcc %i0, %g0, %g3 | ||
168 | sll %g2, 3, %o7 /* %o7 = left shift in bits = 8 * (s1 & 3) */ | ||
169 | mov 32, %g2 | ||
170 | |||
171 | bge L129 | ||
172 | sub %g2, %o7, %o1 /* delay slot: %o1 = right shift = 32 - %o7 */ | ||
173 | |||
174 | add %i0, 3, %g3 | ||
175 | L129: | ||
176 | sra %g3, 2, %g2 | ||
177 | sll %g2, 2, %g2 | ||
178 | sub %i0, %g2, %g2 /* %g2 = word count modulo 4, used to pick the loop entry as in the aligned path */ | ||
179 | |||
180 | cmp %g2, 1 | ||
181 | be,a L124 | ||
182 | ld [%i3], %o3 | ||
183 | |||
184 | bg L130 | ||
185 | cmp %g2, 2 | ||
186 | |||
187 | cmp %g2, 0 | ||
188 | be,a L122 | ||
189 | ld [%i3], %o2 | ||
190 | |||
191 | b L164 | ||
192 | sll %o3, %o7, %g3 | ||
193 | L130: | ||
194 | be L117 | ||
195 | cmp %g2, 3 | ||
196 | |||
197 | be,a L119 | ||
198 | ld [%i3], %g1 | ||
199 | |||
200 | b L164 | ||
201 | sll %o3, %o7, %g3 | ||
202 | L117: | ||
203 | ld [%i3], %g4 | ||
204 | add %i2, -8, %i1 | ||
205 | ld [%i3 + 4], %o3 | ||
206 | add %i0, 2, %i0 | ||
207 | ld [%i2], %i4 | ||
208 | |||
209 | b L118 | ||
210 | add %i3, -4, %i3 | ||
211 | L119: | ||
212 | ld [%i3 + 4], %g4 | ||
213 | add %i2, -4, %i1 | ||
214 | ld [%i2], %i5 | ||
215 | |||
216 | b L120 | ||
217 | add %i0, 1, %i0 | ||
218 | L122: | ||
219 | ld [%i3 + 4], %g1 | ||
220 | ld [%i2], %i4 | ||
221 | |||
222 | b L123 | ||
223 | add %i3, 4, %i3 | ||
224 | L124: | ||
225 | add %i2, 4, %i1 | ||
226 | ld [%i3 + 4], %o2 | ||
227 | add %i0, -1, %i0 | ||
228 | ld [%i2], %i5 | ||
229 | add %i3, 8, %i3 | ||
230 | L131: /* misaligned loop: each step builds one s1 word as (word << %o7) OR (next word >> %o1) and compares it with an s2 word */ | ||
231 | sll %o3, %o7, %g3 | ||
232 | L164: | ||
233 | srl %o2, %o1, %g2 | ||
234 | ld [%i3], %g1 | ||
235 | or %g3, %g2, %g3 | ||
236 | |||
237 | cmp %g3, %i5 | ||
238 | bne L163 | ||
239 | ld [%i1], %i4 | ||
240 | L123: | ||
241 | sll %o2, %o7, %g3 | ||
242 | srl %g1, %o1, %g2 | ||
243 | ld [%i3 + 4], %g4 | ||
244 | or %g3, %g2, %g3 | ||
245 | |||
246 | cmp %g3, %i4 | ||
247 | bne L163 | ||
248 | ld [%i1 + 4], %i5 | ||
249 | L120: | ||
250 | sll %g1, %o7, %g3 | ||
251 | srl %g4, %o1, %g2 | ||
252 | ld [%i3 + 8], %o3 | ||
253 | or %g3, %g2, %g3 | ||
254 | |||
255 | cmp %g3, %i5 | ||
256 | bne L163 | ||
257 | ld [%i1 + 8], %i4 | ||
258 | L118: | ||
259 | sll %g4, %o7, %g3 | ||
260 | srl %o3, %o1, %g2 | ||
261 | ld [%i3 + 12], %o2 | ||
262 | or %g3, %g2, %g3 | ||
263 | |||
264 | cmp %g3, %i4 | ||
265 | bne L163 | ||
266 | ld [%i1 + 12], %i5 | ||
267 | |||
268 | add %i3, 16, %i3 | ||
269 | addcc %i0, -4, %i0 | ||
270 | bne L131 | ||
271 | add %i1, 16, %i1 | ||
272 | |||
273 | sll %o3, %o7, %g3 | ||
274 | srl %o2, %o1, %g2 | ||
275 | or %g3, %g2, %g3 | ||
276 | |||
277 | cmp %g3, %i5 | ||
278 | be,a L114 | ||
279 | mov 0, %i0 /* annulled delay slot (runs only when taken): every word matched */ | ||
280 | |||
281 | b,a L163 | ||
282 | L114: /* word loops finished: %i0 is 0 when every word matched, otherwise -1 or 1 */ | ||
283 | cmp %i0, 0 | ||
284 | bne L156 | ||
285 | and %o4, -4, %g2 /* delay slot: %g2 = number of bytes covered by whole words */ | ||
286 | |||
287 | add %o0, %g2, %o0 /* step both cursors past the compared words */ | ||
288 | add %i2, %g2, %i2 | ||
289 | and %o4, 3, %o4 /* 0-3 bytes remain */ | ||
290 | L72: /* byte loop for the tail (or for the whole buffer when n <= 15) */ | ||
291 | cmp %o4, 0 | ||
292 | be L156 | ||
293 | mov 0, %i0 | ||
294 | |||
295 | ldub [%o0], %g3 | ||
296 | L165: | ||
297 | ldub [%i2], %g2 | ||
298 | add %o0, 1, %o0 | ||
299 | |||
300 | subcc %g3, %g2, %i0 /* %i0 = byte difference; returned if non-zero */ | ||
301 | bne L156 | ||
302 | add %i2, 1, %i2 | ||
303 | |||
304 | addcc %o4, -1, %o4 | ||
305 | bne,a L165 | ||
306 | ldub [%o0], %g3 | ||
307 | |||
308 | mov 0, %i0 | ||
309 | L156: /* common exit: the restore hands %i0 back as the caller's %o0 */ | ||
310 | ret | ||
311 | restore | ||
312 | #endif | ||
diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S new file mode 100644 index 000000000000..ce10bc869af9 --- /dev/null +++ b/arch/sparc/lib/memcpy.S | |||
@@ -0,0 +1,1150 @@ | |||
1 | /* memcpy.S: Sparc optimized memcpy and memmove code | ||
2 | * Hand optimized from GNU libc's memcpy and memmove | ||
3 | * Copyright (C) 1991,1996 Free Software Foundation | ||
4 | * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) | ||
5 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
6 | * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) | ||
7 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
8 | */ | ||
9 | |||
10 | #ifdef __KERNEL__ | ||
11 | |||
12 | #define FUNC(x) /* export x as a global function symbol, align to 4 bytes and open its label */ \ | ||
13 | .globl x; \ | ||
14 | .type x,@function; \ | ||
15 | .align 4; \ | ||
16 | x: | ||
17 | |||
18 | #undef FASTER_REVERSE /* kernel build: memmove's overlapping case uses the byte-at-a-time reverse_bytes loop */ | ||
19 | #undef FASTER_NONALIGNED | ||
20 | #define FASTER_ALIGNED | ||
21 | |||
22 | /* In kernel these functions don't return a value. | ||
23 | * One should use macros in asm/string.h for that purpose. | ||
24 | * We return 0, so that bugs are more apparent. | ||
25 | */ | ||
26 | #define SETUP_RETL /* expands to nothing in the kernel build */ | ||
27 | #define RETL_INSN clr %o0 /* sits in the delay slot of each retl: the return value is 0 */ | ||
28 | |||
29 | #else | ||
30 | |||
31 | /* libc */ | ||
32 | |||
33 | #include "DEFS.h" | ||
34 | |||
35 | #define FASTER_REVERSE | ||
36 | #define FASTER_NONALIGNED | ||
37 | #define FASTER_ALIGNED | ||
38 | |||
39 | #define SETUP_RETL mov %o0, %g6 /* libc build: stash the incoming %o0 in %g6 at entry */ | ||
40 | #define RETL_INSN mov %g6, %o0 /* sits in the delay slot of each retl: hands the stashed %o0 back */ | ||
41 | |||
42 | #endif | ||
43 | |||
44 | /* Both these macros have to start with exactly the same insn */ | ||
45 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* copy the 32 bytes at src+offset to dst+offset: four ldd loads into the pairs t0/t1 .. t6/t7, then eight word stores */ \ | ||
46 | ldd [%src + (offset) + 0x00], %t0; \ | ||
47 | ldd [%src + (offset) + 0x08], %t2; \ | ||
48 | ldd [%src + (offset) + 0x10], %t4; \ | ||
49 | ldd [%src + (offset) + 0x18], %t6; \ | ||
50 | st %t0, [%dst + (offset) + 0x00]; \ | ||
51 | st %t1, [%dst + (offset) + 0x04]; \ | ||
52 | st %t2, [%dst + (offset) + 0x08]; \ | ||
53 | st %t3, [%dst + (offset) + 0x0c]; \ | ||
54 | st %t4, [%dst + (offset) + 0x10]; \ | ||
55 | st %t5, [%dst + (offset) + 0x14]; \ | ||
56 | st %t6, [%dst + (offset) + 0x18]; \ | ||
57 | st %t7, [%dst + (offset) + 0x1c]; | ||
58 | |||
59 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* copy the 32 bytes at src+offset to dst+offset using doubleword loads and doubleword stores (t0/t1 .. t6/t7 are register pairs) */ \ | ||
60 | ldd [%src + (offset) + 0x00], %t0; \ | ||
61 | ldd [%src + (offset) + 0x08], %t2; \ | ||
62 | ldd [%src + (offset) + 0x10], %t4; \ | ||
63 | ldd [%src + (offset) + 0x18], %t6; \ | ||
64 | std %t0, [%dst + (offset) + 0x00]; \ | ||
65 | std %t2, [%dst + (offset) + 0x08]; \ | ||
66 | std %t4, [%dst + (offset) + 0x10]; \ | ||
67 | std %t6, [%dst + (offset) + 0x18]; | ||
68 | |||
69 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) /* copy the 16 bytes that end at src-offset to the 16 bytes that end at dst-offset: two ldd loads, four word stores */ \ | ||
70 | ldd [%src - (offset) - 0x10], %t0; \ | ||
71 | ldd [%src - (offset) - 0x08], %t2; \ | ||
72 | st %t0, [%dst - (offset) - 0x10]; \ | ||
73 | st %t1, [%dst - (offset) - 0x0c]; \ | ||
74 | st %t2, [%dst - (offset) - 0x08]; \ | ||
75 | st %t3, [%dst - (offset) - 0x04]; | ||
76 | |||
77 | #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) /* copy the 16 bytes that end at src-offset to the 16 bytes that end at dst-offset using doubleword loads and stores */ \ | ||
78 | ldd [%src - (offset) - 0x10], %t0; \ | ||
79 | ldd [%src - (offset) - 0x08], %t2; \ | ||
80 | std %t0, [%dst - (offset) - 0x10]; \ | ||
81 | std %t2, [%dst - (offset) - 0x08]; | ||
82 | |||
83 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) /* copy the 2 bytes that end at src-offset to the 2 bytes that end at dst-offset, one byte at a time */ \ | ||
84 | ldub [%src - (offset) - 0x02], %t0; \ | ||
85 | ldub [%src - (offset) - 0x01], %t1; \ | ||
86 | stb %t0, [%dst - (offset) - 0x02]; \ | ||
87 | stb %t1, [%dst - (offset) - 0x01]; | ||
88 | |||
89 | /* Both these macros have to start with exactly the same insn */ | ||
90 | #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* reverse-direction copy of the 32 bytes that end at src-offset to the 32 bytes that end at dst-offset: four ldd loads, eight word stores */ \ | ||
91 | ldd [%src - (offset) - 0x20], %t0; \ | ||
92 | ldd [%src - (offset) - 0x18], %t2; \ | ||
93 | ldd [%src - (offset) - 0x10], %t4; \ | ||
94 | ldd [%src - (offset) - 0x08], %t6; \ | ||
95 | st %t0, [%dst - (offset) - 0x20]; \ | ||
96 | st %t1, [%dst - (offset) - 0x1c]; \ | ||
97 | st %t2, [%dst - (offset) - 0x18]; \ | ||
98 | st %t3, [%dst - (offset) - 0x14]; \ | ||
99 | st %t4, [%dst - (offset) - 0x10]; \ | ||
100 | st %t5, [%dst - (offset) - 0x0c]; \ | ||
101 | st %t6, [%dst - (offset) - 0x08]; \ | ||
102 | st %t7, [%dst - (offset) - 0x04]; | ||
103 | |||
104 | #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* reverse-direction copy of the 32 bytes that end at src-offset to the 32 bytes that end at dst-offset using doubleword loads and stores */ \ | ||
105 | ldd [%src - (offset) - 0x20], %t0; \ | ||
106 | ldd [%src - (offset) - 0x18], %t2; \ | ||
107 | ldd [%src - (offset) - 0x10], %t4; \ | ||
108 | ldd [%src - (offset) - 0x08], %t6; \ | ||
109 | std %t0, [%dst - (offset) - 0x20]; \ | ||
110 | std %t2, [%dst - (offset) - 0x18]; \ | ||
111 | std %t4, [%dst - (offset) - 0x10]; \ | ||
112 | std %t6, [%dst - (offset) - 0x08]; | ||
113 | |||
114 | #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) /* copy the 16 bytes at src+offset to dst+offset: two ldd loads, four word stores */ \ | ||
115 | ldd [%src + (offset) + 0x00], %t0; \ | ||
116 | ldd [%src + (offset) + 0x08], %t2; \ | ||
117 | st %t0, [%dst + (offset) + 0x00]; \ | ||
118 | st %t1, [%dst + (offset) + 0x04]; \ | ||
119 | st %t2, [%dst + (offset) + 0x08]; \ | ||
120 | st %t3, [%dst + (offset) + 0x0c]; | ||
121 | |||
122 | #define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) /* copy the 2 bytes at src+offset to dst+offset, one byte at a time */ \ | ||
123 | ldub [%src + (offset) + 0x00], %t0; \ | ||
124 | ldub [%src + (offset) + 0x01], %t1; \ | ||
125 | stb %t0, [%dst + (offset) + 0x00]; \ | ||
126 | stb %t1, [%dst + (offset) + 0x01]; | ||
127 | |||
128 | #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) /* shifted copy step: loads 16 source bytes, merges neighbouring words with shil/shir and stores 16 bytes starting at dst+offset+offset2-4; t4 and prev carry partial words in from the previous step and out to the next */ \ | ||
129 | ldd [%src + (offset) + 0x00], %t0; \ | ||
130 | ldd [%src + (offset) + 0x08], %t2; \ | ||
131 | srl %t0, shir, %t5; \ | ||
132 | srl %t1, shir, %t6; \ | ||
133 | sll %t0, shil, %t0; \ | ||
134 | or %t5, %prev, %t5; \ | ||
135 | sll %t1, shil, %prev; \ | ||
136 | or %t6, %t0, %t0; \ | ||
137 | srl %t2, shir, %t1; \ | ||
138 | srl %t3, shir, %t6; \ | ||
139 | sll %t2, shil, %t2; \ | ||
140 | or %t1, %prev, %t1; \ | ||
141 | std %t4, [%dst + (offset) + (offset2) - 0x04]; \ | ||
142 | std %t0, [%dst + (offset) + (offset2) + 0x04]; \ | ||
143 | sll %t3, shil, %prev; \ | ||
144 | or %t6, %t2, %t4; | ||
145 | |||
146 | #define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) /* shifted copy step: loads 16 source bytes, merges each word's shir-shifted part with the shil-shifted part of the word before it (prev for the first) and stores 16 bytes at dst+offset+offset2; prev is left holding the last word shifted by shil */ \ | ||
147 | ldd [%src + (offset) + 0x00], %t0; \ | ||
148 | ldd [%src + (offset) + 0x08], %t2; \ | ||
149 | srl %t0, shir, %t4; \ | ||
150 | srl %t1, shir, %t5; \ | ||
151 | sll %t0, shil, %t6; \ | ||
152 | or %t4, %prev, %t0; \ | ||
153 | sll %t1, shil, %prev; \ | ||
154 | or %t5, %t6, %t1; \ | ||
155 | srl %t2, shir, %t4; \ | ||
156 | srl %t3, shir, %t5; \ | ||
157 | sll %t2, shil, %t6; \ | ||
158 | or %t4, %prev, %t2; \ | ||
159 | sll %t3, shil, %prev; \ | ||
160 | or %t5, %t6, %t3; \ | ||
161 | std %t0, [%dst + (offset) + (offset2) + 0x00]; \ | ||
162 | std %t2, [%dst + (offset) + (offset2) + 0x08]; | ||
163 | |||
164 | .text | ||
165 | .align 4 | ||
166 | |||
167 | #ifdef FASTER_REVERSE | ||
168 | |||
169 | 70: /* rdword_align */ | ||
170 | |||
171 | andcc %o1, 1, %g0 | ||
172 | be 4f | ||
173 | andcc %o1, 2, %g0 | ||
174 | |||
175 | ldub [%o1 - 1], %g2 | ||
176 | sub %o1, 1, %o1 | ||
177 | stb %g2, [%o0 - 1] | ||
178 | sub %o2, 1, %o2 | ||
179 | be 3f | ||
180 | sub %o0, 1, %o0 | ||
181 | 4: | ||
182 | lduh [%o1 - 2], %g2 | ||
183 | sub %o1, 2, %o1 | ||
184 | sth %g2, [%o0 - 2] | ||
185 | sub %o2, 2, %o2 | ||
186 | b 3f | ||
187 | sub %o0, 2, %o0 | ||
188 | |||
189 | #endif /* FASTER_REVERSE */ | ||
190 | |||
191 | 0: | ||
192 | retl | ||
193 | nop ! Only bcopy returns here and it returns void... | ||
194 | |||
195 | #ifdef __KERNEL__ | ||
196 | FUNC(amemmove) | ||
197 | FUNC(__memmove) | ||
198 | #endif | ||
199 | FUNC(memmove) | ||
200 | cmp %o0, %o1 | ||
201 | SETUP_RETL | ||
202 | bleu 9f | ||
203 | sub %o0, %o1, %o4 | ||
204 | |||
205 | add %o1, %o2, %o3 | ||
206 | cmp %o3, %o0 | ||
207 | bleu 0f | ||
208 | andcc %o4, 3, %o5 | ||
209 | |||
210 | #ifndef FASTER_REVERSE | ||
211 | |||
212 | add %o1, %o2, %o1 | ||
213 | add %o0, %o2, %o0 | ||
214 | sub %o1, 1, %o1 | ||
215 | sub %o0, 1, %o0 | ||
216 | |||
217 | 1: /* reverse_bytes */ | ||
218 | |||
219 | ldub [%o1], %o4 | ||
220 | subcc %o2, 1, %o2 | ||
221 | stb %o4, [%o0] | ||
222 | sub %o1, 1, %o1 | ||
223 | bne 1b | ||
224 | sub %o0, 1, %o0 | ||
225 | |||
226 | retl | ||
227 | RETL_INSN | ||
228 | |||
229 | #else /* FASTER_REVERSE */ | ||
230 | |||
231 | add %o1, %o2, %o1 | ||
232 | add %o0, %o2, %o0 | ||
233 | bne 77f | ||
234 | cmp %o2, 15 | ||
235 | bleu 91f | ||
236 | andcc %o1, 3, %g0 | ||
237 | bne 70b | ||
238 | 3: | ||
239 | andcc %o1, 4, %g0 | ||
240 | |||
241 | be 2f | ||
242 | mov %o2, %g1 | ||
243 | |||
244 | ld [%o1 - 4], %o4 | ||
245 | sub %g1, 4, %g1 | ||
246 | st %o4, [%o0 - 4] | ||
247 | sub %o1, 4, %o1 | ||
248 | sub %o0, 4, %o0 | ||
249 | 2: | ||
250 | andcc %g1, 0xffffff80, %g7 | ||
251 | be 3f | ||
252 | andcc %o0, 4, %g0 | ||
253 | |||
254 | be 74f + 4 | ||
255 | 5: | ||
256 | RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
257 | RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
258 | RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
259 | RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
260 | subcc %g7, 128, %g7 | ||
261 | sub %o1, 128, %o1 | ||
262 | bne 5b | ||
263 | sub %o0, 128, %o0 | ||
264 | 3: | ||
265 | andcc %g1, 0x70, %g7 | ||
266 | be 72f | ||
267 | andcc %g1, 8, %g0 | ||
268 | |||
269 | sethi %hi(72f), %o5 | ||
270 | srl %g7, 1, %o4 | ||
271 | add %g7, %o4, %o4 | ||
272 | sub %o1, %g7, %o1 | ||
273 | sub %o5, %o4, %o5 | ||
274 | jmpl %o5 + %lo(72f), %g0 | ||
275 | sub %o0, %g7, %o0 | ||
276 | |||
277 | 71: /* rmemcpy_table */ | ||
278 | RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | ||
279 | RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | ||
280 | RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | ||
281 | RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | ||
282 | RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | ||
283 | RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | ||
284 | RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | ||
285 | |||
286 | 72: /* rmemcpy_table_end */ | ||
287 | |||
288 | be 73f | ||
289 | andcc %g1, 4, %g0 | ||
290 | |||
291 | ldd [%o1 - 0x08], %g2 | ||
292 | sub %o0, 8, %o0 | ||
293 | sub %o1, 8, %o1 | ||
294 | st %g2, [%o0] | ||
295 | st %g3, [%o0 + 0x04] | ||
296 | |||
297 | 73: /* rmemcpy_last7 */ | ||
298 | |||
299 | be 1f | ||
300 | andcc %g1, 2, %g0 | ||
301 | |||
302 | ld [%o1 - 4], %g2 | ||
303 | sub %o1, 4, %o1 | ||
304 | st %g2, [%o0 - 4] | ||
305 | sub %o0, 4, %o0 | ||
306 | 1: | ||
307 | be 1f | ||
308 | andcc %g1, 1, %g0 | ||
309 | |||
310 | lduh [%o1 - 2], %g2 | ||
311 | sub %o1, 2, %o1 | ||
312 | sth %g2, [%o0 - 2] | ||
313 | sub %o0, 2, %o0 | ||
314 | 1: | ||
315 | be 1f | ||
316 | nop | ||
317 | |||
318 | ldub [%o1 - 1], %g2 | ||
319 | stb %g2, [%o0 - 1] | ||
320 | 1: | ||
321 | retl | ||
322 | RETL_INSN | ||
323 | |||
324 | 74: /* rldd_std */ | ||
325 | RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
326 | RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
327 | RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
328 | RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
329 | subcc %g7, 128, %g7 | ||
330 | sub %o1, 128, %o1 | ||
331 | bne 74b | ||
332 | sub %o0, 128, %o0 | ||
333 | |||
334 | andcc %g1, 0x70, %g7 | ||
335 | be 72b | ||
336 | andcc %g1, 8, %g0 | ||
337 | |||
338 | sethi %hi(72b), %o5 | ||
339 | srl %g7, 1, %o4 | ||
340 | add %g7, %o4, %o4 | ||
341 | sub %o1, %g7, %o1 | ||
342 | sub %o5, %o4, %o5 | ||
343 | jmpl %o5 + %lo(72b), %g0 | ||
344 | sub %o0, %g7, %o0 | ||
345 | |||
346 | 75: /* rshort_end */ | ||
347 | |||
348 | and %o2, 0xe, %o3 | ||
349 | 2: | ||
350 | sethi %hi(76f), %o5 | ||
351 | sll %o3, 3, %o4 | ||
352 | sub %o0, %o3, %o0 | ||
353 | sub %o5, %o4, %o5 | ||
354 | sub %o1, %o3, %o1 | ||
355 | jmpl %o5 + %lo(76f), %g0 | ||
356 | andcc %o2, 1, %g0 | ||
357 | |||
358 | RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | ||
359 | RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | ||
360 | RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | ||
361 | RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | ||
362 | RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | ||
363 | RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | ||
364 | RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | ||
365 | |||
366 | 76: /* rshort_table_end */ | ||
367 | |||
368 | be 1f | ||
369 | nop | ||
370 | ldub [%o1 - 1], %g2 | ||
371 | stb %g2, [%o0 - 1] | ||
372 | 1: | ||
373 | retl | ||
374 | RETL_INSN | ||
375 | |||
376 | 91: /* rshort_aligned_end */ | ||
377 | |||
378 | bne 75b | ||
379 | andcc %o2, 8, %g0 | ||
380 | |||
381 | be 1f | ||
382 | andcc %o2, 4, %g0 | ||
383 | |||
384 | ld [%o1 - 0x08], %g2 | ||
385 | ld [%o1 - 0x04], %g3 | ||
386 | sub %o1, 8, %o1 | ||
387 | st %g2, [%o0 - 0x08] | ||
388 | st %g3, [%o0 - 0x04] | ||
389 | sub %o0, 8, %o0 | ||
390 | 1: | ||
391 | b 73b | ||
392 | mov %o2, %g1 | ||
393 | |||
394 | 77: /* rnon_aligned */ | ||
395 | cmp %o2, 15 | ||
396 | bleu 75b | ||
397 | andcc %o0, 3, %g0 | ||
398 | be 64f | ||
399 | andcc %o0, 1, %g0 | ||
400 | be 63f | ||
401 | andcc %o0, 2, %g0 | ||
402 | ldub [%o1 - 1], %g5 | ||
403 | sub %o1, 1, %o1 | ||
404 | stb %g5, [%o0 - 1] | ||
405 | sub %o0, 1, %o0 | ||
406 | be 64f | ||
407 | sub %o2, 1, %o2 | ||
408 | 63: | ||
409 | ldub [%o1 - 1], %g5 | ||
410 | sub %o1, 2, %o1 | ||
411 | stb %g5, [%o0 - 1] | ||
412 | sub %o0, 2, %o0 | ||
413 | ldub [%o1], %g5 | ||
414 | sub %o2, 2, %o2 | ||
415 | stb %g5, [%o0] | ||
416 | 64: | ||
417 | and %o1, 3, %g2 | ||
418 | and %o1, -4, %o1 | ||
419 | and %o2, 0xc, %g3 | ||
420 | add %o1, 4, %o1 | ||
421 | cmp %g3, 4 | ||
422 | sll %g2, 3, %g4 | ||
423 | mov 32, %g2 | ||
424 | be 4f | ||
425 | sub %g2, %g4, %g7 | ||
426 | |||
427 | blu 3f | ||
428 | cmp %g3, 8 | ||
429 | |||
430 | be 2f | ||
431 | srl %o2, 2, %g3 | ||
432 | |||
433 | ld [%o1 - 4], %o3 | ||
434 | add %o0, -8, %o0 | ||
435 | ld [%o1 - 8], %o4 | ||
436 | add %o1, -16, %o1 | ||
437 | b 7f | ||
438 | add %g3, 1, %g3 | ||
439 | 2: | ||
440 | ld [%o1 - 4], %o4 | ||
441 | add %o0, -4, %o0 | ||
442 | ld [%o1 - 8], %g1 | ||
443 | add %o1, -12, %o1 | ||
444 | b 8f | ||
445 | add %g3, 2, %g3 | ||
446 | 3: | ||
447 | ld [%o1 - 4], %o5 | ||
448 | add %o0, -12, %o0 | ||
449 | ld [%o1 - 8], %o3 | ||
450 | add %o1, -20, %o1 | ||
451 | b 6f | ||
452 | srl %o2, 2, %g3 | ||
453 | 4: | ||
454 | ld [%o1 - 4], %g1 | ||
455 | srl %o2, 2, %g3 | ||
456 | ld [%o1 - 8], %o5 | ||
457 | add %o1, -24, %o1 | ||
458 | add %o0, -16, %o0 | ||
459 | add %g3, -1, %g3 | ||
460 | |||
461 | ld [%o1 + 12], %o3 | ||
462 | 5: | ||
463 | sll %o5, %g4, %g2 | ||
464 | srl %g1, %g7, %g5 | ||
465 | or %g2, %g5, %g2 | ||
466 | st %g2, [%o0 + 12] | ||
467 | 6: | ||
468 | ld [%o1 + 8], %o4 | ||
469 | sll %o3, %g4, %g2 | ||
470 | srl %o5, %g7, %g5 | ||
471 | or %g2, %g5, %g2 | ||
472 | st %g2, [%o0 + 8] | ||
473 | 7: | ||
474 | ld [%o1 + 4], %g1 | ||
475 | sll %o4, %g4, %g2 | ||
476 | srl %o3, %g7, %g5 | ||
477 | or %g2, %g5, %g2 | ||
478 | st %g2, [%o0 + 4] | ||
479 | 8: | ||
480 | ld [%o1], %o5 | ||
481 | sll %g1, %g4, %g2 | ||
482 | srl %o4, %g7, %g5 | ||
483 | addcc %g3, -4, %g3 | ||
484 | or %g2, %g5, %g2 | ||
485 | add %o1, -16, %o1 | ||
486 | st %g2, [%o0] | ||
487 | add %o0, -16, %o0 | ||
488 | bne,a 5b | ||
489 | ld [%o1 + 12], %o3 | ||
490 | sll %o5, %g4, %g2 | ||
491 | srl %g1, %g7, %g5 | ||
492 | srl %g4, 3, %g3 | ||
493 | or %g2, %g5, %g2 | ||
494 | add %o1, %g3, %o1 | ||
495 | andcc %o2, 2, %g0 | ||
496 | st %g2, [%o0 + 12] | ||
497 | be 1f | ||
498 | andcc %o2, 1, %g0 | ||
499 | |||
500 | ldub [%o1 + 15], %g5 | ||
501 | add %o1, -2, %o1 | ||
502 | stb %g5, [%o0 + 11] | ||
503 | add %o0, -2, %o0 | ||
504 | ldub [%o1 + 16], %g5 | ||
505 | stb %g5, [%o0 + 12] | ||
506 | 1: | ||
507 | be 1f | ||
508 | nop | ||
509 | ldub [%o1 + 15], %g5 | ||
510 | stb %g5, [%o0 + 11] | ||
511 | 1: | ||
512 | retl | ||
513 | RETL_INSN | ||
514 | |||
515 | #endif /* FASTER_REVERSE */ | ||
516 | |||
517 | /* NOTE: This code is executed just for the cases, | ||
518 | where %src (=%o1) & 3 is != 0. | ||
519 | We need to align it to 4. So, for (%src & 3) | ||
520 | 1 we need to do ldub,lduh | ||
521 | 2 lduh | ||
522 | 3 just ldub | ||
523 | so even if it looks weird, the branches | ||
524 | are correct here. -jj | ||
525 | */ | ||
526 | 78: /* dword_align: copy 1-3 leading bytes until %o1 (src) is word aligned, then rejoin the main path at the next 3: label */ | ||
527 | |||
528 | andcc %o1, 1, %g0 /* Z set when src is even */ | ||
529 | be 4f /* even src: a halfword copy is enough */ | ||
530 | andcc %o1, 2, %g0 /* delay slot: flags = src & 2, tested by the bne below */ | ||
531 | |||
532 | ldub [%o1], %g2 /* odd src: move a single byte */ | ||
533 | add %o1, 1, %o1 | ||
534 | stb %g2, [%o0] | ||
535 | sub %o2, 1, %o2 /* len -= 1 (does not touch the flags) */ | ||
536 | bne 3f /* src & 2 was set, so src & 3 was 3: aligned now */ | ||
537 | add %o0, 1, %o0 /* delay slot: dst += 1 */ | ||
538 | 4: /* src & 3 was 1 or 2: two more bytes reach the word boundary */ | ||
539 | lduh [%o1], %g2 | ||
540 | add %o1, 2, %o1 | ||
541 | sth %g2, [%o0] | ||
542 | sub %o2, 2, %o2 /* len -= 2 */ | ||
543 | b 3f /* back to the main path */ | ||
544 | add %o0, 2, %o0 /* delay slot: dst += 2 */ | ||
545 | |||
546 | #ifdef __KERNEL__ | ||
547 | FUNC(__memcpy) | ||
548 | #endif | ||
549 | FUNC(memcpy) /* %o0=dst %o1=src %o2=len; forward copy that dispatches on the relative word alignment of dst and src */ | ||
550 | |||
551 | sub %o0, %o1, %o4 /* %o4 = dst - src */ | ||
552 | SETUP_RETL /* macro defined earlier in the file (not visible here) */ | ||
553 | 9: /* not referenced below; presumably entered from code earlier in the file */ | ||
554 | andcc %o4, 3, %o5 /* %o5 = (dst - src) & 3; Z set when dst and src are aligned alike */ | ||
555 | 0: /* likewise not referenced below */ | ||
556 | bne 86f /* alignment differs: non_aligned */ | ||
557 | cmp %o2, 15 /* delay slot: flags for the bleu below */ | ||
558 | |||
559 | bleu 90f /* len <= 15: short_aligned_end */ | ||
560 | andcc %o1, 3, %g0 /* delay slot: Z set when src is word aligned (also consumed at 90:) */ | ||
561 | |||
562 | bne 78b /* src not word aligned: dword_align, which comes back to 3: */ | ||
563 | 3: | ||
564 | andcc %o1, 4, %g0 /* delay slot of the bne above and re-entry point: Z set when src is 8-byte aligned */ | ||
565 | |||
566 | be 2f /* src already 8-byte aligned */ | ||
567 | mov %o2, %g1 /* delay slot: the remaining length lives in %g1 from here on */ | ||
568 | |||
569 | ld [%o1], %o4 /* move one word so that src becomes 8-byte aligned */ | ||
570 | sub %g1, 4, %g1 | ||
571 | st %o4, [%o0] | ||
572 | add %o1, 4, %o1 | ||
573 | add %o0, 4, %o0 | ||
574 | 2: | ||
575 | andcc %g1, 0xffffff80, %g7 /* %g7 = byte count covered by whole 128-byte blocks */ | ||
576 | be 3f /* fewer than 128 bytes left */ | ||
577 | andcc %o0, 4, %g0 /* delay slot: Z set when dst is 8-byte aligned as well */ | ||
578 | |||
579 | be 82f + 4 /* dst 8-byte aligned: ldd_std, entered one insn late because the delay slot below has already executed an insn (assumes both chunk macros begin with the same insn -- TODO confirm against their definitions) */ | ||
580 | 5: /* 128 bytes per pass */ | ||
581 | MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
582 | MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
583 | MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
584 | MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
585 | subcc %g7, 128, %g7 /* one block done; Z when none remain */ | ||
586 | add %o1, 128, %o1 | ||
587 | bne 5b | ||
588 | add %o0, 128, %o0 /* delay slot */ | ||
589 | 3: | ||
590 | andcc %g1, 0x70, %g7 /* %g7 = bytes in the remaining whole 16-byte chunks */ | ||
591 | be 80f /* none: skip the table */ | ||
592 | andcc %g1, 8, %g0 /* delay slot: flags = len & 8, consumed at 80: */ | ||
593 | |||
594 | sethi %hi(80f), %o5 | ||
595 | srl %g7, 1, %o4 | ||
596 | add %g7, %o4, %o4 /* %o4 = 1.5 * %g7 = bytes of table code to run (24 per 16-byte chunk) */ | ||
597 | add %o1, %g7, %o1 /* advance src past the chunks up front */ | ||
598 | sub %o5, %o4, %o5 | ||
599 | jmpl %o5 + %lo(80f), %g0 /* enter the table %o4 bytes before 80: */ | ||
600 | add %o0, %g7, %o0 /* delay slot: advance dst likewise */ | ||
601 | |||
602 | 79: /* memcpy_table */ | ||
603 | |||
604 | MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | ||
605 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | ||
606 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | ||
607 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | ||
608 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | ||
609 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | ||
610 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | ||
611 | |||
612 | 80: /* memcpy_table_end */ | ||
613 | be 81f /* len & 8 clear */ | ||
614 | andcc %g1, 4, %g0 /* delay slot: flags = len & 4 */ | ||
615 | |||
616 | ldd [%o1], %g2 /* 8 bytes via the %g2/%g3 pair */ | ||
617 | add %o0, 8, %o0 | ||
618 | st %g2, [%o0 - 0x08] /* two word stores rather than one std */ | ||
619 | add %o1, 8, %o1 | ||
620 | st %g3, [%o0 - 0x04] | ||
621 | |||
622 | 81: /* memcpy_last7 */ | ||
623 | |||
624 | be 1f /* len & 4 clear */ | ||
625 | andcc %g1, 2, %g0 /* delay slot: flags = len & 2 */ | ||
626 | |||
627 | ld [%o1], %g2 | ||
628 | add %o1, 4, %o1 | ||
629 | st %g2, [%o0] | ||
630 | add %o0, 4, %o0 | ||
631 | 1: | ||
632 | be 1f /* len & 2 clear */ | ||
633 | andcc %g1, 1, %g0 /* delay slot: flags = len & 1 */ | ||
634 | |||
635 | lduh [%o1], %g2 | ||
636 | add %o1, 2, %o1 | ||
637 | sth %g2, [%o0] | ||
638 | add %o0, 2, %o0 | ||
639 | 1: | ||
640 | be 1f /* len & 1 clear */ | ||
641 | nop | ||
642 | |||
643 | ldub [%o1], %g2 | ||
644 | stb %g2, [%o0] | ||
645 | 1: | ||
646 | retl | ||
647 | RETL_INSN /* delay slot; macro defined earlier in the file */ | ||
648 | |||
649 | 82: /* ldd_std: 128-byte loop used when dst is 8-byte aligned as well; first entered at 82 + 4 from the main path */ | ||
650 | MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | ||
651 | MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | ||
652 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | ||
653 | MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | ||
654 | subcc %g7, 128, %g7 /* one block done; Z when none remain */ | ||
655 | add %o1, 128, %o1 | ||
656 | bne 82b | ||
657 | add %o0, 128, %o0 /* delay slot */ | ||
658 | |||
659 | #ifndef FASTER_ALIGNED | ||
660 | |||
661 | andcc %g1, 0x70, %g7 /* same tail dispatch as at 3: in the main path, sharing memcpy_table */ | ||
662 | be 80b | ||
663 | andcc %g1, 8, %g0 /* delay slot: flags = len & 8, consumed at 80: */ | ||
664 | |||
665 | sethi %hi(80b), %o5 | ||
666 | srl %g7, 1, %o4 | ||
667 | add %g7, %o4, %o4 /* %o4 = 1.5 * %g7 bytes of table code */ | ||
668 | add %o1, %g7, %o1 | ||
669 | sub %o5, %o4, %o5 | ||
670 | jmpl %o5 + %lo(80b), %g0 /* enter memcpy_table %o4 bytes before 80: */ | ||
671 | add %o0, %g7, %o0 /* delay slot */ | ||
672 | |||
673 | #else /* FASTER_ALIGNED */ | ||
674 | |||
675 | andcc %g1, 0x70, %g7 /* %g7 = bytes in the remaining whole 16-byte chunks */ | ||
676 | be 84f /* none: skip the table */ | ||
677 | andcc %g1, 8, %g0 /* delay slot: flags = len & 8, consumed at 84: */ | ||
678 | |||
679 | sethi %hi(84f), %o5 | ||
680 | add %o1, %g7, %o1 | ||
681 | sub %o5, %g7, %o5 /* one byte of table code per byte copied (16 per 16-byte chunk) */ | ||
682 | jmpl %o5 + %lo(84f), %g0 /* enter the table %g7 bytes before 84: */ | ||
683 | add %o0, %g7, %o0 /* delay slot */ | ||
684 | |||
685 | 83: /* amemcpy_table */ | ||
686 | |||
687 | MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | ||
688 | MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | ||
689 | MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | ||
690 | MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | ||
691 | MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | ||
692 | MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | ||
693 | MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | ||
694 | |||
695 | 84: /* amemcpy_table_end */ | ||
696 | be 85f /* len & 8 clear */ | ||
697 | andcc %g1, 4, %g0 /* delay slot: flags = len & 4 */ | ||
698 | |||
699 | ldd [%o1], %g2 | ||
700 | add %o0, 8, %o0 | ||
701 | std %g2, [%o0 - 0x08] /* dst is 8-byte aligned on this path, so a single std is used */ | ||
702 | add %o1, 8, %o1 | ||
703 | 85: /* amemcpy_last7 */ | ||
704 | be 1f /* len & 4 clear */ | ||
705 | andcc %g1, 2, %g0 /* delay slot: flags = len & 2 */ | ||
706 | |||
707 | ld [%o1], %g2 | ||
708 | add %o1, 4, %o1 | ||
709 | st %g2, [%o0] | ||
710 | add %o0, 4, %o0 | ||
711 | 1: | ||
712 | be 1f /* len & 2 clear */ | ||
713 | andcc %g1, 1, %g0 /* delay slot: flags = len & 1 */ | ||
714 | |||
715 | lduh [%o1], %g2 | ||
716 | add %o1, 2, %o1 | ||
717 | sth %g2, [%o0] | ||
718 | add %o0, 2, %o0 | ||
719 | 1: | ||
720 | be 1f /* len & 1 clear */ | ||
721 | nop | ||
722 | |||
723 | ldub [%o1], %g2 | ||
724 | stb %g2, [%o0] | ||
725 | 1: | ||
726 | retl | ||
727 | RETL_INSN /* delay slot; macro defined earlier in the file */ | ||
728 | |||
729 | #endif /* FASTER_ALIGNED */ | ||
730 | |||
731 | 86: /* non_aligned: dst and src differ in alignment modulo 4; word loads from src are shifted and merged into aligned word stores */ | ||
732 | cmp %o2, 6 | ||
733 | bleu 88f /* len <= 6: bytewise copy at short_end */ | ||
734 | |||
735 | #ifdef FASTER_NONALIGNED | ||
736 | |||
737 | cmp %o2, 256 /* sits in the delay slot of the bleu above when this section is compiled in */ | ||
738 | bcc 87f /* len >= 256 (unsigned): faster_nonaligned */ | ||
739 | |||
740 | #endif /* FASTER_NONALIGNED */ | ||
741 | |||
742 | andcc %o0, 3, %g0 /* delay slot of the preceding branch: Z set when dst is word aligned */ | ||
743 | be 61f /* dst already word aligned */ | ||
744 | andcc %o0, 1, %g0 /* delay slot: Z set when dst is even */ | ||
745 | be 60f /* even dst: two bytes reach the boundary */ | ||
746 | andcc %o0, 2, %g0 /* delay slot: flags = dst & 2, tested by the bne below */ | ||
747 | |||
748 | ldub [%o1], %g5 /* odd dst: move a single byte */ | ||
749 | add %o1, 1, %o1 | ||
750 | stb %g5, [%o0] | ||
751 | sub %o2, 1, %o2 | ||
752 | bne 61f /* dst & 3 was 3: aligned now */ | ||
753 | add %o0, 1, %o0 /* delay slot */ | ||
754 | 60: /* move two bytes, one at a time since src alignment is unknown */ | ||
755 | ldub [%o1], %g3 | ||
756 | add %o1, 2, %o1 | ||
757 | stb %g3, [%o0] | ||
758 | sub %o2, 2, %o2 | ||
759 | ldub [%o1 - 1], %g3 | ||
760 | add %o0, 2, %o0 | ||
761 | stb %g3, [%o0 - 1] | ||
762 | 61: /* dst is word aligned from here on */ | ||
763 | and %o1, 3, %g2 /* %g2 = src & 3, the byte offset of src inside its word */ | ||
764 | and %o2, 0xc, %g3 /* %g3 = len & 0xc, selects the entry point into the unrolled loop */ | ||
765 | and %o1, -4, %o1 /* round src down to a word boundary */ | ||
766 | cmp %g3, 4 | ||
767 | sll %g2, 3, %g4 /* %g4 = left shift count = 8 * (src & 3) */ | ||
768 | mov 32, %g2 | ||
769 | be 4f /* (len & 0xc) == 4 */ | ||
770 | sub %g2, %g4, %g7 /* delay slot: %g7 = right shift count = 32 - %g4 */ | ||
771 | |||
772 | blu 3f /* (len & 0xc) == 0 */ | ||
773 | cmp %g3, 0x8 /* delay slot */ | ||
774 | |||
775 | be 2f /* (len & 0xc) == 8 */ | ||
776 | srl %o2, 2, %g3 /* delay slot: %g3 = len / 4, the word count */ | ||
777 | |||
778 | ld [%o1], %o3 /* (len & 0xc) == 0xc: preload two words and enter at 8: */ | ||
779 | add %o0, -8, %o0 /* bias dst so the [%o0 + 8] store at 8: hits the current word */ | ||
780 | ld [%o1 + 4], %o4 | ||
781 | b 8f | ||
782 | add %g3, 1, %g3 /* delay slot: adjust the count for the partial first pass */ | ||
783 | 2: | ||
784 | ld [%o1], %o4 | ||
785 | add %o0, -12, %o0 | ||
786 | ld [%o1 + 4], %o5 | ||
787 | add %g3, 2, %g3 | ||
788 | b 9f | ||
789 | add %o1, -4, %o1 /* delay slot */ | ||
790 | 3: | ||
791 | ld [%o1], %g1 | ||
792 | add %o0, -4, %o0 | ||
793 | ld [%o1 + 4], %o3 | ||
794 | srl %o2, 2, %g3 /* %g3 = len / 4 */ | ||
795 | b 7f | ||
796 | add %o1, 4, %o1 /* delay slot */ | ||
797 | 4: | ||
798 | ld [%o1], %o5 | ||
799 | cmp %o2, 7 | ||
800 | ld [%o1 + 4], %g1 | ||
801 | srl %o2, 2, %g3 /* %g3 = len / 4 */ | ||
802 | bleu 10f /* len <= 7: a single word to store, done in the tail */ | ||
803 | add %o1, 8, %o1 /* delay slot */ | ||
804 | |||
805 | ld [%o1], %o3 | ||
806 | add %g3, -1, %g3 | ||
807 | 5: /* unrolled loop, four words per pass; each store is (older word << %g4) | (newer word >> %g7) */ | ||
808 | sll %o5, %g4, %g2 | ||
809 | srl %g1, %g7, %g5 | ||
810 | or %g2, %g5, %g2 | ||
811 | st %g2, [%o0] | ||
812 | 7: | ||
813 | ld [%o1 + 4], %o4 | ||
814 | sll %g1, %g4, %g2 | ||
815 | srl %o3, %g7, %g5 | ||
816 | or %g2, %g5, %g2 | ||
817 | st %g2, [%o0 + 4] | ||
818 | 8: | ||
819 | ld [%o1 + 8], %o5 | ||
820 | sll %o3, %g4, %g2 | ||
821 | srl %o4, %g7, %g5 | ||
822 | or %g2, %g5, %g2 | ||
823 | st %g2, [%o0 + 8] | ||
824 | 9: | ||
825 | ld [%o1 + 12], %g1 | ||
826 | sll %o4, %g4, %g2 | ||
827 | srl %o5, %g7, %g5 | ||
828 | addcc %g3, -4, %g3 /* four words accounted for; Z when the count reaches zero */ | ||
829 | or %g2, %g5, %g2 | ||
830 | add %o1, 16, %o1 | ||
831 | st %g2, [%o0 + 12] | ||
832 | add %o0, 16, %o0 | ||
833 | bne,a 5b /* more words to go */ | ||
834 | ld [%o1], %o3 /* annulled delay slot: executed only when looping */ | ||
835 | 10: /* tail: one more merged word, then 0-3 single bytes */ | ||
836 | sll %o5, %g4, %g2 | ||
837 | srl %g1, %g7, %g5 | ||
838 | srl %g7, 3, %g3 /* %g3 = %g7 / 8 = 4 - (original src & 3) */ | ||
839 | or %g2, %g5, %g2 | ||
840 | sub %o1, %g3, %o1 /* step src back by %g3 bytes -- presumably onto the first byte not yet copied */ | ||
841 | andcc %o2, 2, %g0 /* flags = len & 2 */ | ||
842 | st %g2, [%o0] | ||
843 | be 1f /* len & 2 clear */ | ||
844 | andcc %o2, 1, %g0 /* delay slot: flags = len & 1 */ | ||
845 | |||
846 | ldub [%o1], %g2 | ||
847 | add %o1, 2, %o1 | ||
848 | stb %g2, [%o0 + 4] | ||
849 | add %o0, 2, %o0 | ||
850 | ldub [%o1 - 1], %g2 | ||
851 | stb %g2, [%o0 + 3] | ||
852 | 1: | ||
853 | be 1f /* len & 1 clear */ | ||
854 | nop | ||
855 | ldub [%o1], %g2 | ||
856 | stb %g2, [%o0 + 4] | ||
857 | 1: | ||
858 | retl | ||
859 | RETL_INSN /* delay slot; macro defined earlier in the file */ | ||
860 | |||
861 | #ifdef FASTER_NONALIGNED | ||
862 | |||
863 | 87: /* faster_nonaligned: reached from non_aligned when len >= 256; aligns src, then picks one of three variants by (dst - src) & 3. The SMOVE_* macros are defined earlier in the file (not visible here) */ | ||
864 | |||
865 | andcc %o1, 3, %g0 /* Z set when src is word aligned */ | ||
866 | be 3f | ||
867 | andcc %o1, 1, %g0 /* delay slot: Z set when src is even */ | ||
868 | |||
869 | be 4f | ||
870 | andcc %o1, 2, %g0 /* delay slot: flags = src & 2, tested by the bne below */ | ||
871 | |||
872 | ldub [%o1], %g2 /* odd src: move a single byte */ | ||
873 | add %o1, 1, %o1 | ||
874 | stb %g2, [%o0] | ||
875 | sub %o2, 1, %o2 | ||
876 | bne 3f /* src & 3 was 3: aligned now */ | ||
877 | add %o0, 1, %o0 /* delay slot */ | ||
878 | 4: | ||
879 | lduh [%o1], %g2 /* halfword load from src, stored as two separate bytes */ | ||
880 | add %o1, 2, %o1 | ||
881 | srl %g2, 8, %g3 | ||
882 | sub %o2, 2, %o2 | ||
883 | stb %g3, [%o0] | ||
884 | add %o0, 2, %o0 | ||
885 | stb %g2, [%o0 - 1] | ||
886 | 3: | ||
887 | andcc %o1, 4, %g0 | ||
888 | |||
889 | bne 2f /* src & 4 set: skip the extra word below */ | ||
890 | cmp %o5, 1 /* delay slot: %o5 still holds (dst - src) & 3 from the function entry */ | ||
891 | |||
892 | ld [%o1], %o4 /* move one word, stored bytewise */ | ||
893 | srl %o4, 24, %g2 | ||
894 | stb %g2, [%o0] | ||
895 | srl %o4, 16, %g3 | ||
896 | stb %g3, [%o0 + 1] | ||
897 | srl %o4, 8, %g2 | ||
898 | stb %g2, [%o0 + 2] | ||
899 | sub %o2, 4, %o2 | ||
900 | stb %o4, [%o0 + 3] | ||
901 | add %o1, 4, %o1 | ||
902 | add %o0, 4, %o0 | ||
903 | 2: | ||
904 | be 33f /* (dst - src) & 3 == 1 */ | ||
905 | cmp %o5, 2 /* delay slot */ | ||
906 | be 32f /* (dst - src) & 3 == 2 */ | ||
907 | sub %o2, 4, %o2 /* delay slot: runs for both 32: and the fall-through to 31: */ | ||
908 | 31: /* remaining case: (dst - src) & 3 == 3 */ | ||
909 | ld [%o1], %g2 | ||
910 | add %o1, 4, %o1 | ||
911 | srl %g2, 24, %g3 | ||
912 | and %o0, 7, %g5 | ||
913 | stb %g3, [%o0] | ||
914 | cmp %g5, 7 | ||
915 | sll %g2, 8, %g1 | ||
916 | add %o0, 4, %o0 | ||
917 | be 41f /* (dst & 7) == 7: use the SMOVE_ALIGNCHUNK variant */ | ||
918 | and %o2, 0xffffffc0, %o3 /* delay slot: %o3 = byte count covered by whole 64-byte blocks */ | ||
919 | ld [%o0 - 7], %o4 | ||
920 | 4: | ||
921 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
922 | SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
923 | SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
924 | SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
925 | subcc %o3, 64, %o3 | ||
926 | add %o1, 64, %o1 | ||
927 | bne 4b | ||
928 | add %o0, 64, %o0 /* delay slot */ | ||
929 | |||
930 | andcc %o2, 0x30, %o3 /* %o3 = bytes in the remaining whole 16-byte chunks */ | ||
931 | be,a 1f | ||
932 | srl %g1, 16, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
933 | 4: | ||
934 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
935 | subcc %o3, 16, %o3 | ||
936 | add %o1, 16, %o1 | ||
937 | bne 4b | ||
938 | add %o0, 16, %o0 /* delay slot */ | ||
939 | |||
940 | srl %g1, 16, %g2 | ||
941 | 1: /* flush the data still held in %o4 and %g1, then finish at short_end */ | ||
942 | st %o4, [%o0 - 7] | ||
943 | sth %g2, [%o0 - 3] | ||
944 | srl %g1, 8, %g4 | ||
945 | b 88f | ||
946 | stb %g4, [%o0 - 1] /* delay slot */ | ||
947 | 32: /* (dst - src) & 3 == 2 */ | ||
948 | ld [%o1], %g2 | ||
949 | add %o1, 4, %o1 | ||
950 | srl %g2, 16, %g3 | ||
951 | and %o0, 7, %g5 | ||
952 | sth %g3, [%o0] | ||
953 | cmp %g5, 6 | ||
954 | sll %g2, 16, %g1 | ||
955 | add %o0, 4, %o0 | ||
956 | be 42f /* (dst & 7) == 6: use the SMOVE_ALIGNCHUNK variant */ | ||
957 | and %o2, 0xffffffc0, %o3 /* delay slot: %o3 = byte count covered by whole 64-byte blocks */ | ||
958 | ld [%o0 - 6], %o4 | ||
959 | 4: | ||
960 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
961 | SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
962 | SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
963 | SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
964 | subcc %o3, 64, %o3 | ||
965 | add %o1, 64, %o1 | ||
966 | bne 4b | ||
967 | add %o0, 64, %o0 /* delay slot */ | ||
968 | |||
969 | andcc %o2, 0x30, %o3 /* %o3 = bytes in the remaining whole 16-byte chunks */ | ||
970 | be,a 1f | ||
971 | srl %g1, 16, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
972 | 4: | ||
973 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
974 | subcc %o3, 16, %o3 | ||
975 | add %o1, 16, %o1 | ||
976 | bne 4b | ||
977 | add %o0, 16, %o0 /* delay slot */ | ||
978 | |||
979 | srl %g1, 16, %g2 | ||
980 | 1: | ||
981 | st %o4, [%o0 - 6] | ||
982 | b 88f | ||
983 | sth %g2, [%o0 - 2] /* delay slot */ | ||
984 | 33: /* (dst - src) & 3 == 1 */ | ||
985 | ld [%o1], %g2 | ||
986 | sub %o2, 4, %o2 | ||
987 | srl %g2, 24, %g3 | ||
988 | and %o0, 7, %g5 | ||
989 | stb %g3, [%o0] | ||
990 | cmp %g5, 5 | ||
991 | srl %g2, 8, %g4 | ||
992 | sll %g2, 24, %g1 | ||
993 | sth %g4, [%o0 + 1] | ||
994 | add %o1, 4, %o1 | ||
995 | be 43f /* (dst & 7) == 5: use the SMOVE_ALIGNCHUNK variant */ | ||
996 | and %o2, 0xffffffc0, %o3 /* delay slot: %o3 = byte count covered by whole 64-byte blocks */ | ||
997 | |||
998 | ld [%o0 - 1], %o4 | ||
999 | add %o0, 4, %o0 | ||
1000 | 4: | ||
1001 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) | ||
1002 | SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) | ||
1003 | SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) | ||
1004 | SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) | ||
1005 | subcc %o3, 64, %o3 | ||
1006 | add %o1, 64, %o1 | ||
1007 | bne 4b | ||
1008 | add %o0, 64, %o0 /* delay slot */ | ||
1009 | |||
1010 | andcc %o2, 0x30, %o3 /* %o3 = bytes in the remaining whole 16-byte chunks */ | ||
1011 | be,a 1f | ||
1012 | srl %g1, 24, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
1013 | 4: | ||
1014 | SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) | ||
1015 | subcc %o3, 16, %o3 | ||
1016 | add %o1, 16, %o1 | ||
1017 | bne 4b | ||
1018 | add %o0, 16, %o0 /* delay slot */ | ||
1019 | |||
1020 | srl %g1, 24, %g2 | ||
1021 | 1: | ||
1022 | st %o4, [%o0 - 5] | ||
1023 | b 88f | ||
1024 | stb %g2, [%o0 - 1] /* delay slot */ | ||
1025 | 41: /* variant of 31: taken when (dst & 7) was 7 */ | ||
1026 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
1027 | SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
1028 | SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
1029 | SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
1030 | subcc %o3, 64, %o3 | ||
1031 | add %o1, 64, %o1 | ||
1032 | bne 41b | ||
1033 | add %o0, 64, %o0 /* delay slot */ | ||
1034 | |||
1035 | andcc %o2, 0x30, %o3 | ||
1036 | be,a 1f | ||
1037 | srl %g1, 16, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
1038 | 4: | ||
1039 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) | ||
1040 | subcc %o3, 16, %o3 | ||
1041 | add %o1, 16, %o1 | ||
1042 | bne 4b | ||
1043 | add %o0, 16, %o0 /* delay slot */ | ||
1044 | |||
1045 | srl %g1, 16, %g2 | ||
1046 | 1: | ||
1047 | sth %g2, [%o0 - 3] | ||
1048 | srl %g1, 8, %g4 | ||
1049 | b 88f | ||
1050 | stb %g4, [%o0 - 1] /* delay slot */ | ||
1051 | 43: /* variant of 33: taken when (dst & 7) was 5 */ | ||
1052 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) | ||
1053 | SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) | ||
1054 | SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) | ||
1055 | SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) | ||
1056 | subcc %o3, 64, %o3 | ||
1057 | add %o1, 64, %o1 | ||
1058 | bne 43b | ||
1059 | add %o0, 64, %o0 /* delay slot */ | ||
1060 | |||
1061 | andcc %o2, 0x30, %o3 | ||
1062 | be,a 1f | ||
1063 | srl %g1, 24, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
1064 | 4: | ||
1065 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) | ||
1066 | subcc %o3, 16, %o3 | ||
1067 | add %o1, 16, %o1 | ||
1068 | bne 4b | ||
1069 | add %o0, 16, %o0 /* delay slot */ | ||
1070 | |||
1071 | srl %g1, 24, %g2 | ||
1072 | 1: | ||
1073 | stb %g2, [%o0 + 3] | ||
1074 | b 88f | ||
1075 | add %o0, 4, %o0 /* delay slot: this path had not yet advanced dst by 4 */ | ||
1076 | 42: /* variant of 32: taken when (dst & 7) was 6 */ | ||
1077 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
1078 | SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
1079 | SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
1080 | SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
1081 | subcc %o3, 64, %o3 | ||
1082 | add %o1, 64, %o1 | ||
1083 | bne 42b | ||
1084 | add %o0, 64, %o0 /* delay slot */ | ||
1085 | |||
1086 | andcc %o2, 0x30, %o3 | ||
1087 | be,a 1f | ||
1088 | srl %g1, 16, %g2 /* annulled delay slot: executed only when the branch is taken */ | ||
1089 | 4: | ||
1090 | SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) | ||
1091 | subcc %o3, 16, %o3 | ||
1092 | add %o1, 16, %o1 | ||
1093 | bne 4b | ||
1094 | add %o0, 16, %o0 /* delay slot */ | ||
1095 | |||
1096 | srl %g1, 16, %g2 | ||
1097 | 1: | ||
1098 | sth %g2, [%o0 - 2] | ||
1099 | |||
1100 | /* Fall through */ | ||
1101 | |||
1102 | #endif /* FASTER_NONALIGNED */ | ||
1103 | |||
1104 | 88: /* short_end: bytewise copy of (%o2 & 0xf) bytes through a computed jump into the table below */ | ||
1105 | |||
1106 | and %o2, 0xe, %o3 /* %o3 = even part of the byte count (0..14) */ | ||
1107 | 20: | ||
1108 | sethi %hi(89f), %o5 | ||
1109 | sll %o3, 3, %o4 /* 8 bytes of table code per byte copied (16 per two-byte MOVE_SHORTCHUNK) */ | ||
1110 | add %o0, %o3, %o0 /* advance dst past the bytes up front */ | ||
1111 | sub %o5, %o4, %o5 | ||
1112 | add %o1, %o3, %o1 /* advance src likewise */ | ||
1113 | jmpl %o5 + %lo(89f), %g0 /* enter the table %o4 bytes before 89: */ | ||
1114 | andcc %o2, 1, %g0 /* delay slot: flags = len & 1, consumed at 89: */ | ||
1115 | |||
1116 | MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | ||
1117 | MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | ||
1118 | MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | ||
1119 | MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | ||
1120 | MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | ||
1121 | MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | ||
1122 | MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | ||
1123 | |||
1124 | 89: /* short_table_end */ | ||
1125 | |||
1126 | be 1f /* even length: done */ | ||
1127 | nop | ||
1128 | |||
1129 | ldub [%o1], %g2 /* odd length: one last byte */ | ||
1130 | stb %g2, [%o0] | ||
1131 | 1: | ||
1132 | retl | ||
1133 | RETL_INSN /* delay slot; macro defined earlier in the file */ | ||
1134 | |||
1135 | 90: /* short_aligned_end: len <= 15 with dst and src aligned alike; flags on entry reflect src & 3 */ | ||
1136 | bne 88b /* src not word aligned: bytewise copy */ | ||
1137 | andcc %o2, 8, %g0 /* delay slot: flags = len & 8 */ | ||
1138 | |||
1139 | be 1f /* len & 8 clear */ | ||
1140 | andcc %o2, 4, %g0 /* delay slot: flags = len & 4, consumed at 81: */ | ||
1141 | |||
1142 | ld [%o1 + 0x00], %g2 /* move 8 bytes as two words */ | ||
1143 | ld [%o1 + 0x04], %g3 | ||
1144 | add %o1, 8, %o1 | ||
1145 | st %g2, [%o0 + 0x00] | ||
1146 | st %g3, [%o0 + 0x04] | ||
1147 | add %o0, 8, %o0 | ||
1148 | 1: | ||
1149 | b 81b /* finish the last 0..7 bytes at memcpy_last7 */ | ||
1150 | mov %o2, %g1 /* delay slot: memcpy_last7 tests the length in %g1 */ | ||
diff --git a/arch/sparc/lib/memscan.S b/arch/sparc/lib/memscan.S new file mode 100644 index 000000000000..28e78ff090ac --- /dev/null +++ b/arch/sparc/lib/memscan.S | |||
@@ -0,0 +1,133 @@ | |||
1 | /* $Id: memscan.S,v 1.4 1996/09/08 02:01:20 davem Exp $ | ||
2 | * memscan.S: Optimized memscan for the Sparc. | ||
3 | * | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | */ | ||
6 | |||
7 | /* In essence, this is just a fancy strlen. */ | ||
8 | |||
9 | #define LO_MAGIC 0x01010101 | ||
10 | #define HI_MAGIC 0x80808080 | ||
11 | |||
12 | .text | ||
13 | .align 4 | ||
14 | .globl __memscan_zero, __memscan_generic | ||
15 | .globl memscan | ||
16 | __memscan_zero: /* scan at most %o1 bytes starting at %o0 for a zero byte; %o0 returns the position reached */ | ||
17 | /* %o0 = addr, %o1 = size */ | ||
18 | cmp %o1, 0 | ||
19 | bne,a 1f /* size != 0 */ | ||
20 | andcc %o0, 3, %g0 /* annulled delay slot (runs only when the branch is taken): Z set when addr is word aligned */ | ||
21 | |||
22 | retl /* size == 0: addr is returned unchanged */ | ||
23 | nop | ||
24 | |||
25 | 1: | ||
26 | be mzero_scan_word /* already word aligned */ | ||
27 | sethi %hi(HI_MAGIC), %g2 /* delay slot: upper bits of HI_MAGIC, completed at mzero_scan_word */ | ||
28 | |||
29 | ldsb [%o0], %g3 | ||
30 | mzero_still_not_word_aligned: /* byte-at-a-time scan up to the next word boundary */ | ||
31 | cmp %g3, 0 | ||
32 | bne 1f | ||
33 | add %o0, 1, %o0 /* delay slot: step past the byte just tested */ | ||
34 | |||
35 | retl /* zero byte found */ | ||
36 | sub %o0, 1, %o0 /* delay slot: undo the step so %o0 points at it */ | ||
37 | |||
38 | 1: | ||
39 | subcc %o1, 1, %o1 /* one byte of the budget used */ | ||
40 | bne,a 1f | ||
41 | andcc %o0, 3, %g0 /* annulled delay slot: Z set once word aligned */ | ||
42 | |||
43 | retl /* size exhausted: %o0 is one past the last byte tested */ | ||
44 | nop | ||
45 | |||
46 | 1: | ||
47 | bne,a mzero_still_not_word_aligned /* still unaligned */ | ||
48 | ldsb [%o0], %g3 /* annulled delay slot: fetch the next byte */ | ||
49 | |||
50 | sethi %hi(HI_MAGIC), %g2 | ||
51 | mzero_scan_word: | ||
52 | or %g2, %lo(HI_MAGIC), %o3 /* %o3 = HI_MAGIC */ | ||
53 | sethi %hi(LO_MAGIC), %g3 | ||
54 | or %g3, %lo(LO_MAGIC), %o2 /* %o2 = LO_MAGIC */ | ||
55 | mzero_next_word: | ||
56 | ld [%o0], %g2 | ||
57 | mzero_next_word_preloaded: | ||
58 | sub %g2, %o2, %g2 | ||
59 | mzero_next_word_preloaded_next: | ||
60 | andcc %g2, %o3, %g0 /* (word - LO_MAGIC) & HI_MAGIC: non-zero marks a candidate word, verified bytewise below */ | ||
61 | bne mzero_byte_zero /* candidate: inspect its four bytes */ | ||
62 | add %o0, 4, %o0 /* delay slot: %o0 now points past the word */ | ||
63 | |||
64 | mzero_check_out_of_fuel: | ||
65 | subcc %o1, 4, %o1 /* four bytes of the budget used */ | ||
66 | bg,a 1f /* budget left (signed > 0) */ | ||
67 | ld [%o0], %g2 /* annulled delay slot: preload the next word */ | ||
68 | |||
69 | retl /* budget exhausted */ | ||
70 | nop | ||
71 | |||
72 | 1: | ||
73 | b mzero_next_word_preloaded_next | ||
74 | sub %g2, %o2, %g2 /* delay slot: same subtraction as at mzero_next_word_preloaded */ | ||
75 | |||
76 | /* Check every byte. */ | ||
77 | mzero_byte_zero: | ||
78 | ldsb [%o0 - 4], %g2 /* first byte of the candidate word */ | ||
79 | cmp %g2, 0 | ||
80 | bne mzero_byte_one | ||
81 | sub %o0, 4, %g3 /* delay slot: address of that byte */ | ||
82 | |||
83 | retl | ||
84 | mov %g3, %o0 /* delay slot: return the address of the zero byte */ | ||
85 | |||
86 | mzero_byte_one: | ||
87 | ldsb [%o0 - 3], %g2 /* second byte */ | ||
88 | cmp %g2, 0 | ||
89 | bne,a mzero_byte_two_and_three | ||
90 | ldsb [%o0 - 2], %g2 /* annulled delay slot: third byte */ | ||
91 | |||
92 | retl | ||
93 | sub %o0, 3, %o0 | ||
94 | |||
95 | mzero_byte_two_and_three: | ||
96 | cmp %g2, 0 | ||
97 | bne,a 1f | ||
98 | ldsb [%o0 - 1], %g2 /* annulled delay slot: fourth byte */ | ||
99 | |||
100 | retl | ||
101 | sub %o0, 2, %o0 | ||
102 | |||
103 | 1: | ||
104 | cmp %g2, 0 | ||
105 | bne,a mzero_next_word_preloaded /* no zero byte after all: resume the word scan. NOTE(review): this path bypasses mzero_check_out_of_fuel, so %o1 is not reduced for the word just inspected -- confirm that is intended */ | ||
106 | ld [%o0], %g2 /* annulled delay slot: next word */ | ||
107 | |||
108 | retl | ||
109 | sub %o0, 1, %o0 | ||
110 | |||
111 | mzero_found_it: /* no branch in this file targets this label */ | ||
112 | retl | ||
113 | sub %o0, 2, %o0 | ||
114 | |||
115 | memscan: | ||
116 | __memscan_generic: /* scan at most %o2 bytes starting at %o0 for the byte value in %o1; %o0 returns the match, or addr + size when there is none */ | ||
117 | /* %o0 = addr, %o1 = c, %o2 = size */ | ||
118 | cmp %o2, 0 | ||
119 | bne,a 0f /* size != 0 */ | ||
120 | ldub [%o0], %g2 /* annulled delay slot: first byte, zero-extended */ | ||
121 | |||
122 | b,a 2f /* size == 0: return addr unchanged; the annul bit keeps the ldub at 1: from running as a delay slot */ | ||
123 | 1: | ||
124 | ldub [%o0], %g2 | ||
125 | 0: | ||
126 | cmp %g2, %o1 /* full-register compare against a zero-extended byte -- assumes %o1 is within 0..255, TODO confirm with callers */ | ||
127 | be 2f /* match: %o0 points at it */ | ||
128 | addcc %o2, -1, %o2 /* delay slot: one byte of the budget used; Z when exhausted */ | ||
129 | bne 1b | ||
130 | add %o0, 1, %o0 /* delay slot: also runs on the final pass, leaving %o0 = addr + size when nothing matched */ | ||
131 | 2: | ||
132 | retl | ||
133 | nop | ||
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S new file mode 100644 index 000000000000..a65eba41097c --- /dev/null +++ b/arch/sparc/lib/memset.S | |||
@@ -0,0 +1,203 @@ | |||
1 | /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code | ||
2 | * Copyright (C) 1991,1996 Free Software Foundation | ||
3 | * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | * | ||
6 | * Returns 0, if ok, and number of bytes not yet set if exception | ||
7 | * occurs and we were called as clear_user. | ||
8 | */ | ||
9 | |||
10 | #include <asm/ptrace.h> | ||
11 | |||
12 | /* Work around cpp -rob: the assembler section flags #alloc and #execinstr are wrapped in object-like macros so they can appear inside the function-like macros below (presumably because a bare # is special to cpp there) */ | ||
13 | #define ALLOC #alloc | ||
14 | #define EXECINSTR #execinstr | ||
15 | #define EX(x,y,a,b) /* emit insn "x,y" at local label 98, a fixup stub "99: ba 30f" whose delay slot computes %o0 = a, b, and an exception-table pair (98b, 99b) */ \ | ||
16 | 98: x,y; \ | ||
17 | .section .fixup,ALLOC,EXECINSTR; \ | ||
18 | .align 4; \ | ||
19 | 99: ba 30f; \ | ||
20 | a, b, %o0; \ | ||
21 | .section __ex_table,ALLOC; \ | ||
22 | .align 4; \ | ||
23 | .word 98b, 99b; \ | ||
24 | .text; \ | ||
25 | .align 4 | ||
26 | |||
27 | #define EXT(start,end,handler) /* emit a four-word exception-table record (start, 0, end, handler) describing a range of insns that share one handler */ \ | ||
28 | .section __ex_table,ALLOC; \ | ||
29 | .align 4; \ | ||
30 | .word start, 0, end, handler; \ | ||
31 | .text; \ | ||
32 | .align 4 | ||
33 | |||
34 | /* Please don't change these macros, unless you change the logic | ||
35 | * in the .fixup section below as well. | ||
36 | * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ | ||
37 | #define ZERO_BIG_BLOCK(base, offset, source) /* eight std insns at ascending offsets 0x00..0x38 */ \ | ||
38 | std source, [base + offset + 0x00]; \ | ||
39 | std source, [base + offset + 0x08]; \ | ||
40 | std source, [base + offset + 0x10]; \ | ||
41 | std source, [base + offset + 0x18]; \ | ||
42 | std source, [base + offset + 0x20]; \ | ||
43 | std source, [base + offset + 0x28]; \ | ||
44 | std source, [base + offset + 0x30]; \ | ||
45 | std source, [base + offset + 0x38]; | ||
46 | |||
47 | #define ZERO_LAST_BLOCKS(base, offset, source) /* eight std insns at (base - offset - 0x38) up to (base - offset); entered part-way by a computed jump */ \ | ||
48 | std source, [base - offset - 0x38]; \ | ||
49 | std source, [base - offset - 0x30]; \ | ||
50 | std source, [base - offset - 0x28]; \ | ||
51 | std source, [base - offset - 0x20]; \ | ||
52 | std source, [base - offset - 0x18]; \ | ||
53 | std source, [base - offset - 0x10]; \ | ||
54 | std source, [base - offset - 0x08]; \ | ||
55 | std source, [base - offset - 0x00]; | ||
56 | |||
57 | .text | ||
58 | .align 4 | ||
59 | |||
60 | .globl __bzero_begin | ||
61 | __bzero_begin: | ||
62 | |||
63 | .globl __bzero, __memset | ||
64 | .globl memset | ||
65 | .globl __memset_start, __memset_end | ||
66 | __memset_start: | ||
67 | __memset: | ||
68 | memset: /* %o0 = dst, %o1 = fill byte, %o2 = len; returns 0 in %o0, or the fixup's residual byte count after a fault */ | ||
69 | and %o1, 0xff, %g3 | ||
70 | sll %g3, 8, %g2 | ||
71 | or %g3, %g2, %g3 | ||
72 | sll %g3, 16, %g2 | ||
73 | or %g3, %g2, %g3 /* %g3 = fill byte replicated into all four bytes */ | ||
74 | b 1f /* join the body shared with __bzero */ | ||
75 | mov %o2, %o1 /* delay slot: len moves to %o1, where __bzero keeps it */ | ||
76 | 3: /* dst not word aligned; %o2 = dst & 3, so 4 - %o2 bytes are stored here */ | ||
77 | cmp %o2, 3 | ||
78 | be 2f | ||
79 | EX(stb %g3, [%o0], sub %o1, 0) /* delay slot: the first byte is always stored */ | ||
80 | |||
81 | cmp %o2, 2 | ||
82 | be 2f | ||
83 | EX(stb %g3, [%o0 + 0x01], sub %o1, 1) /* delay slot */ | ||
84 | |||
85 | EX(stb %g3, [%o0 + 0x02], sub %o1, 2) | ||
86 | 2: | ||
87 | sub %o2, 4, %o2 /* %o2 = -(number of bytes just stored) */ | ||
88 | add %o1, %o2, %o1 /* len -= bytes stored */ | ||
89 | b 4f | ||
90 | sub %o0, %o2, %o0 /* delay slot: dst += bytes stored, now word aligned */ | ||
91 | |||
92 | __bzero: /* %o0 = dst, %o1 = len */ | ||
93 | mov %g0, %g3 /* fill pattern is zero */ | ||
94 | 1: | ||
95 | cmp %o1, 7 | ||
96 | bleu 7f /* len <= 7: short path */ | ||
97 | andcc %o0, 3, %o2 /* delay slot: %o2 = dst & 3 */ | ||
98 | |||
99 | bne 3b /* align dst first */ | ||
100 | 4: | ||
101 | andcc %o0, 4, %g0 /* delay slot of the bne above and re-entry point from 3: */ | ||
102 | |||
103 | be 2f /* dst already 8-byte aligned */ | ||
104 | mov %g3, %g2 /* delay slot: %g2/%g3 form the register pair written by std */ | ||
105 | |||
106 | EX(st %g3, [%o0], sub %o1, 0) | ||
107 | sub %o1, 4, %o1 | ||
108 | add %o0, 4, %o0 | ||
109 | 2: | ||
110 | andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run | ||
111 | be 9f /* fewer than 128 bytes left */ | ||
112 | andcc %o1, 0x78, %o2 /* delay slot: %o2 = bytes in the whole doublewords left after the 128-byte blocks */ | ||
113 | 10: | ||
114 | ZERO_BIG_BLOCK(%o0, 0x00, %g2) | ||
115 | subcc %o3, 128, %o3 | ||
116 | ZERO_BIG_BLOCK(%o0, 0x40, %g2) | ||
117 | 11: | ||
118 | EXT(10b, 11b, 20f) | ||
119 | bne 10b | ||
120 | add %o0, 128, %o0 /* delay slot */ | ||
121 | |||
122 | orcc %o2, %g0, %g0 /* the loop clobbered the flags: test %o2 again */ | ||
123 | 9: | ||
124 | be 13f /* no whole doublewords left */ | ||
125 | andcc %o1, 7, %o1 /* delay slot: %o1 = trailing bytes; flags consumed at 13: */ | ||
126 | |||
127 | srl %o2, 1, %o3 /* 4 bytes of code (one std) per 8 bytes of data */ | ||
128 | set 13f, %o4 | ||
129 | sub %o4, %o3, %o4 | ||
130 | jmp %o4 /* enter the std table %o3 bytes before 13: */ | ||
131 | add %o0, %o2, %o0 /* delay slot: the table stores at negative offsets from the advanced dst */ | ||
132 | |||
133 | 12: | ||
134 | ZERO_LAST_BLOCKS(%o0, 0x48, %g2) | ||
135 | ZERO_LAST_BLOCKS(%o0, 0x08, %g2) | ||
136 | 13: EXT(12b, 13b, 21f) /* exception-table range for the 16 stores between 12: and 13:, routing a fault in them to fixup handler 21; without it such a fault has no handler */ | ||
137 | be 8f /* no trailing bytes: done */ | ||
138 | andcc %o1, 4, %g0 /* delay slot: flags = len & 4 */ | ||
139 | |||
140 | be 1f | ||
141 | andcc %o1, 2, %g0 /* delay slot: flags = len & 2 */ | ||
142 | |||
143 | EX(st %g3, [%o0], and %o1, 7) | ||
144 | add %o0, 4, %o0 | ||
145 | 1: | ||
146 | be 1f | ||
147 | andcc %o1, 1, %g0 /* delay slot: flags = len & 1 */ | ||
148 | |||
149 | EX(sth %g3, [%o0], and %o1, 3) | ||
150 | add %o0, 2, %o0 | ||
151 | 1: | ||
152 | bne,a 8f /* odd length: the annulled delay slot stores the last byte only when taken */ | ||
153 | EX(stb %g3, [%o0], and %o1, 1) | ||
154 | 8: | ||
155 | retl | ||
156 | clr %o0 /* delay slot: success returns 0 */ | ||
157 | 7: /* len <= 7; flags on entry reflect dst & 3 */ | ||
158 | be 13b /* dst word aligned: reuse the tail at 13: */ | ||
159 | orcc %o1, 0, %g0 /* delay slot: Z when len == 0 */ | ||
160 | |||
161 | be 0f /* nothing to store */ | ||
162 | 8: | ||
163 | add %o0, 1, %o0 /* delay slot of the be above and loop head: step dst first, store behind it */ | ||
164 | subcc %o1, 1, %o1 | ||
165 | bne 8b /* not annulled: the store in the delay slot must also run on the final pass, otherwise the last byte is never written */ | ||
166 | EX(stb %g3, [%o0 - 1], add %o1, 1) | ||
167 | 0: | ||
168 | retl | ||
169 | clr %o0 /* delay slot: success returns 0 */ | ||
170 | __memset_end: | ||
171 | |||
172 | .section .fixup,#alloc,#execinstr | ||
173 | .align 4 | ||
174 | 20: /* handler named by EXT(10b, 11b, 20f), the 128-byte loop; %g2 is presumably the index of the faulting insn within that range, supplied by the trap code -- TODO confirm */ | ||
175 | cmp %g2, 8 | ||
176 | bleu 1f /* index 0..8: first ZERO_BIG_BLOCK (index 8 is the subcc) */ | ||
177 | and %o1, 0x7f, %o1 /* delay slot: %o1 = len modulo 128 */ | ||
178 | sub %g2, 9, %g2 /* index within the second ZERO_BIG_BLOCK */ | ||
179 | add %o3, 64, %o3 /* the subcc has already taken 128 off %o3 here; presumably this restores the half of the block not yet written */ | ||
180 | 1: | ||
181 | sll %g2, 3, %g2 /* 8 bytes per std */ | ||
182 | add %o3, %o1, %o0 | ||
183 | b 30f | ||
184 | sub %o0, %g2, %o0 /* delay slot: %o0 = %o3 + %o1 - 8 * index */ | ||
185 | 21: /* presumably the handler for the ZERO_LAST_BLOCKS stores (range 12b..13b). NOTE(review): it computes 8 * (8 - %g2) + (%o1 & 7) while that table holds 16 stores -- verify the result for %g2 above 8 */ | ||
186 | mov 8, %o0 | ||
187 | and %o1, 7, %o1 | ||
188 | sub %o0, %g2, %o0 | ||
189 | sll %o0, 3, %o0 | ||
190 | b 30f | ||
191 | add %o0, %o1, %o0 /* delay slot */ | ||
192 | 30: /* common tail of every fixup stub; %o0 already holds the value to return */ | ||
193 | /* %o4 is faulting address, %o5 is %pc where fault occurred */ | ||
194 | save %sp, -104, %sp /* new register window: the previous %o registers are now %i */ | ||
195 | mov %i5, %o0 | ||
196 | mov %i7, %o1 | ||
197 | call lookup_fault /* arguments: previous %o5, previous %o7, previous %o4 */ | ||
198 | mov %i4, %o2 /* delay slot */ | ||
199 | ret | ||
200 | restore /* delay slot: back in the previous window, whose %o0 still holds the count computed before the save */ | ||
201 | |||
202 | .globl __bzero_end | ||
203 | __bzero_end: | ||
diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S new file mode 100644 index 000000000000..83dffbc2f62f --- /dev/null +++ b/arch/sparc/lib/mul.S | |||
@@ -0,0 +1,135 @@ | |||
1 | /* $Id: mul.S,v 1.4 1996/09/30 02:22:32 davem Exp $ | ||
2 | * mul.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | /* | ||
7 | * Signed multiply, from Appendix E of the Sparc Version 8 | ||
8 | * Architecture Manual. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of | ||
13 | * the 64-bit product). | ||
14 | * | ||
15 | * This code optimizes short (less than 13-bit) multiplies. | ||
16 | */ | ||
17 | |||
18 | .globl .mul /* export .mul: signed 32x32 multiply, 64-bit product returned in %o1:%o0 */ | ||
19 | .mul: /* in: %o0, %o1; scratch: %o4, %y, condition codes, plus %o2 (long path) or %o5 (short path) */ | ||
20 | mov %o0, %y ! multiplier -> Y | ||
21 | andncc %o0, 0xfff, %g0 ! test bits 12..31 | ||
22 | be Lmul_shortway ! if zero, can do it the short way | ||
23 | andcc %g0, %g0, %o4 ! zero the partial product and clear N and V | ||
24 | |||
25 | /* | ||
26 | * Long multiply. 32 steps, followed by a final shift step. | ||
27 | */ | ||
28 | mulscc %o4, %o1, %o4 ! 1 | ||
29 | mulscc %o4, %o1, %o4 ! 2 | ||
30 | mulscc %o4, %o1, %o4 ! 3 | ||
31 | mulscc %o4, %o1, %o4 ! 4 | ||
32 | mulscc %o4, %o1, %o4 ! 5 | ||
33 | mulscc %o4, %o1, %o4 ! 6 | ||
34 | mulscc %o4, %o1, %o4 ! 7 | ||
35 | mulscc %o4, %o1, %o4 ! 8 | ||
36 | mulscc %o4, %o1, %o4 ! 9 | ||
37 | mulscc %o4, %o1, %o4 ! 10 | ||
38 | mulscc %o4, %o1, %o4 ! 11 | ||
39 | mulscc %o4, %o1, %o4 ! 12 | ||
40 | mulscc %o4, %o1, %o4 ! 13 | ||
41 | mulscc %o4, %o1, %o4 ! 14 | ||
42 | mulscc %o4, %o1, %o4 ! 15 | ||
43 | mulscc %o4, %o1, %o4 ! 16 | ||
44 | mulscc %o4, %o1, %o4 ! 17 | ||
45 | mulscc %o4, %o1, %o4 ! 18 | ||
46 | mulscc %o4, %o1, %o4 ! 19 | ||
47 | mulscc %o4, %o1, %o4 ! 20 | ||
48 | mulscc %o4, %o1, %o4 ! 21 | ||
49 | mulscc %o4, %o1, %o4 ! 22 | ||
50 | mulscc %o4, %o1, %o4 ! 23 | ||
51 | mulscc %o4, %o1, %o4 ! 24 | ||
52 | mulscc %o4, %o1, %o4 ! 25 | ||
53 | mulscc %o4, %o1, %o4 ! 26 | ||
54 | mulscc %o4, %o1, %o4 ! 27 | ||
55 | mulscc %o4, %o1, %o4 ! 28 | ||
56 | mulscc %o4, %o1, %o4 ! 29 | ||
57 | mulscc %o4, %o1, %o4 ! 30 | ||
58 | mulscc %o4, %o1, %o4 ! 31 | ||
59 | mulscc %o4, %o1, %o4 ! 32 | ||
60 | mulscc %o4, %g0, %o4 ! final shift | ||
61 | |||
62 | ! If %o0 was negative, the result is | ||
63 | ! (%o0 * %o1) + (%o1 << 32)) | ||
64 | ! We fix that here. | ||
65 | |||
66 | #if 0 | ||
67 | tst %o0 | ||
68 | bge 1f | ||
69 | rd %y, %o0 | ||
70 | |||
71 | ! %o0 was indeed negative; fix upper 32 bits of result by subtracting | ||
72 | ! %o1 (i.e., return %o4 - %o1 in %o1). | ||
73 | retl | ||
74 | sub %o4, %o1, %o1 | ||
75 | |||
76 | 1: | ||
77 | retl | ||
78 | mov %o4, %o1 | ||
79 | #else | ||
80 | /* Faster code adapted from tege@sics.se's code for umul.S. */ | ||
81 | sra %o0, 31, %o2 ! make mask from sign bit | ||
82 | and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0 | ||
83 | rd %y, %o0 ! get lower half of product | ||
84 | retl /* leaf return; the sub on the next line executes in the delay slot */ | ||
85 | sub %o4, %o2, %o1 ! subtract compensation | ||
86 | ! and put upper half in place | ||
87 | #endif | ||
88 | |||
89 | Lmul_shortway: /* taken when %o0 has no bits set above bit 11 (see the andncc/be at entry) */ | ||
90 | /* | ||
91 | * Short multiply. 12 steps, followed by a final shift step. | ||
92 | * The resulting bits are off by 12 and (32-12) = 20 bit positions, | ||
93 | * but there is no problem with %o0 being negative (unlike above). | ||
94 | */ | ||
95 | mulscc %o4, %o1, %o4 ! 1 | ||
96 | mulscc %o4, %o1, %o4 ! 2 | ||
97 | mulscc %o4, %o1, %o4 ! 3 | ||
98 | mulscc %o4, %o1, %o4 ! 4 | ||
99 | mulscc %o4, %o1, %o4 ! 5 | ||
100 | mulscc %o4, %o1, %o4 ! 6 | ||
101 | mulscc %o4, %o1, %o4 ! 7 | ||
102 | mulscc %o4, %o1, %o4 ! 8 | ||
103 | mulscc %o4, %o1, %o4 ! 9 | ||
104 | mulscc %o4, %o1, %o4 ! 10 | ||
105 | mulscc %o4, %o1, %o4 ! 11 | ||
106 | mulscc %o4, %o1, %o4 ! 12 | ||
107 | mulscc %o4, %g0, %o4 ! final shift | ||
108 | |||
109 | /* | ||
110 | * %o4 has 20 of the bits that should be in the low part of the | ||
111 | * result; %y has the bottom 12 (as %y's top 12). That is: | ||
112 | * | ||
113 | * %o4 %y | ||
114 | * +----------------+----------------+ | ||
115 | * | -12- | -20- | -12- | -20- | | ||
116 | * +------(---------+------)---------+ | ||
117 | * --hi-- ----low-part---- | ||
118 | * | ||
119 | * The upper 12 bits of %o4 should be sign-extended to form the | ||
120 | * high part of the product (i.e., highpart = %o4 >> 20). | ||
121 | */ | ||
122 | |||
123 | rd %y, %o5 /* %o5 = %y; its top 12 bits are the lowest 12 bits of the product */ | ||
124 | sll %o4, 12, %o0 ! shift middle bits left 12 | ||
125 | srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left | ||
126 | or %o5, %o0, %o0 ! construct low part of result | ||
127 | retl /* leaf return; the sra on the next line executes in the delay slot */ | ||
128 | sra %o4, 20, %o1 ! ... and extract high part of result | ||
129 | |||
130 | .globl .mul_patch /* export .mul_patch: four-instruction multiply using the hardware smul instruction */ | ||
131 | .mul_patch: /* same register interface as .mul; NOTE(review): presumably copied over .mul on CPUs with hardware multiply -- confirm against the code that uses this symbol */ | ||
132 | smul %o0, %o1, %o0 /* %o0 = low 32 bits of the signed product; the high 32 bits land in %y */ | ||
133 | retl /* leaf return; the rd below executes in the delay slot */ | ||
134 | rd %y, %o1 /* %o1 = high 32 bits of the product */ | ||
135 | nop /* trailing filler instruction; never reached from the retl above */ | ||
diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S new file mode 100644 index 000000000000..7f17872d0603 --- /dev/null +++ b/arch/sparc/lib/muldi3.S | |||
@@ -0,0 +1,76 @@ | |||
1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
2 | |||
3 | This file is part of GNU CC. | ||
4 | |||
5 | GNU CC is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2, or (at your option) | ||
8 | any later version. | ||
9 | |||
10 | GNU CC is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNU CC; see the file COPYING. If not, write to | ||
17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. */ | ||
19 | |||
20 | .text | ||
21 | .align 4 | ||
22 | .globl __muldi3 /* export __muldi3: 64x64 -> 64-bit multiply built from 32-bit pieces */ | ||
23 | __muldi3: /* in: %o0:%o1 and %o2:%o3 as (high:low) word pairs, as implied by the arithmetic below; out: %o0 = high word, %o1 = low word */ | ||
24 | save %sp, -104, %sp /* new register window and 104-byte frame; the arguments are now %i0-%i3 */ | ||
25 | wr %g0, %i1, %y /* %y = %i1, the multiplier consumed by the mulscc steps */ | ||
26 | sra %i3, 0x1f, %g2 /* %g2 = all ones if %i3 has its top bit set, else 0 */ | ||
27 | and %i1, %g2, %g2 /* %g2 = %i1 or 0: correction added to the high word after the multiply steps */ | ||
28 | andcc %g0, 0, %g1 /* %g1 = 0 (partial product) and clear the condition codes */ | ||
29 | mulscc %g1, %i3, %g1 /* first of 32 multiply steps forming %i1 * %i3 in %g1:%y */ | ||
30 | mulscc %g1, %i3, %g1 | ||
31 | mulscc %g1, %i3, %g1 | ||
32 | mulscc %g1, %i3, %g1 | ||
33 | mulscc %g1, %i3, %g1 | ||
34 | mulscc %g1, %i3, %g1 | ||
35 | mulscc %g1, %i3, %g1 | ||
36 | mulscc %g1, %i3, %g1 | ||
37 | mulscc %g1, %i3, %g1 | ||
38 | mulscc %g1, %i3, %g1 | ||
39 | mulscc %g1, %i3, %g1 | ||
40 | mulscc %g1, %i3, %g1 | ||
41 | mulscc %g1, %i3, %g1 | ||
42 | mulscc %g1, %i3, %g1 | ||
43 | mulscc %g1, %i3, %g1 | ||
44 | mulscc %g1, %i3, %g1 | ||
45 | mulscc %g1, %i3, %g1 | ||
46 | mulscc %g1, %i3, %g1 | ||
47 | mulscc %g1, %i3, %g1 | ||
48 | mulscc %g1, %i3, %g1 | ||
49 | mulscc %g1, %i3, %g1 | ||
50 | mulscc %g1, %i3, %g1 | ||
51 | mulscc %g1, %i3, %g1 | ||
52 | mulscc %g1, %i3, %g1 | ||
53 | mulscc %g1, %i3, %g1 | ||
54 | mulscc %g1, %i3, %g1 | ||
55 | mulscc %g1, %i3, %g1 | ||
56 | mulscc %g1, %i3, %g1 | ||
57 | mulscc %g1, %i3, %g1 | ||
58 | mulscc %g1, %i3, %g1 | ||
59 | mulscc %g1, %i3, %g1 | ||
60 | mulscc %g1, %i3, %g1 | ||
61 | mulscc %g1, 0, %g1 /* final shift step with a zero multiplicand */ | ||
62 | add %g1, %g2, %l2 /* %l2 = high word of the low*low product after the correction */ | ||
63 | rd %y, %o1 /* low word of the low*low product */ | ||
64 | mov %o1, %l3 /* keep it in %l3 across the two calls below */ | ||
65 | mov %i1, %o0 /* first cross product: %i1 * %i2 */ | ||
66 | call .umul /* .umul is defined elsewhere; only its %o0 result is used here */ | ||
67 | mov %i2, %o1 /* delay slot: second argument */ | ||
68 | mov %o0, %l0 /* %l0 = result word of %i1 * %i2 */ | ||
69 | mov %i0, %o0 /* second cross product: %i0 * %i3 */ | ||
70 | call .umul | ||
71 | mov %i3, %o1 /* delay slot: second argument */ | ||
72 | add %l0, %o0, %l0 /* %l0 = sum of both cross-product words */ | ||
73 | mov %l2, %i0 /* NOTE(review): this value is overwritten by the very next instruction */ | ||
74 | add %l2, %l0, %i0 /* %i0 = high word of the result (becomes the caller's %o0) */ | ||
75 | ret | ||
76 | restore %g0, %l3, %o1 /* delay slot: pop the window and place the low word in the caller's %o1 */ | ||
diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S new file mode 100644 index 000000000000..44508148d055 --- /dev/null +++ b/arch/sparc/lib/rem.S | |||
@@ -0,0 +1,382 @@ | |||
1 | /* $Id: rem.S,v 1.7 1996/09/30 02:22:34 davem Exp $ | ||
2 | * rem.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | |||
7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
8 | /* | ||
9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
15 | * | ||
16 | * m4 parameters: | ||
17 | * .rem name of function to generate | ||
18 | * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 | ||
19 | * true true=true => signed; true=false => unsigned | ||
20 | * | ||
21 | * Algorithm parameters: | ||
22 | * N how many bits per iteration we try to get (4) | ||
23 | * WORDSIZE total number of bits (32) | ||
24 | * | ||
25 | * Derived constants: | ||
26 | * TOPBITS number of bits in the top decade of a number | ||
27 | * | ||
28 | * Important variables: | ||
29 | * Q the partial quotient under development (initially 0) | ||
30 | * R the remainder so far, initially the dividend | ||
31 | * ITER number of main division loop iterations required; | ||
32 | * equal to ceil(log2(quotient) / N). Note that this | ||
33 | * is the log base (2^N) of the quotient. | ||
34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
35 | * | ||
36 | * Cost: | ||
37 | * Current estimate for non-large dividend is | ||
38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
40 | * different path, as the upper bits of the quotient must be developed | ||
41 | * one bit at a time. | ||
42 | */ | ||
43 | |||
44 | |||
45 | .globl .rem /* export .rem: signed remainder %o0 % %o1, returned in %o0 */ | ||
46 | .rem: /* scratch: %o1-%o5, %g1, %g2, %g7 and the condition codes */ | ||
47 | ! compute sign of result; if neither is negative, no problem | ||
48 | orcc %o1, %o0, %g0 ! either negative? | ||
49 | bge 2f ! no, go do the divide | ||
50 | mov %o0, %g2 ! compute sign in any case | ||
51 | |||
52 | tst %o1 | ||
53 | bge 1f | ||
54 | tst %o0 | ||
55 | ! %o1 is definitely negative; %o0 might also be negative | ||
56 | bge 2f ! if %o0 not negative... | ||
57 | sub %g0, %o1, %o1 ! in any case, make %o1 nonneg | ||
58 | 1: ! %o0 is negative, %o1 is nonnegative | ||
59 | sub %g0, %o0, %o0 ! make %o0 nonnegative | ||
60 | 2: | ||
61 | |||
62 | ! Ready to divide. Compute size of quotient; scale comparand. | ||
63 | orcc %o1, %g0, %o5 | ||
64 | bne 1f | ||
65 | mov %o0, %o3 | ||
66 | |||
67 | ! Divide by zero trap. If it returns, return 0 (about as | ||
68 | ! wrong as possible, but that is what SunOS does...). | ||
69 | ta ST_DIV0 | ||
70 | retl | ||
71 | clr %o0 | ||
72 | |||
73 | 1: | ||
74 | cmp %o3, %o5 ! if %o1 exceeds %o0, done | ||
75 | blu Lgot_result ! (and algorithm fails otherwise) | ||
76 | clr %o2 | ||
77 | |||
78 | sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27, the threshold for the big-dividend path */ | ||
79 | |||
80 | cmp %o3, %g1 | ||
81 | blu Lnot_really_big | ||
82 | clr %o4 | ||
83 | |||
84 | ! Here the dividend is >= 2**(31-N) or so. We must be careful here, | ||
85 | ! as our usual N-at-a-shot divide step will cause overflow and havoc. | ||
86 | ! The number of bits in the result here is N*ITER+SC, where SC <= N. | ||
87 | ! Compute ITER in an unorthodox manner: know we need to shift V into | ||
88 | ! the top decade: so do not even bother to compare to R. | ||
89 | 1: | ||
90 | cmp %o5, %g1 | ||
91 | bgeu 3f | ||
92 | mov 1, %g7 /* delay slot: %g7 = 1 on every pass through this loop */ | ||
93 | |||
94 | sll %o5, 4, %o5 | ||
95 | |||
96 | b 1b | ||
97 | add %o4, 1, %o4 | ||
98 | |||
99 | ! Now compute %g7. | ||
100 | 2: | ||
101 | addcc %o5, %o5, %o5 | ||
102 | |||
103 | bcc Lnot_too_big | ||
104 | add %g7, 1, %g7 | ||
105 | |||
106 | ! We get here if the %o1 overflowed while shifting. | ||
107 | ! This means that %o3 has the high-order bit set. | ||
108 | ! Restore %o5 and subtract from %o3. | ||
109 | sll %g1, 4, %g1 ! high order bit | ||
110 | srl %o5, 1, %o5 ! rest of %o5 | ||
111 | add %o5, %g1, %o5 | ||
112 | |||
113 | b Ldo_single_div | ||
114 | sub %g7, 1, %g7 | ||
115 | |||
116 | Lnot_too_big: | ||
117 | 3: | ||
118 | cmp %o5, %o3 | ||
119 | blu 2b | ||
120 | nop | ||
121 | |||
122 | be Ldo_single_div | ||
123 | nop | ||
124 | /* NB: these are commented out in the V8-Sparc manual as well */ | ||
125 | /* (I do not understand this) */ | ||
126 | ! %o5 > %o3: went too far: back up 1 step | ||
127 | ! srl %o5, 1, %o5 | ||
128 | ! dec %g7 | ||
129 | ! do single-bit divide steps | ||
130 | ! | ||
131 | ! We have to be careful here. We know that %o3 >= %o5, so we can do the | ||
132 | ! first divide step without thinking. BUT, the others are conditional, | ||
133 | ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- | ||
134 | ! order bit set in the first step, just falling into the regular | ||
135 | ! division loop will mess up the first time around. | ||
136 | ! So we unroll slightly... | ||
137 | Ldo_single_div: | ||
138 | subcc %g7, 1, %g7 | ||
139 | bl Lend_regular_divide | ||
140 | nop | ||
141 | |||
142 | sub %o3, %o5, %o3 | ||
143 | mov 1, %o2 | ||
144 | |||
145 | b Lend_single_divloop | ||
146 | nop | ||
147 | Lsingle_divloop: | ||
148 | sll %o2, 1, %o2 | ||
149 | |||
150 | bl 1f | ||
151 | srl %o5, 1, %o5 | ||
152 | ! %o3 >= 0 | ||
153 | sub %o3, %o5, %o3 | ||
154 | |||
155 | b 2f | ||
156 | add %o2, 1, %o2 | ||
157 | 1: ! %o3 < 0 | ||
158 | add %o3, %o5, %o3 | ||
159 | sub %o2, 1, %o2 | ||
160 | 2: | ||
161 | Lend_single_divloop: | ||
162 | subcc %g7, 1, %g7 | ||
163 | bge Lsingle_divloop | ||
164 | tst %o3 | ||
165 | |||
166 | b,a Lend_regular_divide /* annulled unconditional branch: no delay-slot instruction is executed */ | ||
167 | |||
168 | Lnot_really_big: | ||
169 | 1: | ||
170 | sll %o5, 4, %o5 | ||
171 | cmp %o5, %o3 | ||
172 | bleu 1b | ||
173 | addcc %o4, 1, %o4 /* delay slot: count one iteration per 4-bit shift of %o5 */ | ||
174 | be Lgot_result | ||
175 | sub %o4, 1, %o4 | ||
176 | |||
177 | tst %o3 ! set up for initial iteration | ||
178 | Ldivloop: /* each pass shifts %o2 left by 4 and develops 4 quotient bits via the branch tree below */ | ||
179 | sll %o2, 4, %o2 | ||
180 | ! depth 1, accumulated bits 0 | ||
181 | bl L.1.16 | ||
182 | srl %o5,1,%o5 | ||
183 | ! remainder is positive | ||
184 | subcc %o3,%o5,%o3 | ||
185 | ! depth 2, accumulated bits 1 | ||
186 | bl L.2.17 | ||
187 | srl %o5,1,%o5 | ||
188 | ! remainder is positive | ||
189 | subcc %o3,%o5,%o3 | ||
190 | ! depth 3, accumulated bits 3 | ||
191 | bl L.3.19 | ||
192 | srl %o5,1,%o5 | ||
193 | ! remainder is positive | ||
194 | subcc %o3,%o5,%o3 | ||
195 | ! depth 4, accumulated bits 7 | ||
196 | bl L.4.23 | ||
197 | srl %o5,1,%o5 | ||
198 | ! remainder is positive | ||
199 | subcc %o3,%o5,%o3 | ||
200 | |||
201 | b 9f | ||
202 | add %o2, (7*2+1), %o2 | ||
203 | |||
204 | L.4.23: | ||
205 | ! remainder is negative | ||
206 | addcc %o3,%o5,%o3 | ||
207 | b 9f | ||
208 | add %o2, (7*2-1), %o2 | ||
209 | |||
210 | L.3.19: | ||
211 | ! remainder is negative | ||
212 | addcc %o3,%o5,%o3 | ||
213 | ! depth 4, accumulated bits 5 | ||
214 | bl L.4.21 | ||
215 | srl %o5,1,%o5 | ||
216 | ! remainder is positive | ||
217 | subcc %o3,%o5,%o3 | ||
218 | b 9f | ||
219 | add %o2, (5*2+1), %o2 | ||
220 | |||
221 | L.4.21: | ||
222 | ! remainder is negative | ||
223 | addcc %o3,%o5,%o3 | ||
224 | b 9f | ||
225 | add %o2, (5*2-1), %o2 | ||
226 | |||
227 | L.2.17: | ||
228 | ! remainder is negative | ||
229 | addcc %o3,%o5,%o3 | ||
230 | ! depth 3, accumulated bits 1 | ||
231 | bl L.3.17 | ||
232 | srl %o5,1,%o5 | ||
233 | ! remainder is positive | ||
234 | subcc %o3,%o5,%o3 | ||
235 | ! depth 4, accumulated bits 3 | ||
236 | bl L.4.19 | ||
237 | srl %o5,1,%o5 | ||
238 | ! remainder is positive | ||
239 | subcc %o3,%o5,%o3 | ||
240 | b 9f | ||
241 | add %o2, (3*2+1), %o2 | ||
242 | |||
243 | L.4.19: | ||
244 | ! remainder is negative | ||
245 | addcc %o3,%o5,%o3 | ||
246 | b 9f | ||
247 | add %o2, (3*2-1), %o2 | ||
248 | |||
249 | L.3.17: | ||
250 | ! remainder is negative | ||
251 | addcc %o3,%o5,%o3 | ||
252 | ! depth 4, accumulated bits 1 | ||
253 | bl L.4.17 | ||
254 | srl %o5,1,%o5 | ||
255 | ! remainder is positive | ||
256 | subcc %o3,%o5,%o3 | ||
257 | b 9f | ||
258 | add %o2, (1*2+1), %o2 | ||
259 | |||
260 | L.4.17: | ||
261 | ! remainder is negative | ||
262 | addcc %o3,%o5,%o3 | ||
263 | b 9f | ||
264 | add %o2, (1*2-1), %o2 | ||
265 | |||
266 | L.1.16: | ||
267 | ! remainder is negative | ||
268 | addcc %o3,%o5,%o3 | ||
269 | ! depth 2, accumulated bits -1 | ||
270 | bl L.2.15 | ||
271 | srl %o5,1,%o5 | ||
272 | ! remainder is positive | ||
273 | subcc %o3,%o5,%o3 | ||
274 | ! depth 3, accumulated bits -1 | ||
275 | bl L.3.15 | ||
276 | srl %o5,1,%o5 | ||
277 | ! remainder is positive | ||
278 | subcc %o3,%o5,%o3 | ||
279 | ! depth 4, accumulated bits -1 | ||
280 | bl L.4.15 | ||
281 | srl %o5,1,%o5 | ||
282 | ! remainder is positive | ||
283 | subcc %o3,%o5,%o3 | ||
284 | b 9f | ||
285 | add %o2, (-1*2+1), %o2 | ||
286 | |||
287 | L.4.15: | ||
288 | ! remainder is negative | ||
289 | addcc %o3,%o5,%o3 | ||
290 | b 9f | ||
291 | add %o2, (-1*2-1), %o2 | ||
292 | |||
293 | L.3.15: | ||
294 | ! remainder is negative | ||
295 | addcc %o3,%o5,%o3 | ||
296 | ! depth 4, accumulated bits -3 | ||
297 | bl L.4.13 | ||
298 | srl %o5,1,%o5 | ||
299 | ! remainder is positive | ||
300 | subcc %o3,%o5,%o3 | ||
301 | b 9f | ||
302 | add %o2, (-3*2+1), %o2 | ||
303 | |||
304 | L.4.13: | ||
305 | ! remainder is negative | ||
306 | addcc %o3,%o5,%o3 | ||
307 | b 9f | ||
308 | add %o2, (-3*2-1), %o2 | ||
309 | |||
310 | L.2.15: | ||
311 | ! remainder is negative | ||
312 | addcc %o3,%o5,%o3 | ||
313 | ! depth 3, accumulated bits -3 | ||
314 | bl L.3.13 | ||
315 | srl %o5,1,%o5 | ||
316 | ! remainder is positive | ||
317 | subcc %o3,%o5,%o3 | ||
318 | ! depth 4, accumulated bits -5 | ||
319 | bl L.4.11 | ||
320 | srl %o5,1,%o5 | ||
321 | ! remainder is positive | ||
322 | subcc %o3,%o5,%o3 | ||
323 | b 9f | ||
324 | add %o2, (-5*2+1), %o2 | ||
325 | |||
326 | L.4.11: | ||
327 | ! remainder is negative | ||
328 | addcc %o3,%o5,%o3 | ||
329 | b 9f | ||
330 | add %o2, (-5*2-1), %o2 | ||
331 | |||
332 | |||
333 | L.3.13: | ||
334 | ! remainder is negative | ||
335 | addcc %o3,%o5,%o3 | ||
336 | ! depth 4, accumulated bits -7 | ||
337 | bl L.4.9 | ||
338 | srl %o5,1,%o5 | ||
339 | ! remainder is positive | ||
340 | subcc %o3,%o5,%o3 | ||
341 | b 9f | ||
342 | add %o2, (-7*2+1), %o2 | ||
343 | |||
344 | L.4.9: | ||
345 | ! remainder is negative | ||
346 | addcc %o3,%o5,%o3 | ||
347 | b 9f | ||
348 | add %o2, (-7*2-1), %o2 | ||
349 | |||
350 | 9: | ||
351 | Lend_regular_divide: | ||
352 | subcc %o4, 1, %o4 | ||
353 | bge Ldivloop | ||
354 | tst %o3 | ||
355 | |||
356 | bl,a Lgot_result | ||
357 | ! non-restoring fixup here (one instruction only!) | ||
358 | add %o3, %o1, %o3 /* annulled delay slot: runs only when the remainder is negative; adds the (now non-negative) divisor back */ | ||
359 | |||
360 | Lgot_result: | ||
361 | ! check to see if answer should be < 0 | ||
362 | tst %g2 | ||
363 | bl,a 1f | ||
364 | sub %g0, %o3, %o3 /* annulled delay slot: negate only when %g2, the original dividend, is negative */ | ||
365 | 1: | ||
366 | retl | ||
367 | mov %o3, %o0 /* delay slot: return the remainder in %o0 */ | ||
368 | |||
369 | .globl .rem_patch /* export .rem_patch: signed remainder using the hardware sdivcc/smul instructions */ | ||
370 | .rem_patch: /* in: %o0 = dividend, %o1 = divisor; out: %o0 = remainder; scratch: %o2, %o4, %y; NOTE(review): presumably patched over .rem on CPUs with hardware divide -- confirm */ | ||
371 | sra %o0, 0x1f, %o4 /* %o4 = sign extension of the dividend */ | ||
372 | wr %o4, 0x0, %y /* %y = upper 32 bits of the 64-bit dividend used by sdivcc */ | ||
373 | nop /* three nops separate the %y write from the divide that reads it */ | ||
374 | nop | ||
375 | nop | ||
376 | sdivcc %o0, %o1, %o2 /* %o2 = quotient; V is set on overflow */ | ||
377 | bvs,a 1f | ||
378 | xnor %o2, %g0, %o2 /* annulled delay slot: on overflow only, %o2 = ~%o2 */ | ||
379 | 1: smul %o2, %o1, %o2 /* %o2 = quotient * divisor (low 32 bits) */ | ||
380 | retl | ||
381 | sub %o0, %o2, %o0 /* delay slot: remainder = dividend - quotient * divisor */ | ||
382 | nop /* trailing filler instruction; never reached from the retl above */ | ||
diff --git a/arch/sparc/lib/rwsem.S b/arch/sparc/lib/rwsem.S new file mode 100644 index 000000000000..e7578dc600b8 --- /dev/null +++ b/arch/sparc/lib/rwsem.S | |||
@@ -0,0 +1,205 @@ | |||
1 | /* $Id: rwsem.S,v 1.5 2000/05/09 17:40:13 davem Exp $ | ||
2 | * Assembly part of rw semaphores. | ||
3 | * | ||
4 | * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | #include <asm/psr.h> | ||
10 | |||
11 | .section .sched.text | ||
12 | .align 4 | ||
13 | |||
14 | .globl ___down_read /* export ___down_read: subtract 1 from the semaphore count, calling a C helper when the result is negative */ | ||
15 | ___down_read: /* in: %g1 = semaphore pointer, %o7 = return address, %g4 = value placed in %o7 on return; scratch: %g3, %g7 */ | ||
16 | rd %psr, %g3 /* save the current %psr */ | ||
17 | nop | ||
18 | nop | ||
19 | nop | ||
20 | or %g3, PSR_PIL, %g7 /* set the PSR_PIL bits (from <asm/psr.h>; presumably the interrupt-level field mask) */ | ||
21 | wr %g7, 0, %psr /* install the modified %psr; three nops follow before memory is touched */ | ||
22 | nop | ||
23 | nop | ||
24 | nop | ||
25 | #ifdef CONFIG_SMP | ||
26 | 1: ldstub [%g1 + 4], %g7 /* SMP only: take the byte lock at offset 4 of the semaphore */ | ||
27 | tst %g7 | ||
28 | bne 1b /* byte was already non-zero: spin */ | ||
29 | ld [%g1], %g7 /* delay slot: load the count word (reloaded on each spin) */ | ||
30 | sub %g7, 1, %g7 | ||
31 | st %g7, [%g1] /* store count - 1 */ | ||
32 | stb %g0, [%g1 + 4] /* release the byte lock */ | ||
33 | #else | ||
34 | ld [%g1], %g7 | ||
35 | sub %g7, 1, %g7 | ||
36 | st %g7, [%g1] /* store count - 1 */ | ||
37 | #endif | ||
38 | wr %g3, 0, %psr /* restore the saved %psr (this also rewrites the condition codes) */ | ||
39 | add %g7, 1, %g7 /* undo the decrement in the register so the subcc below can redo it */ | ||
40 | nop | ||
41 | nop | ||
42 | subcc %g7, 1, %g7 /* same value as stored, now with N and C set from the subtraction */ | ||
43 | bneg 3f /* negative count: take the slow path */ | ||
44 | nop | ||
45 | 2: jmpl %o7, %g0 /* return to the address in %o7 (no +8 offset) */ | ||
46 | mov %g4, %o7 /* delay slot: %o7 = %g4 */ | ||
47 | 3: save %sp, -64, %sp /* slow path: new register window; condition codes from the subcc are untouched */ | ||
48 | mov %g1, %l1 /* keep the semaphore pointer across the call */ | ||
49 | mov %g4, %l4 /* keep %g4 across the call */ | ||
50 | bcs 4f /* carry from the subcc, i.e. the count was 0 before the decrement */ | ||
51 | mov %g5, %l5 /* delay slot: keep %g5 across the call */ | ||
52 | call down_read_failed | ||
53 | mov %l1, %o0 /* delay slot: argument = semaphore pointer */ | ||
54 | mov %l1, %g1 | ||
55 | mov %l4, %g4 | ||
56 | ba ___down_read /* retry the whole operation from the top */ | ||
57 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
58 | 4: call down_read_failed_biased | ||
59 | mov %l1, %o0 /* delay slot: argument = semaphore pointer */ | ||
60 | mov %l1, %g1 | ||
61 | mov %l4, %g4 | ||
62 | ba 2b /* return to the caller without retrying */ | ||
63 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
64 | |||
65 | .globl ___down_write /* export ___down_write: subtract 0x01000000 from the semaphore count, calling a C helper when the result is non-zero */ | ||
66 | ___down_write: /* in: %g1 = semaphore pointer, %o7 = return address, %g4 = value placed in %o7 on return; scratch: %g2, %g3, %g7 */ | ||
67 | rd %psr, %g3 /* save the current %psr */ | ||
68 | nop | ||
69 | nop | ||
70 | nop | ||
71 | or %g3, PSR_PIL, %g7 /* set the PSR_PIL bits (from <asm/psr.h>; presumably the interrupt-level field mask) */ | ||
72 | wr %g7, 0, %psr /* install the modified %psr */ | ||
73 | sethi %hi(0x01000000), %g2 /* %g2 = 0x01000000, the amount a writer subtracts; also the first of three instructions after the %psr write */ | ||
74 | nop | ||
75 | nop | ||
76 | #ifdef CONFIG_SMP | ||
77 | 1: ldstub [%g1 + 4], %g7 /* SMP only: take the byte lock at offset 4 of the semaphore */ | ||
78 | tst %g7 | ||
79 | bne 1b /* byte was already non-zero: spin */ | ||
80 | ld [%g1], %g7 /* delay slot: load the count word (reloaded on each spin) */ | ||
81 | sub %g7, %g2, %g7 | ||
82 | st %g7, [%g1] /* store count - 0x01000000 */ | ||
83 | stb %g0, [%g1 + 4] /* release the byte lock */ | ||
84 | #else | ||
85 | ld [%g1], %g7 | ||
86 | sub %g7, %g2, %g7 | ||
87 | st %g7, [%g1] /* store count - 0x01000000 */ | ||
88 | #endif | ||
89 | wr %g3, 0, %psr /* restore the saved %psr (this also rewrites the condition codes) */ | ||
90 | add %g7, %g2, %g7 /* undo the subtraction in the register so the subcc below can redo it */ | ||
91 | nop | ||
92 | nop | ||
93 | subcc %g7, %g2, %g7 /* same value as stored, now with Z and C set from the subtraction */ | ||
94 | bne 3f /* non-zero result: take the slow path */ | ||
95 | nop | ||
96 | 2: jmpl %o7, %g0 /* return to the address in %o7 (no +8 offset) */ | ||
97 | mov %g4, %o7 /* delay slot: %o7 = %g4 */ | ||
98 | 3: save %sp, -64, %sp /* slow path: new register window; condition codes from the subcc are untouched */ | ||
99 | mov %g1, %l1 /* keep the semaphore pointer across the call */ | ||
100 | mov %g4, %l4 /* keep %g4 across the call */ | ||
101 | bcs 4f /* carry from the subcc, i.e. the old count was below 0x01000000 (unsigned) */ | ||
102 | mov %g5, %l5 /* delay slot: keep %g5 across the call */ | ||
103 | call down_write_failed | ||
104 | mov %l1, %o0 /* delay slot: argument = semaphore pointer */ | ||
105 | mov %l1, %g1 | ||
106 | mov %l4, %g4 | ||
107 | ba ___down_write /* retry the whole operation from the top */ | ||
108 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
109 | 4: call down_write_failed_biased | ||
110 | mov %l1, %o0 /* delay slot: argument = semaphore pointer */ | ||
111 | mov %l1, %g1 | ||
112 | mov %l4, %g4 | ||
113 | ba 2b /* return to the caller without retrying */ | ||
114 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
115 | |||
116 | .text | ||
117 | .globl ___up_read /* export ___up_read: add 1 to the semaphore count and call __rwsem_wake when the result is zero */ | ||
118 | ___up_read: /* in: %g1 = semaphore pointer, %o7 = return address, %g4 = value placed in %o7 on return; scratch: %g3, %g7 */ | ||
119 | rd %psr, %g3 /* save the current %psr */ | ||
120 | nop | ||
121 | nop | ||
122 | nop | ||
123 | or %g3, PSR_PIL, %g7 /* set the PSR_PIL bits (from <asm/psr.h>; presumably the interrupt-level field mask) */ | ||
124 | wr %g7, 0, %psr /* install the modified %psr; three nops follow before memory is touched */ | ||
125 | nop | ||
126 | nop | ||
127 | nop | ||
128 | #ifdef CONFIG_SMP | ||
129 | 1: ldstub [%g1 + 4], %g7 /* SMP only: take the byte lock at offset 4 of the semaphore */ | ||
130 | tst %g7 | ||
131 | bne 1b /* byte was already non-zero: spin */ | ||
132 | ld [%g1], %g7 /* delay slot: load the count word (reloaded on each spin) */ | ||
133 | add %g7, 1, %g7 | ||
134 | st %g7, [%g1] /* store count + 1 */ | ||
135 | stb %g0, [%g1 + 4] /* release the byte lock */ | ||
136 | #else | ||
137 | ld [%g1], %g7 | ||
138 | add %g7, 1, %g7 | ||
139 | st %g7, [%g1] /* store count + 1 */ | ||
140 | #endif | ||
141 | wr %g3, 0, %psr /* restore the saved %psr (this also rewrites the condition codes) */ | ||
142 | nop | ||
143 | nop | ||
144 | nop | ||
145 | cmp %g7, 0 /* test the new count after the %psr restore has settled */ | ||
146 | be 3f /* new count is zero: go wake waiters */ | ||
147 | nop | ||
148 | 2: jmpl %o7, %g0 /* return to the address in %o7 (no +8 offset) */ | ||
149 | mov %g4, %o7 /* delay slot: %o7 = %g4 */ | ||
150 | 3: save %sp, -64, %sp /* slow path: new register window */ | ||
151 | mov %g1, %l1 /* keep the semaphore pointer across the call */ | ||
152 | mov %g4, %l4 /* keep %g4 across the call */ | ||
153 | mov %g5, %l5 /* keep %g5 across the call */ | ||
154 | clr %o1 /* second argument = 0 */ | ||
155 | call __rwsem_wake | ||
156 | mov %l1, %o0 /* delay slot: first argument = semaphore pointer */ | ||
157 | mov %l1, %g1 | ||
158 | mov %l4, %g4 | ||
159 | ba 2b /* return to the caller */ | ||
160 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
161 | |||
162 | .globl ___up_write /* export ___up_write: add 0x01000000 to the semaphore count and call __rwsem_wake when the addition carries */ | ||
163 | ___up_write: /* in: %g1 = semaphore pointer, %o7 = return address, %g4 = value placed in %o7 on return; scratch: %g2, %g3, %g7 */ | ||
164 | rd %psr, %g3 /* save the current %psr */ | ||
165 | nop | ||
166 | nop | ||
167 | nop | ||
168 | or %g3, PSR_PIL, %g7 /* set the PSR_PIL bits (from <asm/psr.h>; presumably the interrupt-level field mask) */ | ||
169 | wr %g7, 0, %psr /* install the modified %psr */ | ||
170 | sethi %hi(0x01000000), %g2 /* %g2 = 0x01000000, the amount a writer adds back; also the first of three instructions after the %psr write */ | ||
171 | nop | ||
172 | nop | ||
173 | #ifdef CONFIG_SMP | ||
174 | 1: ldstub [%g1 + 4], %g7 /* SMP only: take the byte lock at offset 4 of the semaphore */ | ||
175 | tst %g7 | ||
176 | bne 1b /* byte was already non-zero: spin */ | ||
177 | ld [%g1], %g7 /* delay slot: load the count word (reloaded on each spin) */ | ||
178 | add %g7, %g2, %g7 | ||
179 | st %g7, [%g1] /* store count + 0x01000000 */ | ||
180 | stb %g0, [%g1 + 4] /* release the byte lock */ | ||
181 | #else | ||
182 | ld [%g1], %g7 | ||
183 | add %g7, %g2, %g7 | ||
184 | st %g7, [%g1] /* store count + 0x01000000 */ | ||
185 | #endif | ||
186 | wr %g3, 0, %psr /* restore the saved %psr (this also rewrites the condition codes) */ | ||
187 | sub %g7, %g2, %g7 /* undo the addition in the register so the addcc below can redo it */ | ||
188 | nop | ||
189 | nop | ||
190 | addcc %g7, %g2, %g7 /* same value as stored, now with C set from the addition */ | ||
191 | bcs 3f /* carry out of the addition: go wake waiters */ | ||
192 | nop | ||
193 | 2: jmpl %o7, %g0 /* return to the address in %o7 (no +8 offset) */ | ||
194 | mov %g4, %o7 /* delay slot: %o7 = %g4 */ | ||
195 | 3: save %sp, -64, %sp /* slow path: new register window */ | ||
196 | mov %g1, %l1 /* keep the semaphore pointer across the call */ | ||
197 | mov %g4, %l4 /* keep %g4 across the call */ | ||
198 | mov %g5, %l5 /* keep %g5 across the call */ | ||
199 | mov %g7, %o1 /* second argument = the new count */ | ||
200 | call __rwsem_wake | ||
201 | mov %l1, %o0 /* delay slot: first argument = semaphore pointer */ | ||
202 | mov %l1, %g1 | ||
203 | mov %l4, %g4 | ||
204 | ba 2b /* return to the caller */ | ||
205 | restore %l5, %g0, %g5 /* delay slot: pop the window and restore %g5 */ | ||
diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S new file mode 100644 index 000000000000..e0ad80b6f63d --- /dev/null +++ b/arch/sparc/lib/sdiv.S | |||
@@ -0,0 +1,379 @@ | |||
1 | /* $Id: sdiv.S,v 1.6 1996/10/02 17:37:00 davem Exp $ | ||
2 | * sdiv.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | |||
7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
8 | /* | ||
9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
15 | * | ||
16 | * m4 parameters: | ||
17 | * .div name of function to generate | ||
18 | * div div=div => %o0 / %o1; div=rem => %o0 % %o1 | ||
19 | * true true=true => signed; true=false => unsigned | ||
20 | * | ||
21 | * Algorithm parameters: | ||
22 | * N how many bits per iteration we try to get (4) | ||
23 | * WORDSIZE total number of bits (32) | ||
24 | * | ||
25 | * Derived constants: | ||
26 | * TOPBITS number of bits in the top decade of a number | ||
27 | * | ||
28 | * Important variables: | ||
29 | * Q the partial quotient under development (initially 0) | ||
30 | * R the remainder so far, initially the dividend | ||
31 | * ITER number of main division loop iterations required; | ||
32 | * equal to ceil(log2(quotient) / N). Note that this | ||
33 | * is the log base (2^N) of the quotient. | ||
34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
35 | * | ||
36 | * Cost: | ||
37 | * Current estimate for non-large dividend is | ||
38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
40 | * different path, as the upper bits of the quotient must be developed | ||
41 | * one bit at a time. | ||
42 | */ | ||
43 | |||
44 | |||
45 | .globl .div /* export .div: signed quotient %o0 / %o1, returned in %o0 */ | ||
46 | .div: /* scratch: %o1-%o5, %g1, %g2, %g7 and the condition codes */ | ||
47 | ! compute sign of result; if neither is negative, no problem | ||
48 | orcc %o1, %o0, %g0 ! either negative? | ||
49 | bge 2f ! no, go do the divide | ||
50 | xor %o1, %o0, %g2 ! compute sign in any case | ||
51 | |||
52 | tst %o1 | ||
53 | bge 1f | ||
54 | tst %o0 | ||
55 | ! %o1 is definitely negative; %o0 might also be negative | ||
56 | bge 2f ! if %o0 not negative... | ||
57 | sub %g0, %o1, %o1 ! in any case, make %o1 nonneg | ||
58 | 1: ! %o0 is negative, %o1 is nonnegative | ||
59 | sub %g0, %o0, %o0 ! make %o0 nonnegative | ||
60 | 2: | ||
61 | |||
62 | ! Ready to divide. Compute size of quotient; scale comparand. | ||
63 | orcc %o1, %g0, %o5 | ||
64 | bne 1f | ||
65 | mov %o0, %o3 | ||
66 | |||
67 | ! Divide by zero trap. If it returns, return 0 (about as | ||
68 | ! wrong as possible, but that is what SunOS does...). | ||
69 | ta ST_DIV0 | ||
70 | retl | ||
71 | clr %o0 | ||
72 | |||
73 | 1: | ||
74 | cmp %o3, %o5 ! if %o1 exceeds %o0, done | ||
75 | blu Lgot_result ! (and algorithm fails otherwise) | ||
76 | clr %o2 | ||
77 | |||
78 | sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27, the threshold for the big-dividend path */ | ||
79 | |||
80 | cmp %o3, %g1 | ||
81 | blu Lnot_really_big | ||
82 | clr %o4 | ||
83 | |||
84 | ! Here the dividend is >= 2**(31-N) or so. We must be careful here, | ||
85 | ! as our usual N-at-a-shot divide step will cause overflow and havoc. | ||
86 | ! The number of bits in the result here is N*ITER+SC, where SC <= N. | ||
87 | ! Compute ITER in an unorthodox manner: know we need to shift V into | ||
88 | ! the top decade: so do not even bother to compare to R. | ||
89 | 1: | ||
90 | cmp %o5, %g1 | ||
91 | bgeu 3f | ||
92 | mov 1, %g7 /* delay slot: %g7 = 1 on every pass through this loop */ | ||
93 | |||
94 | sll %o5, 4, %o5 | ||
95 | |||
96 | b 1b | ||
97 | add %o4, 1, %o4 | ||
98 | |||
99 | ! Now compute %g7. | ||
100 | 2: | ||
101 | addcc %o5, %o5, %o5 | ||
102 | bcc Lnot_too_big | ||
103 | add %g7, 1, %g7 | ||
104 | |||
105 | ! We get here if the %o1 overflowed while shifting. | ||
106 | ! This means that %o3 has the high-order bit set. | ||
107 | ! Restore %o5 and subtract from %o3. | ||
108 | sll %g1, 4, %g1 ! high order bit | ||
109 | srl %o5, 1, %o5 ! rest of %o5 | ||
110 | add %o5, %g1, %o5 | ||
111 | |||
112 | b Ldo_single_div | ||
113 | sub %g7, 1, %g7 | ||
114 | |||
115 | Lnot_too_big: | ||
116 | 3: | ||
117 | cmp %o5, %o3 | ||
118 | blu 2b | ||
119 | nop | ||
120 | |||
121 | be Ldo_single_div | ||
122 | nop | ||
123 | /* NB: these are commented out in the V8-Sparc manual as well */ | ||
124 | /* (I do not understand this) */ | ||
125 | ! %o5 > %o3: went too far: back up 1 step | ||
126 | ! srl %o5, 1, %o5 | ||
127 | ! dec %g7 | ||
128 | ! do single-bit divide steps | ||
129 | ! | ||
130 | ! We have to be careful here. We know that %o3 >= %o5, so we can do the | ||
131 | ! first divide step without thinking. BUT, the others are conditional, | ||
132 | ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- | ||
133 | ! order bit set in the first step, just falling into the regular | ||
134 | ! division loop will mess up the first time around. | ||
135 | ! So we unroll slightly... | ||
136 | Ldo_single_div: | ||
137 | subcc %g7, 1, %g7 | ||
138 | bl Lend_regular_divide | ||
139 | nop | ||
140 | |||
141 | sub %o3, %o5, %o3 | ||
142 | mov 1, %o2 | ||
143 | |||
144 | b Lend_single_divloop | ||
145 | nop | ||
146 | Lsingle_divloop: | ||
147 | sll %o2, 1, %o2 | ||
148 | |||
149 | bl 1f | ||
150 | srl %o5, 1, %o5 | ||
151 | ! %o3 >= 0 | ||
152 | sub %o3, %o5, %o3 | ||
153 | |||
154 | b 2f | ||
155 | add %o2, 1, %o2 | ||
156 | 1: ! %o3 < 0 | ||
157 | add %o3, %o5, %o3 | ||
158 | sub %o2, 1, %o2 | ||
159 | 2: | ||
160 | Lend_single_divloop: | ||
161 | subcc %g7, 1, %g7 | ||
162 | bge Lsingle_divloop | ||
163 | tst %o3 | ||
164 | |||
165 | b,a Lend_regular_divide /* annulled unconditional branch: no delay-slot instruction is executed */ | ||
166 | |||
167 | Lnot_really_big: | ||
168 | 1: | ||
169 | sll %o5, 4, %o5 | ||
170 | cmp %o5, %o3 | ||
171 | bleu 1b | ||
172 | addcc %o4, 1, %o4 /* delay slot: count one iteration per 4-bit shift of %o5 */ | ||
173 | |||
174 | be Lgot_result | ||
175 | sub %o4, 1, %o4 | ||
176 | |||
177 | tst %o3 ! set up for initial iteration | ||
178 | Ldivloop: /* each pass shifts %o2 left by 4 and develops 4 quotient bits via the branch tree below */ | ||
179 | sll %o2, 4, %o2 | ||
180 | ! depth 1, accumulated bits 0 | ||
181 | bl L.1.16 | ||
182 | srl %o5,1,%o5 | ||
183 | ! remainder is positive | ||
184 | subcc %o3,%o5,%o3 | ||
185 | ! depth 2, accumulated bits 1 | ||
186 | bl L.2.17 | ||
187 | srl %o5,1,%o5 | ||
188 | ! remainder is positive | ||
189 | subcc %o3,%o5,%o3 | ||
190 | ! depth 3, accumulated bits 3 | ||
191 | bl L.3.19 | ||
192 | srl %o5,1,%o5 | ||
193 | ! remainder is positive | ||
194 | subcc %o3,%o5,%o3 | ||
195 | ! depth 4, accumulated bits 7 | ||
196 | bl L.4.23 | ||
197 | srl %o5,1,%o5 | ||
198 | ! remainder is positive | ||
199 | subcc %o3,%o5,%o3 | ||
200 | b 9f | ||
201 | add %o2, (7*2+1), %o2 | ||
202 | |||
203 | L.4.23: | ||
204 | ! remainder is negative | ||
205 | addcc %o3,%o5,%o3 | ||
206 | b 9f | ||
207 | add %o2, (7*2-1), %o2 | ||
208 | |||
209 | L.3.19: | ||
210 | ! remainder is negative | ||
211 | addcc %o3,%o5,%o3 | ||
212 | ! depth 4, accumulated bits 5 | ||
213 | bl L.4.21 | ||
214 | srl %o5,1,%o5 | ||
215 | ! remainder is positive | ||
216 | subcc %o3,%o5,%o3 | ||
217 | b 9f | ||
218 | add %o2, (5*2+1), %o2 | ||
219 | |||
220 | L.4.21: | ||
221 | ! remainder is negative | ||
222 | addcc %o3,%o5,%o3 | ||
223 | b 9f | ||
224 | add %o2, (5*2-1), %o2 | ||
225 | |||
226 | L.2.17: | ||
227 | ! remainder is negative | ||
228 | addcc %o3,%o5,%o3 | ||
229 | ! depth 3, accumulated bits 1 | ||
230 | bl L.3.17 | ||
231 | srl %o5,1,%o5 | ||
232 | ! remainder is positive | ||
233 | subcc %o3,%o5,%o3 | ||
234 | ! depth 4, accumulated bits 3 | ||
235 | bl L.4.19 | ||
236 | srl %o5,1,%o5 | ||
237 | ! remainder is positive | ||
238 | subcc %o3,%o5,%o3 | ||
239 | b 9f | ||
240 | add %o2, (3*2+1), %o2 | ||
241 | |||
242 | L.4.19: | ||
243 | ! remainder is negative | ||
244 | addcc %o3,%o5,%o3 | ||
245 | b 9f | ||
246 | add %o2, (3*2-1), %o2 | ||
247 | |||
248 | |||
249 | L.3.17: | ||
250 | ! remainder is negative | ||
251 | addcc %o3,%o5,%o3 | ||
252 | ! depth 4, accumulated bits 1 | ||
253 | bl L.4.17 | ||
254 | srl %o5,1,%o5 | ||
255 | ! remainder is positive | ||
256 | subcc %o3,%o5,%o3 | ||
257 | b 9f | ||
258 | add %o2, (1*2+1), %o2 | ||
259 | |||
260 | L.4.17: | ||
261 | ! remainder is negative | ||
262 | addcc %o3,%o5,%o3 | ||
263 | b 9f | ||
264 | add %o2, (1*2-1), %o2 | ||
265 | |||
266 | L.1.16: | ||
267 | ! remainder is negative | ||
268 | addcc %o3,%o5,%o3 | ||
269 | ! depth 2, accumulated bits -1 | ||
270 | bl L.2.15 | ||
271 | srl %o5,1,%o5 | ||
272 | ! remainder is positive | ||
273 | subcc %o3,%o5,%o3 | ||
274 | ! depth 3, accumulated bits -1 | ||
275 | bl L.3.15 | ||
276 | srl %o5,1,%o5 | ||
277 | ! remainder is positive | ||
278 | subcc %o3,%o5,%o3 | ||
279 | ! depth 4, accumulated bits -1 | ||
280 | bl L.4.15 | ||
281 | srl %o5,1,%o5 | ||
282 | ! remainder is positive | ||
283 | subcc %o3,%o5,%o3 | ||
284 | b 9f | ||
285 | add %o2, (-1*2+1), %o2 | ||
286 | |||
287 | L.4.15: | ||
288 | ! remainder is negative | ||
289 | addcc %o3,%o5,%o3 | ||
290 | b 9f | ||
291 | add %o2, (-1*2-1), %o2 | ||
292 | |||
293 | L.3.15: | ||
294 | ! remainder is negative | ||
295 | addcc %o3,%o5,%o3 | ||
296 | ! depth 4, accumulated bits -3 | ||
297 | bl L.4.13 | ||
298 | srl %o5,1,%o5 | ||
299 | ! remainder is positive | ||
300 | subcc %o3,%o5,%o3 | ||
301 | b 9f | ||
302 | add %o2, (-3*2+1), %o2 | ||
303 | |||
304 | L.4.13: | ||
305 | ! remainder is negative | ||
306 | addcc %o3,%o5,%o3 | ||
307 | b 9f | ||
308 | add %o2, (-3*2-1), %o2 | ||
309 | |||
310 | L.2.15: | ||
311 | ! remainder is negative | ||
312 | addcc %o3,%o5,%o3 | ||
313 | ! depth 3, accumulated bits -3 | ||
314 | bl L.3.13 | ||
315 | srl %o5,1,%o5 | ||
316 | ! remainder is positive | ||
317 | subcc %o3,%o5,%o3 | ||
318 | ! depth 4, accumulated bits -5 | ||
319 | bl L.4.11 | ||
320 | srl %o5,1,%o5 | ||
321 | ! remainder is positive | ||
322 | subcc %o3,%o5,%o3 | ||
323 | b 9f | ||
324 | add %o2, (-5*2+1), %o2 | ||
325 | |||
326 | L.4.11: | ||
327 | ! remainder is negative | ||
328 | addcc %o3,%o5,%o3 | ||
329 | b 9f | ||
330 | add %o2, (-5*2-1), %o2 | ||
331 | |||
332 | L.3.13: | ||
333 | ! remainder is negative | ||
334 | addcc %o3,%o5,%o3 | ||
335 | ! depth 4, accumulated bits -7 | ||
336 | bl L.4.9 | ||
337 | srl %o5,1,%o5 | ||
338 | ! remainder is positive | ||
339 | subcc %o3,%o5,%o3 | ||
340 | b 9f | ||
341 | add %o2, (-7*2+1), %o2 | ||
342 | |||
343 | L.4.9: | ||
344 | ! remainder is negative | ||
345 | addcc %o3,%o5,%o3 | ||
346 | b 9f | ||
347 | add %o2, (-7*2-1), %o2 | ||
348 | |||
349 | 9: | ||
350 | Lend_regular_divide: | ||
351 | subcc %o4, 1, %o4 | ||
352 | bge Ldivloop | ||
353 | tst %o3 | ||
354 | |||
355 | bl,a Lgot_result | ||
356 | ! non-restoring fixup here (one instruction only!) | ||
357 | sub %o2, 1, %o2 /* annulled delay slot: runs only when the remainder is negative; lowers the quotient by one */ | ||
358 | |||
359 | Lgot_result: | ||
360 | ! check to see if answer should be < 0 | ||
361 | tst %g2 | ||
362 | bl,a 1f | ||
363 | sub %g0, %o2, %o2 /* annulled delay slot: negate only when %g2 (xor of the original operands) is negative */ | ||
364 | 1: | ||
365 | retl | ||
366 | mov %o2, %o0 /* delay slot: return the quotient in %o0 */ | ||
367 | |||
368 | .globl .div_patch /* export .div_patch: signed divide using the hardware sdivcc instruction */ | ||
369 | .div_patch: /* in: %o0 = dividend, %o1 = divisor; out: %o0 = quotient; scratch: %o2, %y; NOTE(review): presumably patched over .div on CPUs with hardware divide -- confirm */ | ||
370 | sra %o0, 0x1f, %o2 /* %o2 = sign extension of the dividend */ | ||
371 | wr %o2, 0x0, %y /* %y = upper 32 bits of the 64-bit dividend used by sdivcc */ | ||
372 | nop /* three nops separate the %y write from the divide that reads it */ | ||
373 | nop | ||
374 | nop | ||
375 | sdivcc %o0, %o1, %o0 /* %o0 = quotient; V is set on overflow */ | ||
376 | bvs,a 1f | ||
377 | xnor %o0, %g0, %o0 /* annulled delay slot: on overflow only, %o0 = ~%o0 */ | ||
378 | 1: retl | ||
379 | nop /* delay slot of the retl */ | ||
diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S new file mode 100644 index 000000000000..ed9a763368cd --- /dev/null +++ b/arch/sparc/lib/strlen.S | |||
@@ -0,0 +1,81 @@ | |||
1 | /* strlen.S: Sparc optimized strlen code | ||
2 | * Hand optimized from GNU libc's strlen | ||
3 | * Copyright (C) 1991,1996 Free Software Foundation | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
6 | */ | ||
7 | |||
8 | #define LO_MAGIC 0x01010101 | ||
9 | #define HI_MAGIC 0x80808080 | ||
10 | |||
11 | 0: /* unaligned entry path: test leading bytes one at a time until %o0 is word aligned */ | ||
12 | ldub [%o0], %o5 | ||
13 | cmp %o5, 0 | ||
14 | be 1f /* first byte is NUL: return 0 */ | ||
15 | add %o0, 1, %o0 /* delay slot, runs either way: advance the pointer */ | ||
16 | andcc %o0, 3, %g0 /* word aligned yet? */ | ||
17 | be 4f | ||
18 | or %o4, %lo(HI_MAGIC), %o3 /* delay slot, runs either way: %o3 = HI_MAGIC (%o4 = %hi(HI_MAGIC) from entry) */ | ||
19 | ldub [%o0], %o5 | ||
20 | cmp %o5, 0 | ||
21 | be 2f /* second byte is NUL: return 1 */ | ||
22 | add %o0, 1, %o0 | ||
23 | andcc %o0, 3, %g0 | ||
24 | be 5f | ||
25 | sethi %hi(LO_MAGIC), %o4 /* delay slot, runs either way: upper part of LO_MAGIC */ | ||
26 | ldub [%o0], %o5 | ||
27 | cmp %o5, 0 | ||
28 | be 3f /* third byte is NUL: return 2 */ | ||
29 | add %o0, 1, %o0 | ||
30 | b 8f /* at most three unaligned bytes exist, so %o0 is word aligned now */ | ||
31 | or %o4, %lo(LO_MAGIC), %o2 /* delay slot: %o2 = LO_MAGIC */ | ||
32 | 1: | ||
33 | retl | ||
34 | mov 0, %o0 | ||
35 | 2: | ||
36 | retl | ||
37 | mov 1, %o0 | ||
38 | 3: | ||
39 | retl | ||
40 | mov 2, %o0 | ||
41 | |||
42 | .align 4 | ||
43 | .global strlen | ||
44 | strlen: /* in: %o0 = string pointer; out: %o0 = number of bytes before the first NUL */ | ||
45 | mov %o0, %o1 /* %o1 = start of string, kept for the final subtraction */ | ||
46 | andcc %o0, 3, %g0 | ||
47 | bne 0b /* not word aligned: take the byte-wise path above */ | ||
48 | sethi %hi(HI_MAGIC), %o4 /* delay slot, runs either way */ | ||
49 | or %o4, %lo(HI_MAGIC), %o3 /* %o3 = HI_MAGIC */ | ||
50 | 4: | ||
51 | sethi %hi(LO_MAGIC), %o4 | ||
52 | 5: | ||
53 | or %o4, %lo(LO_MAGIC), %o2 /* %o2 = LO_MAGIC */ | ||
54 | 8: | ||
55 | ld [%o0], %o5 /* %o5 = next four bytes of the string */ | ||
56 | 2: | ||
57 | sub %o5, %o2, %o4 | ||
58 | andcc %o4, %o3, %g0 /* (word - LO_MAGIC) & HI_MAGIC: zero means the word holds no NUL byte */ | ||
59 | be 8b | ||
60 | add %o0, 4, %o0 /* delay slot, runs either way: step to the next word */ | ||
61 | |||
62 | /* Check every byte. */ | ||
63 | srl %o5, 24, %g5 | ||
64 | andcc %g5, 0xff, %g0 | ||
65 | be 1f /* top byte of the word is NUL */ | ||
66 | add %o0, -4, %o4 /* delay slot, runs either way: %o4 = address of the word just tested */ | ||
67 | srl %o5, 16, %g5 | ||
68 | andcc %g5, 0xff, %g0 | ||
69 | be 1f | ||
70 | add %o4, 1, %o4 /* delay slot, runs either way: %o4 tracks the byte being examined */ | ||
71 | srl %o5, 8, %g5 | ||
72 | andcc %g5, 0xff, %g0 | ||
73 | be 1f | ||
74 | add %o4, 1, %o4 | ||
75 | andcc %o5, 0xff, %g0 | ||
76 | bne,a 2b /* no NUL in this word after all: go test the next word */ | ||
77 | ld [%o0], %o5 /* annulled delay slot: runs only when the branch is taken */ | ||
78 | add %o4, 1, %o4 /* the lowest byte of the word is the NUL */ | ||
79 | 1: | ||
80 | retl | ||
81 | sub %o4, %o1, %o0 /* delay slot: length = address of the NUL - start of string */ | ||
diff --git a/arch/sparc/lib/strlen_user.S b/arch/sparc/lib/strlen_user.S new file mode 100644 index 000000000000..8c8a371df3c9 --- /dev/null +++ b/arch/sparc/lib/strlen_user.S | |||
@@ -0,0 +1,109 @@ | |||
1 | /* strlen_user.S: Sparc optimized strlen_user code | ||
2 | * | ||
3 | * Return length of string in userspace including terminating 0 | ||
4 | * or 0 for error | ||
5 | * | ||
6 | * Copyright (C) 1991,1996 Free Software Foundation | ||
7 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
8 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
9 | */ | ||
10 | |||
11 | #define LO_MAGIC 0x01010101 | ||
12 | #define HI_MAGIC 0x80808080 | ||
13 | |||
14 | 10: /* unaligned entry path: test leading bytes one at a time; labels 10-13 mark the user loads listed in __ex_table */ | ||
15 | ldub [%o0], %o5 | ||
16 | cmp %o5, 0 | ||
17 | be 1f /* first byte is NUL: return 1 (the NUL itself is counted) */ | ||
18 | add %o0, 1, %o0 /* delay slot, runs either way: advance the pointer */ | ||
19 | andcc %o0, 3, %g0 /* word aligned yet? */ | ||
20 | be 4f | ||
21 | or %o4, %lo(HI_MAGIC), %o3 /* delay slot, runs either way: %o3 = HI_MAGIC (%o4 = %hi(HI_MAGIC) from entry) */ | ||
22 | 11: | ||
23 | ldub [%o0], %o5 | ||
24 | cmp %o5, 0 | ||
25 | be 2f /* second byte is NUL: return 2 */ | ||
26 | add %o0, 1, %o0 | ||
27 | andcc %o0, 3, %g0 | ||
28 | be 5f | ||
29 | sethi %hi(LO_MAGIC), %o4 /* delay slot, runs either way: upper part of LO_MAGIC */ | ||
30 | 12: | ||
31 | ldub [%o0], %o5 | ||
32 | cmp %o5, 0 | ||
33 | be 3f /* third byte is NUL: return 3 */ | ||
34 | add %o0, 1, %o0 | ||
35 | b 13f /* at most three unaligned bytes exist, so %o0 is word aligned now */ | ||
36 | or %o4, %lo(LO_MAGIC), %o2 /* delay slot: %o2 = LO_MAGIC */ | ||
37 | 1: | ||
38 | retl | ||
39 | mov 1, %o0 | ||
40 | 2: | ||
41 | retl | ||
42 | mov 2, %o0 | ||
43 | 3: | ||
44 | retl | ||
45 | mov 3, %o0 | ||
46 | |||
47 | .align 4 | ||
48 | .global __strlen_user, __strnlen_user | ||
49 | __strlen_user: /* in: %o0 = user string; uses a limit of 32768 and falls into __strnlen_user */ | ||
50 | sethi %hi(32768), %o1 | ||
51 | __strnlen_user: /* in: %o0 = user string, %o1 = limit; out: %o0 = length including the NUL, or 0 from the fixup */ | ||
52 | mov %o1, %g1 /* %g1 = limit */ | ||
53 | mov %o0, %o1 /* %o1 = start of string, kept for the final subtraction */ | ||
54 | andcc %o0, 3, %g0 | ||
55 | bne 10b /* not word aligned: take the byte-wise path above */ | ||
56 | sethi %hi(HI_MAGIC), %o4 /* delay slot, runs either way */ | ||
57 | or %o4, %lo(HI_MAGIC), %o3 /* %o3 = HI_MAGIC */ | ||
58 | 4: | ||
59 | sethi %hi(LO_MAGIC), %o4 | ||
60 | 5: | ||
61 | or %o4, %lo(LO_MAGIC), %o2 /* %o2 = LO_MAGIC */ | ||
62 | 13: | ||
63 | ld [%o0], %o5 /* %o5 = next four bytes of the string */ | ||
64 | 2: | ||
65 | sub %o5, %o2, %o4 | ||
66 | andcc %o4, %o3, %g0 /* (word - LO_MAGIC) & HI_MAGIC: non-zero means the word may hold a NUL */ | ||
67 | bne 82f | ||
68 | add %o0, 4, %o0 /* delay slot, runs either way: step to the next word */ | ||
69 | sub %o0, %o1, %g2 /* %g2 = bytes scanned so far */ | ||
70 | 81: cmp %g2, %g1 /* compare bytes scanned with the limit */ | ||
71 | blu 13b /* still below the limit (unsigned): load the next word */ | ||
72 | mov %o0, %o4 /* delay slot, runs either way: %o4 = current position */ | ||
73 | ba,a 1f /* limit reached: return %o4 - %o1; ba,a annuls its delay slot */ | ||
74 | |||
75 | /* Check every byte. */ | ||
76 | 82: srl %o5, 24, %g5 | ||
77 | andcc %g5, 0xff, %g0 | ||
78 | be 1f /* top byte of the word is NUL */ | ||
79 | add %o0, -3, %o4 /* delay slot, runs either way: address of the tested word + 1, so the NUL is counted */ | ||
80 | srl %o5, 16, %g5 | ||
81 | andcc %g5, 0xff, %g0 | ||
82 | be 1f | ||
83 | add %o4, 1, %o4 | ||
84 | srl %o5, 8, %g5 | ||
85 | andcc %g5, 0xff, %g0 | ||
86 | be 1f | ||
87 | add %o4, 1, %o4 | ||
88 | andcc %o5, 0xff, %g0 | ||
89 | bne 81b /* no NUL in this word after all: go to the limit check */ | ||
90 | sub %o0, %o1, %g2 /* delay slot, runs either way: %g2 = bytes scanned so far */ | ||
91 | |||
92 | add %o4, 1, %o4 /* the lowest byte of the word is the NUL */ | ||
93 | 1: | ||
94 | retl | ||
95 | sub %o4, %o1, %o0 /* delay slot: result = %o4 - start of string */ | ||
96 | |||
97 | .section .fixup,#alloc,#execinstr | ||
98 | .align 4 | ||
99 | 9: /* fixup code referenced from __ex_table below: return 0 */ | ||
100 | retl | ||
101 | clr %o0 | ||
102 | |||
103 | .section __ex_table,#alloc | ||
104 | .align 4 | ||
105 | |||
106 | .word 10b, 9b /* each entry pairs one labelled user load with the fixup at 9 */ | ||
107 | .word 11b, 9b | ||
108 | .word 12b, 9b | ||
109 | .word 13b, 9b | ||
diff --git a/arch/sparc/lib/strncmp.S b/arch/sparc/lib/strncmp.S new file mode 100644 index 000000000000..615626805d4b --- /dev/null +++ b/arch/sparc/lib/strncmp.S | |||
@@ -0,0 +1,118 @@ | |||
1 | /* $Id: strncmp.S,v 1.2 1996/09/09 02:47:20 davem Exp $ | ||
2 | * strncmp.S: Hand optimized Sparc assembly of GCC output from GNU libc | ||
3 | * generic strncmp routine. | ||
4 | */ | ||
5 | |||
6 | .text | ||
7 | .align 4 | ||
8 | .global __strncmp, strncmp | ||
9 | __strncmp: | ||
10 | strncmp: /* in: %o0 = first string, %o1 = second string, %o2 = byte count; out: %o0 = difference of the deciding bytes */ | ||
11 | mov %o0, %g3 /* %g3 = running pointer into the first string (%o0 is reused for its current byte) */ | ||
12 | mov 0, %o3 | ||
13 | |||
14 | cmp %o2, 3 | ||
15 | ble 7f /* count <= 3 (signed): skip the unrolled loop */ | ||
16 | mov 0, %g2 /* delay slot, runs either way */ | ||
17 | |||
18 | sra %o2, 2, %o4 /* %o4 = count / 4 = passes through the 4-byte unrolled loop */ | ||
19 | ldub [%g3], %o3 | ||
20 | |||
21 | 0: /* unrolled loop: four byte comparisons per pass */ | ||
22 | ldub [%o1], %g2 | ||
23 | add %g3, 1, %g3 | ||
24 | and %o3, 0xff, %o0 | ||
25 | |||
26 | cmp %o0, 0 | ||
27 | be 8f /* first string ended: return the byte difference */ | ||
28 | add %o1, 1, %o1 /* delay slot, runs either way */ | ||
29 | |||
30 | cmp %o0, %g2 | ||
31 | be,a 1f /* bytes equal: continue with the next byte */ | ||
32 | ldub [%g3], %o3 /* annulled delay slot: runs only when the branch is taken */ | ||
33 | |||
34 | retl | ||
35 | sub %o0, %g2, %o0 /* delay slot: bytes differ, return their difference */ | ||
36 | |||
37 | 1: | ||
38 | ldub [%o1], %g2 | ||
39 | add %g3,1, %g3 | ||
40 | and %o3, 0xff, %o0 | ||
41 | |||
42 | cmp %o0, 0 | ||
43 | be 8f | ||
44 | add %o1, 1, %o1 | ||
45 | |||
46 | cmp %o0, %g2 | ||
47 | be,a 1f | ||
48 | ldub [%g3], %o3 | ||
49 | |||
50 | retl | ||
51 | sub %o0, %g2, %o0 | ||
52 | |||
53 | 1: | ||
54 | ldub [%o1], %g2 | ||
55 | add %g3, 1, %g3 | ||
56 | and %o3, 0xff, %o0 | ||
57 | |||
58 | cmp %o0, 0 | ||
59 | be 8f | ||
60 | add %o1, 1, %o1 | ||
61 | |||
62 | cmp %o0, %g2 | ||
63 | be,a 1f | ||
64 | ldub [%g3], %o3 | ||
65 | |||
66 | retl | ||
67 | sub %o0, %g2, %o0 | ||
68 | |||
69 | 1: | ||
70 | ldub [%o1], %g2 | ||
71 | add %g3, 1, %g3 | ||
72 | and %o3, 0xff, %o0 | ||
73 | |||
74 | cmp %o0, 0 | ||
75 | be 8f | ||
76 | add %o1, 1, %o1 | ||
77 | |||
78 | cmp %o0, %g2 | ||
79 | be 1f /* fourth byte equal: this pass is complete */ | ||
80 | add %o4, -1, %o4 /* delay slot, runs either way: one pass fewer to go */ | ||
81 | |||
82 | retl | ||
83 | sub %o0, %g2, %o0 | ||
84 | |||
85 | 1: | ||
86 | |||
87 | cmp %o4, 0 | ||
88 | bg,a 0b /* more passes remain */ | ||
89 | ldub [%g3], %o3 /* annulled delay slot: runs only when looping */ | ||
90 | |||
91 | b 7f | ||
92 | and %o2, 3, %o2 /* delay slot: %o2 = count % 4, left for the tail loop */ | ||
93 | |||
94 | 9: /* tail loop body: one byte comparison per pass */ | ||
95 | ldub [%o1], %g2 | ||
96 | add %g3, 1, %g3 | ||
97 | and %o3, 0xff, %o0 | ||
98 | |||
99 | cmp %o0, 0 | ||
100 | be 8f | ||
101 | add %o1, 1, %o1 | ||
102 | |||
103 | cmp %o0, %g2 | ||
104 | be 7f /* bytes equal: back to the remaining-count test */ | ||
105 | add %o2, -1, %o2 /* delay slot, runs either way */ | ||
106 | |||
107 | 8: | ||
108 | retl | ||
109 | sub %o0, %g2, %o0 /* delay slot: result = byte of first string - byte of second string */ | ||
110 | |||
111 | 7: | ||
112 | cmp %o2, 0 | ||
113 | bg,a 9b /* bytes remain: compare the next one */ | ||
114 | ldub [%g3], %o3 /* annulled delay slot: runs only when looping */ | ||
115 | |||
116 | and %g2, 0xff, %o0 | ||
117 | retl | ||
118 | sub %o3, %o0, %o0 /* delay slot: count exhausted, result = last byte loaded from each string (both start as 0) */ | ||
diff --git a/arch/sparc/lib/strncpy_from_user.S b/arch/sparc/lib/strncpy_from_user.S new file mode 100644 index 000000000000..d77198976a66 --- /dev/null +++ b/arch/sparc/lib/strncpy_from_user.S | |||
@@ -0,0 +1,47 @@ | |||
1 | /* strncpy_from_user.S: Sparc strncpy from userspace. | ||
2 | * | ||
3 | * Copyright(C) 1996 David S. Miller | ||
4 | */ | ||
5 | |||
6 | #include <asm/ptrace.h> | ||
7 | #include <asm/errno.h> | ||
8 | |||
9 | .text | ||
10 | .align 4 | ||
11 | |||
12 | /* Must return: | ||
13 | * | ||
14 | * -EFAULT for an exception | ||
15 | * count if we hit the buffer limit | ||
16 | * bytes copied if we hit a null byte | ||
17 | */ | ||
18 | |||
19 | .globl __strncpy_from_user | ||
20 | __strncpy_from_user: | ||
21 | /* %o0=dest, %o1=src, %o2=count */ | ||
22 | mov %o2, %o3 /* %o3 = original count, used to compute the return value */ | ||
23 | 1: | ||
24 | subcc %o2, 1, %o2 | ||
25 | bneg 2f /* count exhausted: stop copying */ | ||
26 | nop | ||
27 | 10: /* label of the user load that __ex_table below refers to */ | ||
28 | ldub [%o1], %o4 | ||
29 | add %o0, 1, %o0 | ||
30 | cmp %o4, 0 | ||
31 | add %o1, 1, %o1 | ||
32 | bne 1b /* loop until a NUL byte has been copied */ | ||
33 | stb %o4, [%o0 - 1] /* delay slot, runs either way: store the byte, NUL included */ | ||
34 | 2: | ||
35 | add %o2, 1, %o0 | ||
36 | retl | ||
37 | sub %o3, %o0, %o0 /* delay slot: result = original count - (%o2 + 1) */ | ||
38 | |||
39 | .section .fixup,#alloc,#execinstr | ||
40 | .align 4 | ||
41 | 4: /* fixup code referenced from __ex_table below: return -EFAULT */ | ||
42 | retl | ||
43 | mov -EFAULT, %o0 | ||
44 | |||
45 | .section __ex_table,#alloc | ||
46 | .align 4 | ||
47 | .word 10b, 4b /* pairs the user load at 10 with the fixup at 4 */ | ||
diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S new file mode 100644 index 000000000000..2abfc6b0f3e9 --- /dev/null +++ b/arch/sparc/lib/udiv.S | |||
@@ -0,0 +1,355 @@ | |||
1 | /* $Id: udiv.S,v 1.4 1996/09/30 02:22:38 davem Exp $ | ||
2 | * udiv.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | |||
7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
8 | /* | ||
9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
15 | * | ||
16 | * m4 parameters: | ||
17 | * .udiv name of function to generate | ||
18 | * div div=div => %o0 / %o1; div=rem => %o0 % %o1 | ||
19 | * false false=true => signed; false=false => unsigned | ||
20 | * | ||
21 | * Algorithm parameters: | ||
22 | * N how many bits per iteration we try to get (4) | ||
23 | * WORDSIZE total number of bits (32) | ||
24 | * | ||
25 | * Derived constants: | ||
26 | * TOPBITS number of bits in the top decade of a number | ||
27 | * | ||
28 | * Important variables: | ||
29 | * Q the partial quotient under development (initially 0) | ||
30 | * R the remainder so far, initially the dividend | ||
31 | * ITER number of main division loop iterations required; | ||
32 | * equal to ceil(log2(quotient) / N). Note that this | ||
33 | * is the log base (2^N) of the quotient. | ||
34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
35 | * | ||
36 | * Cost: | ||
37 | * Current estimate for non-large dividend is | ||
38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
40 | * different path, as the upper bits of the quotient must be developed | ||
41 | * one bit at a time. | ||
42 | */ | ||
43 | |||
44 | |||
45 | .globl .udiv | ||
46 | .udiv: /* unsigned divide: in %o0 = dividend, %o1 = divisor; out %o0 = quotient */ | ||
47 | |||
48 | ! Ready to divide. Compute size of quotient; scale comparand. | ||
49 | orcc %o1, %g0, %o5 /* %o5 = divisor (the comparand); sets Z when the divisor is zero */ | ||
50 | bne 1f | ||
51 | mov %o0, %o3 /* delay slot, runs either way: %o3 = dividend (the running remainder) */ | ||
52 | |||
53 | ! Divide by zero trap. If it returns, return 0 (about as | ||
54 | ! wrong as possible, but that is what SunOS does...). | ||
55 | ta ST_DIV0 | ||
56 | retl | ||
57 | clr %o0 | ||
58 | |||
59 | 1: | ||
60 | cmp %o3, %o5 ! if %o1 exceeds %o0, done | ||
61 | blu Lgot_result ! (and algorithm fails otherwise) | ||
62 | clr %o2 /* delay slot, runs either way: %o2 = quotient = 0 */ | ||
63 | |||
64 | sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27 */ | ||
65 | |||
66 | cmp %o3, %g1 | ||
67 | blu Lnot_really_big | ||
68 | clr %o4 /* delay slot, runs either way: %o4 = iteration count = 0 */ | ||
69 | |||
70 | ! Here the dividend is >= 2**(31-N) or so. We must be careful here, | ||
71 | ! as our usual N-at-a-shot divide step will cause overflow and havoc. | ||
72 | ! The number of bits in the result here is N*ITER+SC, where SC <= N. | ||
73 | ! Compute ITER in an unorthodox manner: know we need to shift V into | ||
74 | ! the top decade: so do not even bother to compare to R. | ||
75 | 1: | ||
76 | cmp %o5, %g1 | ||
77 | bgeu 3f | ||
78 | mov 1, %g7 /* delay slot, runs either way: %g7 = count of single-bit steps, starts at 1 */ | ||
79 | |||
80 | sll %o5, 4, %o5 /* scale the comparand by 2^4 */ | ||
81 | |||
82 | b 1b | ||
83 | add %o4, 1, %o4 /* delay slot: one more 4-bit iteration will be needed */ | ||
84 | |||
85 | ! Now compute %g7. | ||
86 | 2: | ||
87 | addcc %o5, %o5, %o5 /* double the comparand; carry set means it overflowed 32 bits */ | ||
88 | bcc Lnot_too_big | ||
89 | add %g7, 1, %g7 /* delay slot, runs either way */ | ||
90 | |||
91 | ! We get here if the %o1 overflowed while shifting. | ||
92 | ! This means that %o3 has the high-order bit set. | ||
93 | ! Restore %o5 and subtract from %o3. | ||
94 | sll %g1, 4, %g1 ! high order bit | ||
95 | srl %o5, 1, %o5 ! rest of %o5 | ||
96 | add %o5, %g1, %o5 | ||
97 | |||
98 | b Ldo_single_div | ||
99 | sub %g7, 1, %g7 /* delay slot: undo the count for the step that overflowed */ | ||
100 | |||
101 | Lnot_too_big: | ||
102 | 3: | ||
103 | cmp %o5, %o3 | ||
104 | blu 2b /* comparand still below the remainder: keep doubling */ | ||
105 | nop | ||
106 | |||
107 | be Ldo_single_div | ||
108 | nop | ||
109 | /* NB: these are commented out in the V8-Sparc manual as well */ | ||
110 | /* (I do not understand this) */ | ||
111 | ! %o5 > %o3: went too far: back up 1 step | ||
112 | ! srl %o5, 1, %o5 | ||
113 | ! dec %g7 | ||
114 | ! do single-bit divide steps | ||
115 | ! | ||
116 | ! We have to be careful here. We know that %o3 >= %o5, so we can do the | ||
117 | ! first divide step without thinking. BUT, the others are conditional, | ||
118 | ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- | ||
119 | ! order bit set in the first step, just falling into the regular | ||
120 | ! division loop will mess up the first time around. | ||
121 | ! So we unroll slightly... | ||
122 | Ldo_single_div: | ||
123 | subcc %g7, 1, %g7 | ||
124 | bl Lend_regular_divide | ||
125 | nop | ||
126 | |||
127 | sub %o3, %o5, %o3 /* first step, done unconditionally: remainder -= comparand */ | ||
128 | mov 1, %o2 /* quotient = 1 */ | ||
129 | |||
130 | b Lend_single_divloop | ||
131 | nop | ||
132 | Lsingle_divloop: | ||
133 | sll %o2, 1, %o2 /* make room for the next quotient bit (sll leaves the flags from tst untouched) */ | ||
134 | bl 1f | ||
135 | srl %o5, 1, %o5 /* delay slot, runs either way: halve the comparand */ | ||
136 | ! %o3 >= 0 | ||
137 | sub %o3, %o5, %o3 | ||
138 | b 2f | ||
139 | add %o2, 1, %o2 | ||
140 | 1: ! %o3 < 0 | ||
141 | add %o3, %o5, %o3 | ||
142 | sub %o2, 1, %o2 | ||
143 | 2: | ||
144 | Lend_single_divloop: | ||
145 | subcc %g7, 1, %g7 | ||
146 | bge Lsingle_divloop | ||
147 | tst %o3 /* delay slot, runs either way: flags = sign of the remainder */ | ||
148 | |||
149 | b,a Lend_regular_divide | ||
150 | |||
151 | Lnot_really_big: | ||
152 | 1: | ||
153 | sll %o5, 4, %o5 /* scale the comparand by 2^4 */ | ||
154 | |||
155 | cmp %o5, %o3 | ||
156 | bleu 1b | ||
157 | addcc %o4, 1, %o4 /* delay slot, runs either way: count the iteration and set flags for the be below */ | ||
158 | |||
159 | be Lgot_result | ||
160 | sub %o4, 1, %o4 /* delay slot, runs either way */ | ||
161 | |||
162 | tst %o3 ! set up for initial iteration | ||
163 | Ldivloop: | ||
164 | sll %o2, 4, %o2 /* make room for four new quotient bits */ | ||
165 | ! depth 1, accumulated bits 0 | ||
166 | bl L.1.16 | ||
167 | srl %o5,1,%o5 | ||
168 | ! remainder is positive | ||
169 | subcc %o3,%o5,%o3 | ||
170 | ! depth 2, accumulated bits 1 | ||
171 | bl L.2.17 | ||
172 | srl %o5,1,%o5 | ||
173 | ! remainder is positive | ||
174 | subcc %o3,%o5,%o3 | ||
175 | ! depth 3, accumulated bits 3 | ||
176 | bl L.3.19 | ||
177 | srl %o5,1,%o5 | ||
178 | ! remainder is positive | ||
179 | subcc %o3,%o5,%o3 | ||
180 | ! depth 4, accumulated bits 7 | ||
181 | bl L.4.23 | ||
182 | srl %o5,1,%o5 | ||
183 | ! remainder is positive | ||
184 | subcc %o3,%o5,%o3 | ||
185 | b 9f | ||
186 | add %o2, (7*2+1), %o2 | ||
187 | |||
188 | L.4.23: | ||
189 | ! remainder is negative | ||
190 | addcc %o3,%o5,%o3 | ||
191 | b 9f | ||
192 | add %o2, (7*2-1), %o2 | ||
193 | |||
194 | L.3.19: | ||
195 | ! remainder is negative | ||
196 | addcc %o3,%o5,%o3 | ||
197 | ! depth 4, accumulated bits 5 | ||
198 | bl L.4.21 | ||
199 | srl %o5,1,%o5 | ||
200 | ! remainder is positive | ||
201 | subcc %o3,%o5,%o3 | ||
202 | b 9f | ||
203 | add %o2, (5*2+1), %o2 | ||
204 | |||
205 | L.4.21: | ||
206 | ! remainder is negative | ||
207 | addcc %o3,%o5,%o3 | ||
208 | b 9f | ||
209 | add %o2, (5*2-1), %o2 | ||
210 | |||
211 | L.2.17: | ||
212 | ! remainder is negative | ||
213 | addcc %o3,%o5,%o3 | ||
214 | ! depth 3, accumulated bits 1 | ||
215 | bl L.3.17 | ||
216 | srl %o5,1,%o5 | ||
217 | ! remainder is positive | ||
218 | subcc %o3,%o5,%o3 | ||
219 | ! depth 4, accumulated bits 3 | ||
220 | bl L.4.19 | ||
221 | srl %o5,1,%o5 | ||
222 | ! remainder is positive | ||
223 | subcc %o3,%o5,%o3 | ||
224 | b 9f | ||
225 | add %o2, (3*2+1), %o2 | ||
226 | |||
227 | L.4.19: | ||
228 | ! remainder is negative | ||
229 | addcc %o3,%o5,%o3 | ||
230 | b 9f | ||
231 | add %o2, (3*2-1), %o2 | ||
232 | |||
233 | L.3.17: | ||
234 | ! remainder is negative | ||
235 | addcc %o3,%o5,%o3 | ||
236 | ! depth 4, accumulated bits 1 | ||
237 | bl L.4.17 | ||
238 | srl %o5,1,%o5 | ||
239 | ! remainder is positive | ||
240 | subcc %o3,%o5,%o3 | ||
241 | b 9f | ||
242 | add %o2, (1*2+1), %o2 | ||
243 | |||
244 | L.4.17: | ||
245 | ! remainder is negative | ||
246 | addcc %o3,%o5,%o3 | ||
247 | b 9f | ||
248 | add %o2, (1*2-1), %o2 | ||
249 | |||
250 | L.1.16: | ||
251 | ! remainder is negative | ||
252 | addcc %o3,%o5,%o3 | ||
253 | ! depth 2, accumulated bits -1 | ||
254 | bl L.2.15 | ||
255 | srl %o5,1,%o5 | ||
256 | ! remainder is positive | ||
257 | subcc %o3,%o5,%o3 | ||
258 | ! depth 3, accumulated bits -1 | ||
259 | bl L.3.15 | ||
260 | srl %o5,1,%o5 | ||
261 | ! remainder is positive | ||
262 | subcc %o3,%o5,%o3 | ||
263 | ! depth 4, accumulated bits -1 | ||
264 | bl L.4.15 | ||
265 | srl %o5,1,%o5 | ||
266 | ! remainder is positive | ||
267 | subcc %o3,%o5,%o3 | ||
268 | b 9f | ||
269 | add %o2, (-1*2+1), %o2 | ||
270 | |||
271 | L.4.15: | ||
272 | ! remainder is negative | ||
273 | addcc %o3,%o5,%o3 | ||
274 | b 9f | ||
275 | add %o2, (-1*2-1), %o2 | ||
276 | |||
277 | L.3.15: | ||
278 | ! remainder is negative | ||
279 | addcc %o3,%o5,%o3 | ||
280 | ! depth 4, accumulated bits -3 | ||
281 | bl L.4.13 | ||
282 | srl %o5,1,%o5 | ||
283 | ! remainder is positive | ||
284 | subcc %o3,%o5,%o3 | ||
285 | b 9f | ||
286 | add %o2, (-3*2+1), %o2 | ||
287 | |||
288 | L.4.13: | ||
289 | ! remainder is negative | ||
290 | addcc %o3,%o5,%o3 | ||
291 | b 9f | ||
292 | add %o2, (-3*2-1), %o2 | ||
293 | |||
294 | L.2.15: | ||
295 | ! remainder is negative | ||
296 | addcc %o3,%o5,%o3 | ||
297 | ! depth 3, accumulated bits -3 | ||
298 | bl L.3.13 | ||
299 | srl %o5,1,%o5 | ||
300 | ! remainder is positive | ||
301 | subcc %o3,%o5,%o3 | ||
302 | ! depth 4, accumulated bits -5 | ||
303 | bl L.4.11 | ||
304 | srl %o5,1,%o5 | ||
305 | ! remainder is positive | ||
306 | subcc %o3,%o5,%o3 | ||
307 | b 9f | ||
308 | add %o2, (-5*2+1), %o2 | ||
309 | |||
310 | L.4.11: | ||
311 | ! remainder is negative | ||
312 | addcc %o3,%o5,%o3 | ||
313 | b 9f | ||
314 | add %o2, (-5*2-1), %o2 | ||
315 | |||
316 | L.3.13: | ||
317 | ! remainder is negative | ||
318 | addcc %o3,%o5,%o3 | ||
319 | ! depth 4, accumulated bits -7 | ||
320 | bl L.4.9 | ||
321 | srl %o5,1,%o5 | ||
322 | ! remainder is positive | ||
323 | subcc %o3,%o5,%o3 | ||
324 | b 9f | ||
325 | add %o2, (-7*2+1), %o2 | ||
326 | |||
327 | L.4.9: | ||
328 | ! remainder is negative | ||
329 | addcc %o3,%o5,%o3 | ||
330 | b 9f | ||
331 | add %o2, (-7*2-1), %o2 | ||
332 | |||
333 | 9: | ||
334 | Lend_regular_divide: | ||
335 | subcc %o4, 1, %o4 | ||
336 | bge Ldivloop /* iterations remain: run another 4-bit step */ | ||
337 | tst %o3 /* delay slot, runs either way: flags = sign of the remainder */ | ||
338 | |||
339 | bl,a Lgot_result | ||
340 | ! non-restoring fixup here (one instruction only!) | ||
341 | sub %o2, 1, %o2 | ||
342 | |||
343 | Lgot_result: | ||
344 | |||
345 | retl | ||
346 | mov %o2, %o0 /* delay slot: return the quotient in %o0 */ | ||
347 | |||
348 | .globl .udiv_patch | ||
349 | .udiv_patch: /* unsigned divide, %o0 / %o1 -> %o0, using the udiv instruction; NOTE(review): presumably the hardware-divide replacement for .udiv - confirm against the patching code */ | ||
350 | wr %g0, 0x0, %y /* %y = 0 (upper word of the 64-bit dividend) */ | ||
351 | nop /* nops and the retl separate the %y write from the divide that reads it */ | ||
352 | nop | ||
353 | retl | ||
354 | udiv %o0, %o1, %o0 /* delay slot: %o0 = quotient */ | ||
355 | nop | ||
diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S new file mode 100644 index 000000000000..b430f1f0ef62 --- /dev/null +++ b/arch/sparc/lib/udivdi3.S | |||
@@ -0,0 +1,258 @@ | |||
1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
2 | |||
3 | This file is part of GNU CC. | ||
4 | |||
5 | GNU CC is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2, or (at your option) | ||
8 | any later version. | ||
9 | |||
10 | GNU CC is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNU CC; see the file COPYING. If not, write to | ||
17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. */ | ||
19 | |||
20 | .text | ||
21 | .align 4 | ||
22 | .globl __udivdi3 | ||
23 | __udivdi3: /* NOTE(review): presumably 64-bit unsigned divide with the dividend in %i0:%i1 and the divisor in %i2:%i3 - confirm against callers; the result leaves in %i0 and %i1 */ | ||
24 | save %sp,-104,%sp | ||
25 | mov %i3,%o3 /* %o3 = incoming %i3 */ | ||
26 | cmp %i2,0 | ||
27 | bne .LL40 /* %i2 is non-zero: handled at .LL40 */ | ||
28 | mov %i1,%i3 /* delay slot, runs either way: %i3 = incoming %i1 */ | ||
29 | cmp %o3,%i0 | ||
30 | bleu .LL41 /* %o3 <= %i0 (unsigned): handled at .LL41 */ | ||
31 | mov %i3,%o1 /* delay slot, runs either way */ | ||
32 | ! Inlined udiv_qrnnd | ||
33 | mov 32,%g1 | ||
34 | subcc %i0,%o3,%g0 | ||
35 | 1: bcs 5f | ||
36 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
37 | sub %i0,%o3,%i0 ! this kills msb of n | ||
38 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
39 | subcc %g1,1,%g1 | ||
40 | 2: bne 1b | ||
41 | subcc %i0,%o3,%g0 | ||
42 | bcs 3f | ||
43 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
44 | b 3f | ||
45 | sub %i0,%o3,%i0 ! this kills msb of n | ||
46 | 4: sub %i0,%o3,%i0 | ||
47 | 5: addxcc %i0,%i0,%i0 | ||
48 | bcc 2b | ||
49 | subcc %g1,1,%g1 | ||
50 | ! Got carry from n. Subtract next step to cancel this carry. | ||
51 | bne 4b | ||
52 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
53 | sub %i0,%o3,%i0 | ||
54 | 3: xnor %o1,0,%o1 | ||
55 | ! End of inline udiv_qrnnd | ||
56 | b .LL45 | ||
57 | mov 0,%o2 /* delay slot: %o2 (copied to %i0 on exit) = 0 */ | ||
58 | .LL41: | ||
59 | cmp %o3,0 | ||
60 | bne .LL77 | ||
61 | mov %i0,%o2 /* delay slot, runs either way */ | ||
62 | mov 1,%o0 /* %o3 is zero here: call .udiv with arguments 1 and 0 */ | ||
63 | call .udiv,0 | ||
64 | mov 0,%o1 /* delay slot: second argument = 0 */ | ||
65 | mov %o0,%o3 /* %o3 = value returned by .udiv */ | ||
66 | mov %i0,%o2 | ||
67 | .LL77: | ||
68 | mov 0,%o4 | ||
69 | ! Inlined udiv_qrnnd | ||
70 | mov 32,%g1 | ||
71 | subcc %o4,%o3,%g0 | ||
72 | 1: bcs 5f | ||
73 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
74 | sub %o4,%o3,%o4 ! this kills msb of n | ||
75 | addx %o4,%o4,%o4 ! so this cannot give carry | ||
76 | subcc %g1,1,%g1 | ||
77 | 2: bne 1b | ||
78 | subcc %o4,%o3,%g0 | ||
79 | bcs 3f | ||
80 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
81 | b 3f | ||
82 | sub %o4,%o3,%o4 ! this kills msb of n | ||
83 | 4: sub %o4,%o3,%o4 | ||
84 | 5: addxcc %o4,%o4,%o4 | ||
85 | bcc 2b | ||
86 | subcc %g1,1,%g1 | ||
87 | ! Got carry from n. Subtract next step to cancel this carry. | ||
88 | bne 4b | ||
89 | addcc %o2,%o2,%o2 ! shift n1n0 and a 0-bit in lsb | ||
90 | sub %o4,%o3,%o4 | ||
91 | 3: xnor %o2,0,%o2 | ||
92 | ! End of inline udiv_qrnnd | ||
93 | mov %o4,%i0 /* the value left in %o4 by the first division feeds the second one */ | ||
94 | mov %i3,%o1 | ||
95 | ! Inlined udiv_qrnnd | ||
96 | mov 32,%g1 | ||
97 | subcc %i0,%o3,%g0 | ||
98 | 1: bcs 5f | ||
99 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
100 | sub %i0,%o3,%i0 ! this kills msb of n | ||
101 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
102 | subcc %g1,1,%g1 | ||
103 | 2: bne 1b | ||
104 | subcc %i0,%o3,%g0 | ||
105 | bcs 3f | ||
106 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
107 | b 3f | ||
108 | sub %i0,%o3,%i0 ! this kills msb of n | ||
109 | 4: sub %i0,%o3,%i0 | ||
110 | 5: addxcc %i0,%i0,%i0 | ||
111 | bcc 2b | ||
112 | subcc %g1,1,%g1 | ||
113 | ! Got carry from n. Subtract next step to cancel this carry. | ||
114 | bne 4b | ||
115 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
116 | sub %i0,%o3,%i0 | ||
117 | 3: xnor %o1,0,%o1 | ||
118 | ! End of inline udiv_qrnnd | ||
119 | b .LL78 | ||
120 | mov %o1,%l1 /* delay slot: %l1 (copied to %i1 on exit) = %o1 */ | ||
121 | .LL40: | ||
122 | cmp %i2,%i0 | ||
123 | bleu .LL46 | ||
124 | sethi %hi(65535),%o0 /* delay slot, runs either way: upper part of 65535 for .LL46 */ | ||
125 | b .LL73 /* %i2 > %i0 (unsigned): both result words are 0 */ | ||
126 | mov 0,%o1 | ||
127 | .LL46: | ||
128 | or %o0,%lo(65535),%o0 /* %o0 = 65535 */ | ||
129 | cmp %i2,%o0 | ||
130 | bgu .LL53 | ||
131 | mov %i2,%o1 /* delay slot, runs either way */ | ||
132 | cmp %i2,256 | ||
133 | addx %g0,-1,%o0 /* %o0 = -1 + carry: 0 when %i2 < 256, otherwise -1 */ | ||
134 | b .LL59 | ||
135 | and %o0,8,%o2 /* delay slot: %o2 = 0 when %i2 < 256, otherwise 8 */ | ||
136 | .LL53: | ||
137 | sethi %hi(16777215),%o0 | ||
138 | or %o0,%lo(16777215),%o0 | ||
139 | cmp %o1,%o0 | ||
140 | bgu .LL59 | ||
141 | mov 24,%o2 /* delay slot, runs either way: %o2 = 24, replaced by 16 when the branch falls through */ | ||
142 | mov 16,%o2 | ||
143 | .LL59: | ||
144 | srl %o1,%o2,%o1 /* shift %o1 right by %o2 to form the table index */ | ||
145 | sethi %hi(__clz_tab),%o0 | ||
146 | or %o0,%lo(__clz_tab),%o0 | ||
147 | ldub [%o1+%o0],%o0 /* byte lookup in __clz_tab (defined outside this file) */ | ||
148 | add %o0,%o2,%o0 | ||
149 | mov 32,%o1 | ||
150 | subcc %o1,%o0,%o2 /* %o2 = 32 - (table value + shift); used as the shift count at .LL67 */ | ||
151 | bne,a .LL67 | ||
152 | mov 32,%o0 /* annulled delay slot: runs only when the branch is taken */ | ||
153 | cmp %i0,%i2 | ||
154 | bgu .LL69 | ||
155 | cmp %i3,%o3 /* delay slot, runs either way: sets flags for the blu below */ | ||
156 | blu .LL73 | ||
157 | mov 0,%o1 /* delay slot, runs either way: %o1 = 0, overwritten with 1 at .LL69 */ | ||
158 | .LL69: | ||
159 | b .LL73 | ||
160 | mov 1,%o1 | ||
161 | .LL67: | ||
162 | sub %o0,%o2,%o0 /* %o0 = 32 - %o2, the complementary shift count */ | ||
163 | sll %i2,%o2,%i2 | ||
164 | srl %o3,%o0,%o1 | ||
165 | or %i2,%o1,%i2 /* %i2:%o3 shifted left by %o2 as one 64-bit value */ | ||
166 | sll %o3,%o2,%o3 | ||
167 | srl %i0,%o0,%o1 /* %o1 = bits shifted out of the top of %i0 */ | ||
168 | sll %i0,%o2,%i0 | ||
169 | srl %i3,%o0,%o0 | ||
170 | or %i0,%o0,%i0 /* %i0:%i3 shifted left by %o2 as well */ | ||
171 | sll %i3,%o2,%i3 | ||
172 | mov %i0,%o5 | ||
173 | mov %o1,%o4 | ||
174 | ! Inlined udiv_qrnnd | ||
175 | mov 32,%g1 | ||
176 | subcc %o4,%i2,%g0 | ||
177 | 1: bcs 5f | ||
178 | addxcc %o5,%o5,%o5 ! shift n1n0 and a q-bit in lsb | ||
179 | sub %o4,%i2,%o4 ! this kills msb of n | ||
180 | addx %o4,%o4,%o4 ! so this cannot give carry | ||
181 | subcc %g1,1,%g1 | ||
182 | 2: bne 1b | ||
183 | subcc %o4,%i2,%g0 | ||
184 | bcs 3f | ||
185 | addxcc %o5,%o5,%o5 ! shift n1n0 and a q-bit in lsb | ||
186 | b 3f | ||
187 | sub %o4,%i2,%o4 ! this kills msb of n | ||
188 | 4: sub %o4,%i2,%o4 | ||
189 | 5: addxcc %o4,%o4,%o4 | ||
190 | bcc 2b | ||
191 | subcc %g1,1,%g1 | ||
192 | ! Got carry from n. Subtract next step to cancel this carry. | ||
193 | bne 4b | ||
194 | addcc %o5,%o5,%o5 ! shift n1n0 and a 0-bit in lsb | ||
195 | sub %o4,%i2,%o4 | ||
196 | 3: xnor %o5,0,%o5 | ||
197 | ! End of inline udiv_qrnnd | ||
198 | mov %o4,%i0 | ||
199 | mov %o5,%o1 | ||
200 | ! Inlined umul_ppmm | ||
201 | wr %g0,%o1,%y ! SPARC has 0-3 delay insn after a wr | ||
202 | sra %o3,31,%g2 ! Do not move this insn | ||
203 | and %o1,%g2,%g2 ! Do not move this insn | ||
204 | andcc %g0,0,%g1 ! Do not move this insn | ||
205 | mulscc %g1,%o3,%g1 | ||
206 | mulscc %g1,%o3,%g1 | ||
207 | mulscc %g1,%o3,%g1 | ||
208 | mulscc %g1,%o3,%g1 | ||
209 | mulscc %g1,%o3,%g1 | ||
210 | mulscc %g1,%o3,%g1 | ||
211 | mulscc %g1,%o3,%g1 | ||
212 | mulscc %g1,%o3,%g1 | ||
213 | mulscc %g1,%o3,%g1 | ||
214 | mulscc %g1,%o3,%g1 | ||
215 | mulscc %g1,%o3,%g1 | ||
216 | mulscc %g1,%o3,%g1 | ||
217 | mulscc %g1,%o3,%g1 | ||
218 | mulscc %g1,%o3,%g1 | ||
219 | mulscc %g1,%o3,%g1 | ||
220 | mulscc %g1,%o3,%g1 | ||
221 | mulscc %g1,%o3,%g1 | ||
222 | mulscc %g1,%o3,%g1 | ||
223 | mulscc %g1,%o3,%g1 | ||
224 | mulscc %g1,%o3,%g1 | ||
225 | mulscc %g1,%o3,%g1 | ||
226 | mulscc %g1,%o3,%g1 | ||
227 | mulscc %g1,%o3,%g1 | ||
228 | mulscc %g1,%o3,%g1 | ||
229 | mulscc %g1,%o3,%g1 | ||
230 | mulscc %g1,%o3,%g1 | ||
231 | mulscc %g1,%o3,%g1 | ||
232 | mulscc %g1,%o3,%g1 | ||
233 | mulscc %g1,%o3,%g1 | ||
234 | mulscc %g1,%o3,%g1 | ||
235 | mulscc %g1,%o3,%g1 | ||
236 | mulscc %g1,%o3,%g1 | ||
237 | mulscc %g1,0,%g1 | ||
238 | add %g1,%g2,%o0 /* %o0 = multiply-step result plus the correction term in %g2 */ | ||
239 | rd %y,%o2 /* %o2 = contents of %y after the multiply steps */ | ||
240 | cmp %o0,%i0 | ||
241 | bgu,a .LL73 | ||
242 | add %o1,-1,%o1 /* annulled delay slot: runs only when the branch is taken, %o1 -= 1 */ | ||
243 | bne,a .LL45 | ||
244 | mov 0,%o2 /* annulled delay slot: runs only when the branch is taken */ | ||
245 | cmp %o2,%i3 | ||
246 | bleu .LL45 | ||
247 | mov 0,%o2 /* delay slot, runs either way */ | ||
248 | add %o1,-1,%o1 | ||
249 | .LL73: | ||
250 | mov 0,%o2 | ||
251 | .LL45: | ||
252 | mov %o1,%l1 | ||
253 | .LL78: | ||
254 | mov %o2,%l0 | ||
255 | mov %l0,%i0 /* result word from %o2 */ | ||
256 | mov %l1,%i1 /* result word from %o1 */ | ||
257 | ret | ||
258 | restore | ||
diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S new file mode 100644 index 000000000000..a784720a8a22 --- /dev/null +++ b/arch/sparc/lib/umul.S | |||
@@ -0,0 +1,169 @@ | |||
1 | /* $Id: umul.S,v 1.4 1996/09/30 02:22:39 davem Exp $ | ||
2 | * umul.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | |||
7 | /* | ||
8 | * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the | ||
9 | * upper 32 bits of the 64-bit product). | ||
10 | * | ||
11 | * This code optimizes short (less than 13-bit) multiplies. Short | ||
12 | * multiplies require 25 instruction cycles, and long ones require | ||
13 | * 45 instruction cycles. | ||
14 | * | ||
15 | * On return, overflow has occurred (%o1 is not zero) if and only if | ||
16 | * the Z condition code is clear, allowing, e.g., the following: | ||
17 | * | ||
18 | * call .umul | ||
19 | * nop | ||
20 | * bnz overflow (or tnz) | ||
21 | */ | ||
22 | |||
23 | .globl .umul | ||
24 | .umul: /* unsigned multiply: in %o0, %o1; out %o0 = low word, %o1 = high word of the product */ | ||
25 | or %o0, %o1, %o4 /* combine both operands so one test covers the high bits of each */ | ||
26 | mov %o0, %y ! multiplier -> Y | ||
27 | |||
28 | andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args | ||
29 | be Lmul_shortway ! if zero, can do it the short way | ||
30 | andcc %g0, %g0, %o4 ! zero the partial product and clear N and V | ||
31 | |||
32 | /* | ||
33 | * Long multiply. 32 steps, followed by a final shift step. | ||
34 | */ | ||
35 | mulscc %o4, %o1, %o4 ! 1 | ||
36 | mulscc %o4, %o1, %o4 ! 2 | ||
37 | mulscc %o4, %o1, %o4 ! 3 | ||
38 | mulscc %o4, %o1, %o4 ! 4 | ||
39 | mulscc %o4, %o1, %o4 ! 5 | ||
40 | mulscc %o4, %o1, %o4 ! 6 | ||
41 | mulscc %o4, %o1, %o4 ! 7 | ||
42 | mulscc %o4, %o1, %o4 ! 8 | ||
43 | mulscc %o4, %o1, %o4 ! 9 | ||
44 | mulscc %o4, %o1, %o4 ! 10 | ||
45 | mulscc %o4, %o1, %o4 ! 11 | ||
46 | mulscc %o4, %o1, %o4 ! 12 | ||
47 | mulscc %o4, %o1, %o4 ! 13 | ||
48 | mulscc %o4, %o1, %o4 ! 14 | ||
49 | mulscc %o4, %o1, %o4 ! 15 | ||
50 | mulscc %o4, %o1, %o4 ! 16 | ||
51 | mulscc %o4, %o1, %o4 ! 17 | ||
52 | mulscc %o4, %o1, %o4 ! 18 | ||
53 | mulscc %o4, %o1, %o4 ! 19 | ||
54 | mulscc %o4, %o1, %o4 ! 20 | ||
55 | mulscc %o4, %o1, %o4 ! 21 | ||
56 | mulscc %o4, %o1, %o4 ! 22 | ||
57 | mulscc %o4, %o1, %o4 ! 23 | ||
58 | mulscc %o4, %o1, %o4 ! 24 | ||
59 | mulscc %o4, %o1, %o4 ! 25 | ||
60 | mulscc %o4, %o1, %o4 ! 26 | ||
61 | mulscc %o4, %o1, %o4 ! 27 | ||
62 | mulscc %o4, %o1, %o4 ! 28 | ||
63 | mulscc %o4, %o1, %o4 ! 29 | ||
64 | mulscc %o4, %o1, %o4 ! 30 | ||
65 | mulscc %o4, %o1, %o4 ! 31 | ||
66 | mulscc %o4, %o1, %o4 ! 32 | ||
67 | mulscc %o4, %g0, %o4 ! final shift | ||
68 | |||
69 | |||
70 | /* | ||
71 | * Normally, with the shift-and-add approach, if both numbers are | ||
72 | * positive you get the correct result. With 32-bit two's-complement | ||
73 | * numbers, -x is represented as | ||
74 | * | ||
75 | * x 32 | ||
76 | * ( 2 - ------ ) mod 2 * 2 | ||
77 | * 32 | ||
78 | * 2 | ||
79 | * | ||
80 | * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, | ||
81 | * we can treat this as if the radix point were just to the left | ||
82 | * of the sign bit (multiply by 2^32), and get | ||
83 | * | ||
84 | * -x = (2 - x) mod 2 | ||
85 | * | ||
86 | * Then, ignoring the `mod 2's for convenience: | ||
87 | * | ||
88 | * x * y = xy | ||
89 | * -x * y = 2y - xy | ||
90 | * x * -y = 2x - xy | ||
91 | * -x * -y = 4 - 2x - 2y + xy | ||
92 | * | ||
93 | * For signed multiplies, we subtract (x << 32) from the partial | ||
94 | * product to fix this problem for negative multipliers (see mul.s). | ||
95 | * Because of the way the shift into the partial product is calculated | ||
96 | * (N xor V), this term is automatically removed for the multiplicand, | ||
97 | * so we don't have to adjust. | ||
98 | * | ||
99 | * But for unsigned multiplies, the high order bit wasn't a sign bit, | ||
100 | * and the correction is wrong. So for unsigned multiplies where the | ||
101 | * high order bit is one, we end up with xy - (y << 32). To fix it | ||
102 | * we add y << 32. | ||
103 | */ | ||
104 | #if 0 | ||
105 | tst %o1 | ||
106 | bl,a 1f ! if %o1 < 0 (high order bit = 1), | ||
107 | add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) | ||
108 | |||
109 | 1: | ||
110 | rd %y, %o0 ! get lower half of product | ||
111 | retl | ||
112 | addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 | ||
113 | #else | ||
114 | /* Faster code from tege@sics.se. */ | ||
115 | sra %o1, 31, %o2 ! make mask from sign bit | ||
116 | and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 | ||
117 | rd %y, %o0 ! get lower half of product | ||
118 | retl | ||
119 | addcc %o4, %o2, %o1 ! add compensation and put upper half in place | ||
120 | #endif | ||
121 | |||
122 | Lmul_shortway: | ||
123 | /* | ||
124 | * Short multiply. 12 steps, followed by a final shift step. | ||
125 | * The resulting bits are off by 12 and (32-12) = 20 bit positions, | ||
126 | * but there is no problem with %o0 being negative (unlike above), | ||
127 | * and overflow is impossible (the answer is at most 24 bits long). | ||
128 | */ | ||
129 | mulscc %o4, %o1, %o4 ! 1 | ||
130 | mulscc %o4, %o1, %o4 ! 2 | ||
131 | mulscc %o4, %o1, %o4 ! 3 | ||
132 | mulscc %o4, %o1, %o4 ! 4 | ||
133 | mulscc %o4, %o1, %o4 ! 5 | ||
134 | mulscc %o4, %o1, %o4 ! 6 | ||
135 | mulscc %o4, %o1, %o4 ! 7 | ||
136 | mulscc %o4, %o1, %o4 ! 8 | ||
137 | mulscc %o4, %o1, %o4 ! 9 | ||
138 | mulscc %o4, %o1, %o4 ! 10 | ||
139 | mulscc %o4, %o1, %o4 ! 11 | ||
140 | mulscc %o4, %o1, %o4 ! 12 | ||
141 | mulscc %o4, %g0, %o4 ! final shift | ||
142 | |||
143 | /* | ||
144 | * %o4 has 20 of the bits that should be in the result; %y has | ||
145 | * the bottom 12 (as %y's top 12). That is: | ||
146 | * | ||
147 | * %o4 %y | ||
148 | * +----------------+----------------+ | ||
149 | * | -12- | -20- | -12- | -20- | | ||
150 | * +------(---------+------)---------+ | ||
151 | * -----result----- | ||
152 | * | ||
153 | * The 12 bits of %o4 left of the `result' area are all zero; | ||
154 | * in fact, all top 20 bits of %o4 are zero. | ||
155 | */ | ||
156 | |||
157 | rd %y, %o5 /* %o5 = %y, whose top 12 bits are the bottom 12 bits of the product */ | ||
158 | sll %o4, 12, %o0 ! shift middle bits left 12 | ||
159 | srl %o5, 20, %o5 ! shift low bits right 20 | ||
160 | or %o5, %o0, %o0 /* %o0 = the two pieces merged into the low word of the product */ | ||
161 | retl | ||
162 | addcc %g0, %g0, %o1 ! %o1 = zero, and set Z | ||
163 | |||
164 | .globl .umul_patch | ||
165 | .umul_patch: /* unsigned multiply using the umul instruction; NOTE(review): presumably the hardware-multiply replacement for .umul - confirm against the patching code */ | ||
166 | umul %o0, %o1, %o0 /* %o0 = low word of %o0 * %o1 */ | ||
167 | retl | ||
168 | rd %y, %o1 /* delay slot: %o1 = %y, read back as the high word of the product */ | ||
169 | nop | ||
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S new file mode 100644 index 000000000000..ec7f0c502c56 --- /dev/null +++ b/arch/sparc/lib/urem.S | |||
@@ -0,0 +1,355 @@ | |||
1 | /* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $ | ||
2 | * urem.S: This routine was taken from glibc-1.09 and is covered | ||
3 | * by the GNU Library General Public License Version 2. | ||
4 | */ | ||
5 | |||
6 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
7 | /* | ||
8 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
9 | * Architecture Manual, with fixes from Gordon Irlam. | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
14 | * | ||
15 | * m4 parameters (shown here with the values substituted for this file): | ||
16 | * .urem name of function to generate | ||
17 | * rem operation selector: div => %o0 / %o1; rem => %o0 % %o1 | ||
18 | * false signedness selector: true => signed; false => unsigned | ||
19 | * | ||
20 | * Algorithm parameters: | ||
21 | * N how many bits per iteration we try to get (4) | ||
22 | * WORDSIZE total number of bits (32) | ||
23 | * | ||
24 | * Derived constants: | ||
25 | * TOPBITS number of bits in the top decade of a number | ||
26 | * | ||
27 | * Important variables: | ||
28 | * Q the partial quotient under development (initially 0) | ||
29 | * R the remainder so far, initially the dividend | ||
30 | * ITER number of main division loop iterations required; | ||
31 | * equal to ceil(log2(quotient) / N). Note that this | ||
32 | * is the log base (2^N) of the quotient. | ||
33 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
34 | * | ||
35 | * Cost: | ||
36 | * Current estimate for non-large dividend is | ||
37 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
38 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
39 | * different path, as the upper bits of the quotient must be developed | ||
40 | * one bit at a time. | ||
41 | */ | ||
42 | |||
43 | .globl .urem | ||
44 | .urem: ! In: %o0 = dividend, %o1 = divisor. Out: %o0 = %o0 % %o1 (unsigned) | ||
45 | |||
46 | ! Ready to divide. Compute size of quotient; scale comparand. | ||
47 | orcc %o1, %g0, %o5 ! %o5 = divisor (the comparand V); Z set if it is zero | ||
48 | bne 1f | ||
49 | mov %o0, %o3 ! (delay slot) %o3 = dividend (the running remainder R) | ||
50 | |||
51 | ! Divide by zero trap. If it returns, return 0 (about as | ||
52 | ! wrong as possible, but that is what SunOS does...). | ||
53 | ta ST_DIV0 | ||
54 | retl | ||
55 | clr %o0 ! (delay slot) result = 0 | ||
56 | |||
57 | 1: | ||
58 | cmp %o3, %o5 ! if %o1 exceeds %o0, done | ||
59 | blu Lgot_result ! (and algorithm fails otherwise) | ||
60 | clr %o2 ! (delay slot) %o2 = partial quotient Q = 0 | ||
61 | |||
62 | sethi %hi(1 << (32 - 4 - 1)), %g1 ! %g1 = 2^27, threshold for the big-dividend path | ||
63 | |||
64 | cmp %o3, %g1 | ||
65 | blu Lnot_really_big | ||
66 | clr %o4 ! (delay slot) %o4 = N-bit iteration count (ITER) = 0 | ||
67 | |||
68 | ! Here the dividend is >= 2**(31-N) or so. We must be careful here, | ||
69 | ! as our usual N-at-a-shot divide step will cause overflow and havoc. | ||
70 | ! The number of bits in the result here is N*ITER+SC, where SC <= N. | ||
71 | ! Compute ITER in an unorthodox manner: know we need to shift V into | ||
72 | ! the top decade: so do not even bother to compare to R. | ||
73 | 1: | ||
74 | cmp %o5, %g1 ! has V reached 2^27 yet? | ||
75 | bgeu 3f | ||
76 | mov 1, %g7 ! (delay slot) %g7 = single-bit step count, starting at 1 | ||
77 | |||
78 | sll %o5, 4, %o5 ! V <<= N (N = 4) | ||
79 | |||
80 | b 1b | ||
81 | add %o4, 1, %o4 ! (delay slot) count one more N-bit iteration in %o4 | ||
82 | |||
83 | ! Now compute %g7, the number of single-bit divide steps. | ||
84 | 2: | ||
85 | addcc %o5, %o5, %o5 ! V <<= 1; carry set if a bit was shifted out of the top | ||
86 | bcc Lnot_too_big | ||
87 | add %g7, 1, %g7 ! (delay slot) count the extra single-bit step | ||
88 | |||
89 | ! We get here if the divisor (its scaled copy in %o5) overflowed while shifting. | ||
90 | ! This means that %o3 has the high-order bit set. | ||
91 | ! Restore %o5 and subtract from %o3. | ||
92 | sll %g1, 4, %g1 ! high order bit | ||
93 | srl %o5, 1, %o5 ! rest of %o5 | ||
94 | add %o5, %g1, %o5 | ||
95 | |||
96 | b Ldo_single_div | ||
97 | sub %g7, 1, %g7 ! (delay slot) undo the count added for the overflowed shift | ||
98 | |||
99 | Lnot_too_big: | ||
100 | 3: | ||
101 | cmp %o5, %o3 | ||
102 | blu 2b ! V < R: keep doubling V | ||
103 | nop | ||
104 | |||
105 | be Ldo_single_div | ||
106 | nop | ||
107 | /* NB: these are commented out in the V8-Sparc manual as well */ | ||
108 | /* (I do not understand this) */ | ||
109 | ! %o5 > %o3: went too far: back up 1 step | ||
110 | ! srl %o5, 1, %o5 | ||
111 | ! dec %g7 | ||
112 | ! do single-bit divide steps | ||
113 | ! | ||
114 | ! We have to be careful here. We know that %o3 >= %o5, so we can do the | ||
115 | ! first divide step without thinking. BUT, the others are conditional, | ||
116 | ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- | ||
117 | ! order bit set in the first step, just falling into the regular | ||
118 | ! division loop will mess up the first time around. | ||
119 | ! So we unroll slightly... | ||
120 | Ldo_single_div: | ||
121 | subcc %g7, 1, %g7 ! any single-bit steps to do? | ||
122 | bl Lend_regular_divide ! no: go on to the N-bit iterations | ||
123 | nop | ||
124 | |||
125 | sub %o3, %o5, %o3 ! first step, unconditional: R -= V | ||
126 | mov 1, %o2 ! Q = 1 | ||
127 | |||
128 | b Lend_single_divloop | ||
129 | nop | ||
130 | Lsingle_divloop: | ||
131 | sll %o2, 1, %o2 ! Q <<= 1 (does not touch the condition codes) | ||
132 | bl 1f ! sign of R, from the tst %o3 in the loop-back delay slot | ||
133 | srl %o5, 1, %o5 ! (delay slot) V >>= 1 | ||
134 | ! %o3 >= 0 | ||
135 | sub %o3, %o5, %o3 | ||
136 | b 2f | ||
137 | add %o2, 1, %o2 ! (delay slot) Q += 1 | ||
138 | 1: ! %o3 < 0 | ||
139 | add %o3, %o5, %o3 | ||
140 | sub %o2, 1, %o2 | ||
141 | 2: | ||
142 | Lend_single_divloop: | ||
143 | subcc %g7, 1, %g7 | ||
144 | bge Lsingle_divloop | ||
145 | tst %o3 ! (delay slot) set flags on the sign of R for the next step | ||
146 | |||
147 | b,a Lend_regular_divide ! annulled: no delay slot instruction executes | ||
148 | |||
149 | Lnot_really_big: ! dividend < 2^27: scale V up N bits at a time until it exceeds R, counting the shifts in %o4 | ||
150 | 1: | ||
151 | sll %o5, 4, %o5 ! V <<= N (N = 4) | ||
152 | |||
153 | cmp %o5, %o3 | ||
154 | bleu 1b | ||
155 | addcc %o4, 1, %o4 ! (delay slot) count this shift; also sets the flags tested by the be below | ||
156 | |||
157 | be Lgot_result ! tests the addcc result, i.e. taken only if the count became 0 | ||
158 | sub %o4, 1, %o4 ! (delay slot) count - 1, so the subcc/bge test below runs Ldivloop once per shift | ||
159 | |||
160 | tst %o3 ! set up for initial iteration | ||
161 | Ldivloop: | ||
162 | sll %o2, 4, %o2 ! make room in Q for this iteration's N = 4 bits | ||
163 | ! depth 1, accumulated bits 0 | ||
164 | bl L.1.16 | ||
165 | srl %o5,1,%o5 | ||
166 | ! remainder is positive | ||
167 | subcc %o3,%o5,%o3 | ||
168 | ! depth 2, accumulated bits 1 | ||
169 | bl L.2.17 | ||
170 | srl %o5,1,%o5 | ||
171 | ! remainder is positive | ||
172 | subcc %o3,%o5,%o3 | ||
173 | ! depth 3, accumulated bits 3 | ||
174 | bl L.3.19 | ||
175 | srl %o5,1,%o5 | ||
176 | ! remainder is positive | ||
177 | subcc %o3,%o5,%o3 | ||
178 | ! depth 4, accumulated bits 7 | ||
179 | bl L.4.23 | ||
180 | srl %o5,1,%o5 | ||
181 | ! remainder is positive | ||
182 | subcc %o3,%o5,%o3 | ||
183 | b 9f | ||
184 | add %o2, (7*2+1), %o2 ! (delay slot) add this iteration's signed quotient contribution to Q | ||
185 | |||
186 | L.4.23: | ||
187 | ! remainder is negative | ||
188 | addcc %o3,%o5,%o3 | ||
189 | b 9f | ||
190 | add %o2, (7*2-1), %o2 | ||
191 | |||
192 | L.3.19: | ||
193 | ! remainder is negative | ||
194 | addcc %o3,%o5,%o3 | ||
195 | ! depth 4, accumulated bits 5 | ||
196 | bl L.4.21 | ||
197 | srl %o5,1,%o5 | ||
198 | ! remainder is positive | ||
199 | subcc %o3,%o5,%o3 | ||
200 | b 9f | ||
201 | add %o2, (5*2+1), %o2 | ||
202 | |||
203 | L.4.21: | ||
204 | ! remainder is negative | ||
205 | addcc %o3,%o5,%o3 | ||
206 | b 9f | ||
207 | add %o2, (5*2-1), %o2 | ||
208 | |||
209 | L.2.17: | ||
210 | ! remainder is negative | ||
211 | addcc %o3,%o5,%o3 | ||
212 | ! depth 3, accumulated bits 1 | ||
213 | bl L.3.17 | ||
214 | srl %o5,1,%o5 | ||
215 | ! remainder is positive | ||
216 | subcc %o3,%o5,%o3 | ||
217 | ! depth 4, accumulated bits 3 | ||
218 | bl L.4.19 | ||
219 | srl %o5,1,%o5 | ||
220 | ! remainder is positive | ||
221 | subcc %o3,%o5,%o3 | ||
222 | b 9f | ||
223 | add %o2, (3*2+1), %o2 | ||
224 | |||
225 | L.4.19: | ||
226 | ! remainder is negative | ||
227 | addcc %o3,%o5,%o3 | ||
228 | b 9f | ||
229 | add %o2, (3*2-1), %o2 | ||
230 | |||
231 | L.3.17: | ||
232 | ! remainder is negative | ||
233 | addcc %o3,%o5,%o3 | ||
234 | ! depth 4, accumulated bits 1 | ||
235 | bl L.4.17 | ||
236 | srl %o5,1,%o5 | ||
237 | ! remainder is positive | ||
238 | subcc %o3,%o5,%o3 | ||
239 | b 9f | ||
240 | add %o2, (1*2+1), %o2 | ||
241 | |||
242 | L.4.17: | ||
243 | ! remainder is negative | ||
244 | addcc %o3,%o5,%o3 | ||
245 | b 9f | ||
246 | add %o2, (1*2-1), %o2 | ||
247 | |||
248 | L.1.16: | ||
249 | ! remainder is negative | ||
250 | addcc %o3,%o5,%o3 | ||
251 | ! depth 2, accumulated bits -1 | ||
252 | bl L.2.15 | ||
253 | srl %o5,1,%o5 | ||
254 | ! remainder is positive | ||
255 | subcc %o3,%o5,%o3 | ||
256 | ! depth 3, accumulated bits -1 | ||
257 | bl L.3.15 | ||
258 | srl %o5,1,%o5 | ||
259 | ! remainder is positive | ||
260 | subcc %o3,%o5,%o3 | ||
261 | ! depth 4, accumulated bits -1 | ||
262 | bl L.4.15 | ||
263 | srl %o5,1,%o5 | ||
264 | ! remainder is positive | ||
265 | subcc %o3,%o5,%o3 | ||
266 | b 9f | ||
267 | add %o2, (-1*2+1), %o2 | ||
268 | |||
269 | L.4.15: | ||
270 | ! remainder is negative | ||
271 | addcc %o3,%o5,%o3 | ||
272 | b 9f | ||
273 | add %o2, (-1*2-1), %o2 | ||
274 | |||
275 | L.3.15: | ||
276 | ! remainder is negative | ||
277 | addcc %o3,%o5,%o3 | ||
278 | ! depth 4, accumulated bits -3 | ||
279 | bl L.4.13 | ||
280 | srl %o5,1,%o5 | ||
281 | ! remainder is positive | ||
282 | subcc %o3,%o5,%o3 | ||
283 | b 9f | ||
284 | add %o2, (-3*2+1), %o2 | ||
285 | |||
286 | L.4.13: | ||
287 | ! remainder is negative | ||
288 | addcc %o3,%o5,%o3 | ||
289 | b 9f | ||
290 | add %o2, (-3*2-1), %o2 | ||
291 | |||
292 | L.2.15: | ||
293 | ! remainder is negative | ||
294 | addcc %o3,%o5,%o3 | ||
295 | ! depth 3, accumulated bits -3 | ||
296 | bl L.3.13 | ||
297 | srl %o5,1,%o5 | ||
298 | ! remainder is positive | ||
299 | subcc %o3,%o5,%o3 | ||
300 | ! depth 4, accumulated bits -5 | ||
301 | bl L.4.11 | ||
302 | srl %o5,1,%o5 | ||
303 | ! remainder is positive | ||
304 | subcc %o3,%o5,%o3 | ||
305 | b 9f | ||
306 | add %o2, (-5*2+1), %o2 | ||
307 | |||
308 | L.4.11: | ||
309 | ! remainder is negative | ||
310 | addcc %o3,%o5,%o3 | ||
311 | b 9f | ||
312 | add %o2, (-5*2-1), %o2 | ||
313 | |||
314 | L.3.13: | ||
315 | ! remainder is negative | ||
316 | addcc %o3,%o5,%o3 | ||
317 | ! depth 4, accumulated bits -7 | ||
318 | bl L.4.9 | ||
319 | srl %o5,1,%o5 | ||
320 | ! remainder is positive | ||
321 | subcc %o3,%o5,%o3 | ||
322 | b 9f | ||
323 | add %o2, (-7*2+1), %o2 | ||
324 | |||
325 | L.4.9: | ||
326 | ! remainder is negative | ||
327 | addcc %o3,%o5,%o3 | ||
328 | b 9f | ||
329 | add %o2, (-7*2-1), %o2 | ||
330 | |||
331 | 9: | ||
332 | Lend_regular_divide: | ||
333 | subcc %o4, 1, %o4 ! another N-bit iteration to run? | ||
334 | bge Ldivloop | ||
335 | tst %o3 ! (delay slot) set flags on the sign of R for whichever instruction runs next | ||
336 | |||
337 | bl,a Lgot_result ! R negative (flags from the preceding tst %o3)? | ||
338 | ! non-restoring fixup here (one instruction only!) | ||
339 | add %o3, %o1, %o3 ! (annulled delay slot) R += divisor; executes only when the branch is taken | ||
340 | |||
341 | Lgot_result: | ||
342 | |||
343 | retl | ||
344 | mov %o3, %o0 ! (delay slot) return the remainder in %o0 | ||
345 | |||
346 | .globl .urem_patch | ||
347 | .urem_patch: ! NOTE(review): presumably a hardware-divide replacement body for .urem -- confirm against the code that references this symbol | ||
348 | wr %g0, 0x0, %y ! clear %y: udiv takes %y as the upper 32 bits of its dividend | ||
349 | nop ! three nops: a write to %y may not be visible to the next three instructions | ||
350 | nop | ||
351 | nop | ||
352 | udiv %o0, %o1, %o2 ! %o2 = %o0 / %o1 (unsigned quotient) | ||
353 | umul %o2, %o1, %o2 ! %o2 = quotient * divisor (low 32 bits) | ||
354 | retl | ||
355 | sub %o0, %o2, %o0 ! (delay slot) %o0 = dividend - quotient * divisor = remainder | ||