diff options
| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
| commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
| tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sparc/lib | |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/sparc/lib')
32 files changed, 7186 insertions, 0 deletions
diff --git a/arch/sparc/lib/COPYING.LIB b/arch/sparc/lib/COPYING.LIB new file mode 100644 index 000000000000..eb685a5ec981 --- /dev/null +++ b/arch/sparc/lib/COPYING.LIB | |||
| @@ -0,0 +1,481 @@ | |||
| 1 | GNU LIBRARY GENERAL PUBLIC LICENSE | ||
| 2 | Version 2, June 1991 | ||
| 3 | |||
| 4 | Copyright (C) 1991 Free Software Foundation, Inc. | ||
| 5 | 675 Mass Ave, Cambridge, MA 02139, USA | ||
| 6 | Everyone is permitted to copy and distribute verbatim copies | ||
| 7 | of this license document, but changing it is not allowed. | ||
| 8 | |||
| 9 | [This is the first released version of the library GPL. It is | ||
| 10 | numbered 2 because it goes with version 2 of the ordinary GPL.] | ||
| 11 | |||
| 12 | Preamble | ||
| 13 | |||
| 14 | The licenses for most software are designed to take away your | ||
| 15 | freedom to share and change it. By contrast, the GNU General Public | ||
| 16 | Licenses are intended to guarantee your freedom to share and change | ||
| 17 | free software--to make sure the software is free for all its users. | ||
| 18 | |||
| 19 | This license, the Library General Public License, applies to some | ||
| 20 | specially designated Free Software Foundation software, and to any | ||
| 21 | other libraries whose authors decide to use it. You can use it for | ||
| 22 | your libraries, too. | ||
| 23 | |||
| 24 | When we speak of free software, we are referring to freedom, not | ||
| 25 | price. Our General Public Licenses are designed to make sure that you | ||
| 26 | have the freedom to distribute copies of free software (and charge for | ||
| 27 | this service if you wish), that you receive source code or can get it | ||
| 28 | if you want it, that you can change the software or use pieces of it | ||
| 29 | in new free programs; and that you know you can do these things. | ||
| 30 | |||
| 31 | To protect your rights, we need to make restrictions that forbid | ||
| 32 | anyone to deny you these rights or to ask you to surrender the rights. | ||
| 33 | These restrictions translate to certain responsibilities for you if | ||
| 34 | you distribute copies of the library, or if you modify it. | ||
| 35 | |||
| 36 | For example, if you distribute copies of the library, whether gratis | ||
| 37 | or for a fee, you must give the recipients all the rights that we gave | ||
| 38 | you. You must make sure that they, too, receive or can get the source | ||
| 39 | code. If you link a program with the library, you must provide | ||
| 40 | complete object files to the recipients so that they can relink them | ||
| 41 | with the library, after making changes to the library and recompiling | ||
| 42 | it. And you must show them these terms so they know their rights. | ||
| 43 | |||
| 44 | Our method of protecting your rights has two steps: (1) copyright | ||
| 45 | the library, and (2) offer you this license which gives you legal | ||
| 46 | permission to copy, distribute and/or modify the library. | ||
| 47 | |||
| 48 | Also, for each distributor's protection, we want to make certain | ||
| 49 | that everyone understands that there is no warranty for this free | ||
| 50 | library. If the library is modified by someone else and passed on, we | ||
| 51 | want its recipients to know that what they have is not the original | ||
| 52 | version, so that any problems introduced by others will not reflect on | ||
| 53 | the original authors' reputations. | ||
| 54 | |||
| 55 | Finally, any free program is threatened constantly by software | ||
| 56 | patents. We wish to avoid the danger that companies distributing free | ||
| 57 | software will individually obtain patent licenses, thus in effect | ||
| 58 | transforming the program into proprietary software. To prevent this, | ||
| 59 | we have made it clear that any patent must be licensed for everyone's | ||
| 60 | free use or not licensed at all. | ||
| 61 | |||
| 62 | Most GNU software, including some libraries, is covered by the ordinary | ||
| 63 | GNU General Public License, which was designed for utility programs. This | ||
| 64 | license, the GNU Library General Public License, applies to certain | ||
| 65 | designated libraries. This license is quite different from the ordinary | ||
| 66 | one; be sure to read it in full, and don't assume that anything in it is | ||
| 67 | the same as in the ordinary license. | ||
| 68 | |||
| 69 | The reason we have a separate public license for some libraries is that | ||
| 70 | they blur the distinction we usually make between modifying or adding to a | ||
| 71 | program and simply using it. Linking a program with a library, without | ||
| 72 | changing the library, is in some sense simply using the library, and is | ||
| 73 | analogous to running a utility program or application program. However, in | ||
| 74 | a textual and legal sense, the linked executable is a combined work, a | ||
| 75 | derivative of the original library, and the ordinary General Public License | ||
| 76 | treats it as such. | ||
| 77 | |||
| 78 | Because of this blurred distinction, using the ordinary General | ||
| 79 | Public License for libraries did not effectively promote software | ||
| 80 | sharing, because most developers did not use the libraries. We | ||
| 81 | concluded that weaker conditions might promote sharing better. | ||
| 82 | |||
| 83 | However, unrestricted linking of non-free programs would deprive the | ||
| 84 | users of those programs of all benefit from the free status of the | ||
| 85 | libraries themselves. This Library General Public License is intended to | ||
| 86 | permit developers of non-free programs to use free libraries, while | ||
| 87 | preserving your freedom as a user of such programs to change the free | ||
| 88 | libraries that are incorporated in them. (We have not seen how to achieve | ||
| 89 | this as regards changes in header files, but we have achieved it as regards | ||
| 90 | changes in the actual functions of the Library.) The hope is that this | ||
| 91 | will lead to faster development of free libraries. | ||
| 92 | |||
| 93 | The precise terms and conditions for copying, distribution and | ||
| 94 | modification follow. Pay close attention to the difference between a | ||
| 95 | "work based on the library" and a "work that uses the library". The | ||
| 96 | former contains code derived from the library, while the latter only | ||
| 97 | works together with the library. | ||
| 98 | |||
| 99 | Note that it is possible for a library to be covered by the ordinary | ||
| 100 | General Public License rather than by this special one. | ||
| 101 | |||
| 102 | GNU LIBRARY GENERAL PUBLIC LICENSE | ||
| 103 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | ||
| 104 | |||
| 105 | 0. This License Agreement applies to any software library which | ||
| 106 | contains a notice placed by the copyright holder or other authorized | ||
| 107 | party saying it may be distributed under the terms of this Library | ||
| 108 | General Public License (also called "this License"). Each licensee is | ||
| 109 | addressed as "you". | ||
| 110 | |||
| 111 | A "library" means a collection of software functions and/or data | ||
| 112 | prepared so as to be conveniently linked with application programs | ||
| 113 | (which use some of those functions and data) to form executables. | ||
| 114 | |||
| 115 | The "Library", below, refers to any such software library or work | ||
| 116 | which has been distributed under these terms. A "work based on the | ||
| 117 | Library" means either the Library or any derivative work under | ||
| 118 | copyright law: that is to say, a work containing the Library or a | ||
| 119 | portion of it, either verbatim or with modifications and/or translated | ||
| 120 | straightforwardly into another language. (Hereinafter, translation is | ||
| 121 | included without limitation in the term "modification".) | ||
| 122 | |||
| 123 | "Source code" for a work means the preferred form of the work for | ||
| 124 | making modifications to it. For a library, complete source code means | ||
| 125 | all the source code for all modules it contains, plus any associated | ||
| 126 | interface definition files, plus the scripts used to control compilation | ||
| 127 | and installation of the library. | ||
| 128 | |||
| 129 | Activities other than copying, distribution and modification are not | ||
| 130 | covered by this License; they are outside its scope. The act of | ||
| 131 | running a program using the Library is not restricted, and output from | ||
| 132 | such a program is covered only if its contents constitute a work based | ||
| 133 | on the Library (independent of the use of the Library in a tool for | ||
| 134 | writing it). Whether that is true depends on what the Library does | ||
| 135 | and what the program that uses the Library does. | ||
| 136 | |||
| 137 | 1. You may copy and distribute verbatim copies of the Library's | ||
| 138 | complete source code as you receive it, in any medium, provided that | ||
| 139 | you conspicuously and appropriately publish on each copy an | ||
| 140 | appropriate copyright notice and disclaimer of warranty; keep intact | ||
| 141 | all the notices that refer to this License and to the absence of any | ||
| 142 | warranty; and distribute a copy of this License along with the | ||
| 143 | Library. | ||
| 144 | |||
| 145 | You may charge a fee for the physical act of transferring a copy, | ||
| 146 | and you may at your option offer warranty protection in exchange for a | ||
| 147 | fee. | ||
| 148 | |||
| 149 | 2. You may modify your copy or copies of the Library or any portion | ||
| 150 | of it, thus forming a work based on the Library, and copy and | ||
| 151 | distribute such modifications or work under the terms of Section 1 | ||
| 152 | above, provided that you also meet all of these conditions: | ||
| 153 | |||
| 154 | a) The modified work must itself be a software library. | ||
| 155 | |||
| 156 | b) You must cause the files modified to carry prominent notices | ||
| 157 | stating that you changed the files and the date of any change. | ||
| 158 | |||
| 159 | c) You must cause the whole of the work to be licensed at no | ||
| 160 | charge to all third parties under the terms of this License. | ||
| 161 | |||
| 162 | d) If a facility in the modified Library refers to a function or a | ||
| 163 | table of data to be supplied by an application program that uses | ||
| 164 | the facility, other than as an argument passed when the facility | ||
| 165 | is invoked, then you must make a good faith effort to ensure that, | ||
| 166 | in the event an application does not supply such function or | ||
| 167 | table, the facility still operates, and performs whatever part of | ||
| 168 | its purpose remains meaningful. | ||
| 169 | |||
| 170 | (For example, a function in a library to compute square roots has | ||
| 171 | a purpose that is entirely well-defined independent of the | ||
| 172 | application. Therefore, Subsection 2d requires that any | ||
| 173 | application-supplied function or table used by this function must | ||
| 174 | be optional: if the application does not supply it, the square | ||
| 175 | root function must still compute square roots.) | ||
| 176 | |||
| 177 | These requirements apply to the modified work as a whole. If | ||
| 178 | identifiable sections of that work are not derived from the Library, | ||
| 179 | and can be reasonably considered independent and separate works in | ||
| 180 | themselves, then this License, and its terms, do not apply to those | ||
| 181 | sections when you distribute them as separate works. But when you | ||
| 182 | distribute the same sections as part of a whole which is a work based | ||
| 183 | on the Library, the distribution of the whole must be on the terms of | ||
| 184 | this License, whose permissions for other licensees extend to the | ||
| 185 | entire whole, and thus to each and every part regardless of who wrote | ||
| 186 | it. | ||
| 187 | |||
| 188 | Thus, it is not the intent of this section to claim rights or contest | ||
| 189 | your rights to work written entirely by you; rather, the intent is to | ||
| 190 | exercise the right to control the distribution of derivative or | ||
| 191 | collective works based on the Library. | ||
| 192 | |||
| 193 | In addition, mere aggregation of another work not based on the Library | ||
| 194 | with the Library (or with a work based on the Library) on a volume of | ||
| 195 | a storage or distribution medium does not bring the other work under | ||
| 196 | the scope of this License. | ||
| 197 | |||
| 198 | 3. You may opt to apply the terms of the ordinary GNU General Public | ||
| 199 | License instead of this License to a given copy of the Library. To do | ||
| 200 | this, you must alter all the notices that refer to this License, so | ||
| 201 | that they refer to the ordinary GNU General Public License, version 2, | ||
| 202 | instead of to this License. (If a newer version than version 2 of the | ||
| 203 | ordinary GNU General Public License has appeared, then you can specify | ||
| 204 | that version instead if you wish.) Do not make any other change in | ||
| 205 | these notices. | ||
| 206 | |||
| 207 | Once this change is made in a given copy, it is irreversible for | ||
| 208 | that copy, so the ordinary GNU General Public License applies to all | ||
| 209 | subsequent copies and derivative works made from that copy. | ||
| 210 | |||
| 211 | This option is useful when you wish to copy part of the code of | ||
| 212 | the Library into a program that is not a library. | ||
| 213 | |||
| 214 | 4. You may copy and distribute the Library (or a portion or | ||
| 215 | derivative of it, under Section 2) in object code or executable form | ||
| 216 | under the terms of Sections 1 and 2 above provided that you accompany | ||
| 217 | it with the complete corresponding machine-readable source code, which | ||
| 218 | must be distributed under the terms of Sections 1 and 2 above on a | ||
| 219 | medium customarily used for software interchange. | ||
| 220 | |||
| 221 | If distribution of object code is made by offering access to copy | ||
| 222 | from a designated place, then offering equivalent access to copy the | ||
| 223 | source code from the same place satisfies the requirement to | ||
| 224 | distribute the source code, even though third parties are not | ||
| 225 | compelled to copy the source along with the object code. | ||
| 226 | |||
| 227 | 5. A program that contains no derivative of any portion of the | ||
| 228 | Library, but is designed to work with the Library by being compiled or | ||
| 229 | linked with it, is called a "work that uses the Library". Such a | ||
| 230 | work, in isolation, is not a derivative work of the Library, and | ||
| 231 | therefore falls outside the scope of this License. | ||
| 232 | |||
| 233 | However, linking a "work that uses the Library" with the Library | ||
| 234 | creates an executable that is a derivative of the Library (because it | ||
| 235 | contains portions of the Library), rather than a "work that uses the | ||
| 236 | library". The executable is therefore covered by this License. | ||
| 237 | Section 6 states terms for distribution of such executables. | ||
| 238 | |||
| 239 | When a "work that uses the Library" uses material from a header file | ||
| 240 | that is part of the Library, the object code for the work may be a | ||
| 241 | derivative work of the Library even though the source code is not. | ||
| 242 | Whether this is true is especially significant if the work can be | ||
| 243 | linked without the Library, or if the work is itself a library. The | ||
| 244 | threshold for this to be true is not precisely defined by law. | ||
| 245 | |||
| 246 | If such an object file uses only numerical parameters, data | ||
| 247 | structure layouts and accessors, and small macros and small inline | ||
| 248 | functions (ten lines or less in length), then the use of the object | ||
| 249 | file is unrestricted, regardless of whether it is legally a derivative | ||
| 250 | work. (Executables containing this object code plus portions of the | ||
| 251 | Library will still fall under Section 6.) | ||
| 252 | |||
| 253 | Otherwise, if the work is a derivative of the Library, you may | ||
| 254 | distribute the object code for the work under the terms of Section 6. | ||
| 255 | Any executables containing that work also fall under Section 6, | ||
| 256 | whether or not they are linked directly with the Library itself. | ||
| 257 | |||
| 258 | 6. As an exception to the Sections above, you may also compile or | ||
| 259 | link a "work that uses the Library" with the Library to produce a | ||
| 260 | work containing portions of the Library, and distribute that work | ||
| 261 | under terms of your choice, provided that the terms permit | ||
| 262 | modification of the work for the customer's own use and reverse | ||
| 263 | engineering for debugging such modifications. | ||
| 264 | |||
| 265 | You must give prominent notice with each copy of the work that the | ||
| 266 | Library is used in it and that the Library and its use are covered by | ||
| 267 | this License. You must supply a copy of this License. If the work | ||
| 268 | during execution displays copyright notices, you must include the | ||
| 269 | copyright notice for the Library among them, as well as a reference | ||
| 270 | directing the user to the copy of this License. Also, you must do one | ||
| 271 | of these things: | ||
| 272 | |||
| 273 | a) Accompany the work with the complete corresponding | ||
| 274 | machine-readable source code for the Library including whatever | ||
| 275 | changes were used in the work (which must be distributed under | ||
| 276 | Sections 1 and 2 above); and, if the work is an executable linked | ||
| 277 | with the Library, with the complete machine-readable "work that | ||
| 278 | uses the Library", as object code and/or source code, so that the | ||
| 279 | user can modify the Library and then relink to produce a modified | ||
| 280 | executable containing the modified Library. (It is understood | ||
| 281 | that the user who changes the contents of definitions files in the | ||
| 282 | Library will not necessarily be able to recompile the application | ||
| 283 | to use the modified definitions.) | ||
| 284 | |||
| 285 | b) Accompany the work with a written offer, valid for at | ||
| 286 | least three years, to give the same user the materials | ||
| 287 | specified in Subsection 6a, above, for a charge no more | ||
| 288 | than the cost of performing this distribution. | ||
| 289 | |||
| 290 | c) If distribution of the work is made by offering access to copy | ||
| 291 | from a designated place, offer equivalent access to copy the above | ||
| 292 | specified materials from the same place. | ||
| 293 | |||
| 294 | d) Verify that the user has already received a copy of these | ||
| 295 | materials or that you have already sent this user a copy. | ||
| 296 | |||
| 297 | For an executable, the required form of the "work that uses the | ||
| 298 | Library" must include any data and utility programs needed for | ||
| 299 | reproducing the executable from it. However, as a special exception, | ||
| 300 | the source code distributed need not include anything that is normally | ||
| 301 | distributed (in either source or binary form) with the major | ||
| 302 | components (compiler, kernel, and so on) of the operating system on | ||
| 303 | which the executable runs, unless that component itself accompanies | ||
| 304 | the executable. | ||
| 305 | |||
| 306 | It may happen that this requirement contradicts the license | ||
| 307 | restrictions of other proprietary libraries that do not normally | ||
| 308 | accompany the operating system. Such a contradiction means you cannot | ||
| 309 | use both them and the Library together in an executable that you | ||
| 310 | distribute. | ||
| 311 | |||
| 312 | 7. You may place library facilities that are a work based on the | ||
| 313 | Library side-by-side in a single library together with other library | ||
| 314 | facilities not covered by this License, and distribute such a combined | ||
| 315 | library, provided that the separate distribution of the work based on | ||
| 316 | the Library and of the other library facilities is otherwise | ||
| 317 | permitted, and provided that you do these two things: | ||
| 318 | |||
| 319 | a) Accompany the combined library with a copy of the same work | ||
| 320 | based on the Library, uncombined with any other library | ||
| 321 | facilities. This must be distributed under the terms of the | ||
| 322 | Sections above. | ||
| 323 | |||
| 324 | b) Give prominent notice with the combined library of the fact | ||
| 325 | that part of it is a work based on the Library, and explaining | ||
| 326 | where to find the accompanying uncombined form of the same work. | ||
| 327 | |||
| 328 | 8. You may not copy, modify, sublicense, link with, or distribute | ||
| 329 | the Library except as expressly provided under this License. Any | ||
| 330 | attempt otherwise to copy, modify, sublicense, link with, or | ||
| 331 | distribute the Library is void, and will automatically terminate your | ||
| 332 | rights under this License. However, parties who have received copies, | ||
| 333 | or rights, from you under this License will not have their licenses | ||
| 334 | terminated so long as such parties remain in full compliance. | ||
| 335 | |||
| 336 | 9. You are not required to accept this License, since you have not | ||
| 337 | signed it. However, nothing else grants you permission to modify or | ||
| 338 | distribute the Library or its derivative works. These actions are | ||
| 339 | prohibited by law if you do not accept this License. Therefore, by | ||
| 340 | modifying or distributing the Library (or any work based on the | ||
| 341 | Library), you indicate your acceptance of this License to do so, and | ||
| 342 | all its terms and conditions for copying, distributing or modifying | ||
| 343 | the Library or works based on it. | ||
| 344 | |||
| 345 | 10. Each time you redistribute the Library (or any work based on the | ||
| 346 | Library), the recipient automatically receives a license from the | ||
| 347 | original licensor to copy, distribute, link with or modify the Library | ||
| 348 | subject to these terms and conditions. You may not impose any further | ||
| 349 | restrictions on the recipients' exercise of the rights granted herein. | ||
| 350 | You are not responsible for enforcing compliance by third parties to | ||
| 351 | this License. | ||
| 352 | |||
| 353 | 11. If, as a consequence of a court judgment or allegation of patent | ||
| 354 | infringement or for any other reason (not limited to patent issues), | ||
| 355 | conditions are imposed on you (whether by court order, agreement or | ||
| 356 | otherwise) that contradict the conditions of this License, they do not | ||
| 357 | excuse you from the conditions of this License. If you cannot | ||
| 358 | distribute so as to satisfy simultaneously your obligations under this | ||
| 359 | License and any other pertinent obligations, then as a consequence you | ||
| 360 | may not distribute the Library at all. For example, if a patent | ||
| 361 | license would not permit royalty-free redistribution of the Library by | ||
| 362 | all those who receive copies directly or indirectly through you, then | ||
| 363 | the only way you could satisfy both it and this License would be to | ||
| 364 | refrain entirely from distribution of the Library. | ||
| 365 | |||
| 366 | If any portion of this section is held invalid or unenforceable under any | ||
| 367 | particular circumstance, the balance of the section is intended to apply, | ||
| 368 | and the section as a whole is intended to apply in other circumstances. | ||
| 369 | |||
| 370 | It is not the purpose of this section to induce you to infringe any | ||
| 371 | patents or other property right claims or to contest validity of any | ||
| 372 | such claims; this section has the sole purpose of protecting the | ||
| 373 | integrity of the free software distribution system which is | ||
| 374 | implemented by public license practices. Many people have made | ||
| 375 | generous contributions to the wide range of software distributed | ||
| 376 | through that system in reliance on consistent application of that | ||
| 377 | system; it is up to the author/donor to decide if he or she is willing | ||
| 378 | to distribute software through any other system and a licensee cannot | ||
| 379 | impose that choice. | ||
| 380 | |||
| 381 | This section is intended to make thoroughly clear what is believed to | ||
| 382 | be a consequence of the rest of this License. | ||
| 383 | |||
| 384 | 12. If the distribution and/or use of the Library is restricted in | ||
| 385 | certain countries either by patents or by copyrighted interfaces, the | ||
| 386 | original copyright holder who places the Library under this License may add | ||
| 387 | an explicit geographical distribution limitation excluding those countries, | ||
| 388 | so that distribution is permitted only in or among countries not thus | ||
| 389 | excluded. In such case, this License incorporates the limitation as if | ||
| 390 | written in the body of this License. | ||
| 391 | |||
| 392 | 13. The Free Software Foundation may publish revised and/or new | ||
| 393 | versions of the Library General Public License from time to time. | ||
| 394 | Such new versions will be similar in spirit to the present version, | ||
| 395 | but may differ in detail to address new problems or concerns. | ||
| 396 | |||
| 397 | Each version is given a distinguishing version number. If the Library | ||
| 398 | specifies a version number of this License which applies to it and | ||
| 399 | "any later version", you have the option of following the terms and | ||
| 400 | conditions either of that version or of any later version published by | ||
| 401 | the Free Software Foundation. If the Library does not specify a | ||
| 402 | license version number, you may choose any version ever published by | ||
| 403 | the Free Software Foundation. | ||
| 404 | |||
| 405 | 14. If you wish to incorporate parts of the Library into other free | ||
| 406 | programs whose distribution conditions are incompatible with these, | ||
| 407 | write to the author to ask for permission. For software which is | ||
| 408 | copyrighted by the Free Software Foundation, write to the Free | ||
| 409 | Software Foundation; we sometimes make exceptions for this. Our | ||
| 410 | decision will be guided by the two goals of preserving the free status | ||
| 411 | of all derivatives of our free software and of promoting the sharing | ||
| 412 | and reuse of software generally. | ||
| 413 | |||
| 414 | NO WARRANTY | ||
| 415 | |||
| 416 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO | ||
| 417 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. | ||
| 418 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR | ||
| 419 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY | ||
| 420 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 421 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
| 422 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE | ||
| 423 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME | ||
| 424 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. | ||
| 425 | |||
| 426 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN | ||
| 427 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY | ||
| 428 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU | ||
| 429 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR | ||
| 430 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE | ||
| 431 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING | ||
| 432 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A | ||
| 433 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF | ||
| 434 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | ||
| 435 | DAMAGES. | ||
| 436 | |||
| 437 | END OF TERMS AND CONDITIONS | ||
| 438 | |||
| 439 | Appendix: How to Apply These Terms to Your New Libraries | ||
| 440 | |||
| 441 | If you develop a new library, and you want it to be of the greatest | ||
| 442 | possible use to the public, we recommend making it free software that | ||
| 443 | everyone can redistribute and change. You can do so by permitting | ||
| 444 | redistribution under these terms (or, alternatively, under the terms of the | ||
| 445 | ordinary General Public License). | ||
| 446 | |||
| 447 | To apply these terms, attach the following notices to the library. It is | ||
| 448 | safest to attach them to the start of each source file to most effectively | ||
| 449 | convey the exclusion of warranty; and each file should have at least the | ||
| 450 | "copyright" line and a pointer to where the full notice is found. | ||
| 451 | |||
| 452 | <one line to give the library's name and a brief idea of what it does.> | ||
| 453 | Copyright (C) <year> <name of author> | ||
| 454 | |||
| 455 | This library is free software; you can redistribute it and/or | ||
| 456 | modify it under the terms of the GNU Library General Public | ||
| 457 | License as published by the Free Software Foundation; either | ||
| 458 | version 2 of the License, or (at your option) any later version. | ||
| 459 | |||
| 460 | This library is distributed in the hope that it will be useful, | ||
| 461 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 462 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 463 | Library General Public License for more details. | ||
| 464 | |||
| 465 | You should have received a copy of the GNU Library General Public | ||
| 466 | License along with this library; if not, write to the Free | ||
| 467 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 468 | |||
| 469 | Also add information on how to contact you by electronic and paper mail. | ||
| 470 | |||
| 471 | You should also get your employer (if you work as a programmer) or your | ||
| 472 | school, if any, to sign a "copyright disclaimer" for the library, if | ||
| 473 | necessary. Here is a sample; alter the names: | ||
| 474 | |||
| 475 | Yoyodyne, Inc., hereby disclaims all copyright interest in the | ||
| 476 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. | ||
| 477 | |||
| 478 | <signature of Ty Coon>, 1 April 1990 | ||
| 479 | Ty Coon, President of Vice | ||
| 480 | |||
| 481 | That's all there is to it! | ||
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile new file mode 100644 index 000000000000..2296ff9dc47a --- /dev/null +++ b/arch/sparc/lib/Makefile | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | # $Id: Makefile,v 1.35 2000/12/15 00:41:18 davem Exp $ | ||
| 2 | # Makefile for Sparc library files.. | ||
| 3 | # -ansi tightens gcc's preprocessing of the .S files; ST_DIV0=0x02 names the software-trap number presumably raised by the divide/remainder routines on divide-by-zero -- confirm against sdiv.S/udiv.S | ||
| 4 | |||
| 5 | EXTRA_AFLAGS := -ansi -DST_DIV0=0x02 | ||
| 6 | |||
| 7 | lib-y := mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o memcpy.o memset.o \ | ||
| 8 | strlen.o checksum.o blockops.o memscan.o memcmp.o strncmp.o \ | ||
| 9 | strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \ | ||
| 10 | copy_user.o locks.o atomic.o atomic32.o bitops.o \ | ||
| 11 | lshrdi3.o ashldi3.o rwsem.o muldi3.o bitext.o | ||
| 12 | |||
| 13 | lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o | ||
diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S new file mode 100644 index 000000000000..52418a0cb3dd --- /dev/null +++ b/arch/sparc/lib/ashldi3.S | |||
| @@ -0,0 +1,34 @@ | |||
/* $Id: ashldi3.S,v 1.2 1999/11/19 04:11:46 davem Exp $
 * ashldi3.S: GCC emits these for certain drivers playing
 * with long longs.
 *
 * Copyright (C) 1999 David S. Miller (davem@redhat.com)
 */

	/* 64-bit logical shift left: %o0:%o1 (high:low) <<= %o2.
	 * Result returned in %o0:%o1; only %g2/%g3 and out registers
	 * are touched, so no register window is needed (leaf retl).
	 */
	.text
	.align	4
	.globl __ashldi3
__ashldi3:
	cmp %o2, 0		! shift count of zero?
	be 9f			! yes, value is returned unchanged
	mov 0x20, %g2		! (delay) %g2 = 32

	sub %g2, %o2, %g2	! %g2 = 32 - count
	cmp %g2, 0
	bg 7f			! count < 32: both words contribute
	sll %o0, %o2, %g3	! (delay) high word << count

	! count >= 32: low word is zero, high = low << (count - 32)
	neg %g2			! %g2 = count - 32
	clr %o5			! new low word = 0
	b 8f
	sll %o1, %g2, %o4	! (delay) new high word

7:
	srl %o1, %g2, %g2	! low bits that slide into the high word
	sll %o1, %o2, %o5	! new low word
	or %g3, %g2, %o4	! combine into new high word
8:
	mov %o4, %o0		! move result into place
	mov %o5, %o1
9:
	retl
	nop
diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S new file mode 100644 index 000000000000..2848237598a4 --- /dev/null +++ b/arch/sparc/lib/ashrdi3.S | |||
| @@ -0,0 +1,36 @@ | |||
/* $Id: ashrdi3.S,v 1.4 1999/11/19 04:11:49 davem Exp $
 * ashrdi3.S: The filesystem code creates all kinds of references to
 * this little routine on the sparc with gcc.
 *
 * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
 */

	/* 64-bit arithmetic shift right: %o0:%o1 (high:low) >>= %o2,
	 * with sign extension.  Result returned in %o0:%o1.
	 */
	.text
	.align 4
	.globl __ashrdi3
__ashrdi3:
	tst %o2			! shift count of zero?
	be 3f			! yes, return input unchanged
	or %g0, 32, %g2		! (delay) %g2 = 32

	sub %g2, %o2, %g2	! %g2 = 32 - count

	tst %g2
	bg 1f			! count < 32: both words contribute
	sra %o0, %o2, %o4	! (delay) tentative new high word

	! count >= 32: high word collapses to pure sign bits
	sra %o0, 31, %o4	! new high = sign extension
	sub %g0, %g2, %g2	! %g2 = count - 32
	ba 2f
	sra %o0, %g2, %o5	! (delay) new low = high >> (count - 32)

1:
	sll %o0, %g2, %g3	! high bits sliding into the low word
	srl %o1, %o2, %g2	! low word, logical shift
	or %g2, %g3, %o5	! combine into new low word
2:
	or %g0, %o4, %o0	! move result into place
	or %g0, %o5, %o1
3:
	jmpl %o7 + 8, %g0	! leaf return
	nop
diff --git a/arch/sparc/lib/atomic.S b/arch/sparc/lib/atomic.S new file mode 100644 index 000000000000..f48ad0c4dadb --- /dev/null +++ b/arch/sparc/lib/atomic.S | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | /* atomic.S: Move this stuff here for better ICACHE hit rates. | ||
| 2 | * | ||
| 3 | * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <linux/config.h> | ||
| 7 | #include <asm/ptrace.h> | ||
| 8 | #include <asm/psr.h> | ||
| 9 | |||
	.text
	.align	4

	.globl  __atomic_begin
__atomic_begin:

#ifndef CONFIG_SMP
	/* ___xchg32_sun4c: exchange *%g1 with %g2; old value returned
	 * in %g2.  %g4 carries the value to restore into %o7 (these are
	 * called with a non-standard linkage -- see callers in the
	 * sparc headers).  Atomicity is faked by raising PSR_PIL to
	 * mask interrupts around the load/store pair; if PIL was
	 * already raised on entry the PSR is left untouched.
	 * Presumably sun4c cannot use the swap instruction that the
	 * sun4m/d variant below relies on -- verify against cpu docs.
	 */
	.globl ___xchg32_sun4c
___xchg32_sun4c:
	rd %psr, %g3
	andcc %g3, PSR_PIL, %g0		! interrupts already masked?
	bne 1f				! yes -> leave %psr alone
	nop
	wr %g3, PSR_PIL, %psr		! mask interrupts
	nop; nop; nop			! let the write settle
1:
	andcc %g3, PSR_PIL, %g0		! were they masked on entry?
	ld [%g1], %g7			! fetch old value
	bne 1f				! entry PIL set -> skip restore
	st %g2, [%g1]			! (delay) store new value
	wr %g3, 0x0, %psr		! restore original %psr
	nop; nop; nop
1:
	mov %g7, %g2			! return old value in %g2
	jmpl %o7 + 8, %g0
	mov %g4, %o7			! (delay) restore %o7

	/* Same exchange, for cpus with a usable swap instruction. */
	.globl ___xchg32_sun4md
___xchg32_sun4md:
	swap [%g1], %g2			! atomic exchange in one insn
	jmpl %o7 + 8, %g0
	mov %g4, %o7			! (delay) restore %o7
#endif
| 43 | |||
/* Read asm-sparc/atomic.h carefully to understand how this works for SMP.
 * Really, some things here for SMP are overly clever, go read the header.
 */
	/* ___atomic24_add: %g1 = ptr, %g2 = increment; returns the new
	 * value in %g2.  %g4 holds the %o7 to restore.  Interrupts are
	 * masked via PSR_PIL for the duration.  On SMP, byte 3 of the
	 * word (the LSB on big-endian sparc) is a ldstub byte lock and
	 * the counter lives in the upper 24 bits; the final full-word
	 * store writes the shifted counter AND zeroes the lock byte.
	 */
	.globl ___atomic24_add
___atomic24_add:
	rd %psr, %g3			! Keep the code small, old way was stupid
	nop; nop; nop;			! Let the bits set
	or %g3, PSR_PIL, %g7		! Disable interrupts
	wr %g7, 0x0, %psr		! Set %psr
	nop; nop; nop;			! Let the bits set
#ifdef CONFIG_SMP
1:	ldstub [%g1 + 3], %g7		! Spin on the byte lock for SMP.
	orcc %g7, 0x0, %g0		! Did we get it?
	bne 1b				! Nope...
	ld [%g1], %g7			! Load locked atomic24_t
	sra %g7, 8, %g7			! Get signed 24-bit integer
	add %g7, %g2, %g2		! Add in argument
	sll %g2, 8, %g7			! Transpose back to atomic24_t
	st %g7, [%g1]			! Clever: This releases the lock as well.
#else
	ld [%g1], %g7			! Load locked atomic24_t
	add %g7, %g2, %g2		! Add in argument
	st %g2, [%g1]			! Store it back
#endif
	wr %g3, 0x0, %psr		! Restore original PSR_PIL
	nop; nop; nop;			! Let the bits set
	jmpl %o7, %g0			! NOTE: not + 8, see callers in atomic.h
	mov %g4, %o7			! Restore %o7
| 72 | |||
	/* ___atomic24_sub: %g1 = ptr, %g2 = decrement; returns the new
	 * value in %g2.  Mirror image of ___atomic24_add above; see the
	 * lock-byte explanation there.
	 */
	.globl ___atomic24_sub
___atomic24_sub:
	rd %psr, %g3			! Keep the code small, old way was stupid
	nop; nop; nop;			! Let the bits set
	or %g3, PSR_PIL, %g7		! Disable interrupts
	wr %g7, 0x0, %psr		! Set %psr
	nop; nop; nop;			! Let the bits set
#ifdef CONFIG_SMP
1:	ldstub [%g1 + 3], %g7		! Spin on the byte lock for SMP.
	orcc %g7, 0x0, %g0		! Did we get it?
	bne 1b				! Nope...
	ld [%g1], %g7			! Load locked atomic24_t
	sra %g7, 8, %g7			! Get signed 24-bit integer
	sub %g7, %g2, %g2		! Subtract argument
	sll %g2, 8, %g7			! Transpose back to atomic24_t
	st %g7, [%g1]			! Clever: This releases the lock as well
#else
	ld [%g1], %g7			! Load locked atomic24_t
	sub %g7, %g2, %g2		! Subtract argument
	st %g2, [%g1]			! Store it back
#endif
	wr %g3, 0x0, %psr		! Restore original PSR_PIL
	nop; nop; nop;			! Let the bits set
	jmpl %o7, %g0			! NOTE: not + 8, see callers in atomic.h
	mov %g4, %o7			! Restore %o7

	.globl  __atomic_end
__atomic_end:
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c new file mode 100644 index 000000000000..19724c5800a7 --- /dev/null +++ b/arch/sparc/lib/atomic32.c | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | /* | ||
| 2 | * atomic32.c: 32-bit atomic_t implementation | ||
| 3 | * | ||
| 4 | * Copyright (C) 2004 Keith M Wesolowski | ||
| 5 | * | ||
| 6 | * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <asm/atomic.h> | ||
| 10 | #include <linux/spinlock.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | |||
#ifdef CONFIG_SMP
/* Hash an atomic_t's address onto a small array of spinlocks so that
 * unrelated counters rarely contend on the same lock.  The >>8 drops
 * the low address bits, so objects within the same 256-byte region
 * share a lock.
 */
#define ATOMIC_HASH_SIZE	4
#define ATOMIC_HASH(a)	(&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])

spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
	[0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
};

#else /* SMP */

/* UP: the hash degenerates to a single dummy lock; the irqsave in the
 * functions below still masks interrupts, which is all the exclusion
 * a uniprocessor needs. */
static spinlock_t dummy = SPIN_LOCK_UNLOCKED;
#define ATOMIC_HASH_SIZE	1
#define ATOMIC_HASH(a)	(&dummy)

#endif /* SMP */
| 28 | |||
| 29 | int __atomic_add_return(int i, atomic_t *v) | ||
| 30 | { | ||
| 31 | int ret; | ||
| 32 | unsigned long flags; | ||
| 33 | spin_lock_irqsave(ATOMIC_HASH(v), flags); | ||
| 34 | |||
| 35 | ret = (v->counter += i); | ||
| 36 | |||
| 37 | spin_unlock_irqrestore(ATOMIC_HASH(v), flags); | ||
| 38 | return ret; | ||
| 39 | } | ||
| 40 | |||
| 41 | void atomic_set(atomic_t *v, int i) | ||
| 42 | { | ||
| 43 | unsigned long flags; | ||
| 44 | spin_lock_irqsave(ATOMIC_HASH(v), flags); | ||
| 45 | |||
| 46 | v->counter = i; | ||
| 47 | |||
| 48 | spin_unlock_irqrestore(ATOMIC_HASH(v), flags); | ||
| 49 | } | ||
| 50 | |||
/* Exported so modules using the sparc32 atomic_t helpers can link. */
EXPORT_SYMBOL(__atomic_add_return);
EXPORT_SYMBOL(atomic_set);
| 53 | |||
diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c new file mode 100644 index 000000000000..94b05e8c906c --- /dev/null +++ b/arch/sparc/lib/bitext.c | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | /* | ||
| 2 | * bitext.c: kernel little helper (of bit shuffling variety). | ||
| 3 | * | ||
| 4 | * Copyright (C) 2002 Pete Zaitcev <zaitcev@yahoo.com> | ||
| 5 | * | ||
| 6 | * The algorithm to search a zero bit string is geared towards its application. | ||
| 7 | * We expect a couple of fixed sizes of requests, so a rotating counter, reset | ||
| 8 | * by align size, should provide fast enough search while maintaining low | ||
| 9 | * fragmentation. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/smp_lock.h> | ||
| 13 | #include <linux/bitops.h> | ||
| 14 | |||
| 15 | #include <asm/bitext.h> | ||
| 16 | |||
| 17 | /** | ||
| 18 | * bit_map_string_get - find and set a bit string in bit map. | ||
| 19 | * @t: the bit map. | ||
| 20 | * @len: requested string length | ||
| 21 | * @align: requested alignment | ||
| 22 | * | ||
| 23 | * Returns offset in the map or -1 if out of space. | ||
| 24 | * | ||
| 25 | * Not safe to call from an interrupt (uses spin_lock). | ||
| 26 | */ | ||
/**
 * bit_map_string_get - find and set a bit string in bit map.
 * @t: the bit map.
 * @len: requested string length
 * @align: requested alignment
 *
 * Returns offset in the map or -1 if out of space.
 *
 * Not safe to call from an interrupt (uses spin_lock).
 */
int bit_map_string_get(struct bit_map *t, int len, int align)
{
	int offset, count;	/* siamese twins */
	int off_new;
	int align1;		/* align - 1, used as a mask */
	int i, color;

	if (t->num_colors) {
		/* align is overloaded to be the page color */
		color = align;
		align = t->num_colors;
	} else {
		color = 0;
		if (align == 0)
			align = 1;
	}
	align1 = align - 1;
	/* alignment must be a power of two and sizes must be sane */
	if ((align & align1) != 0)
		BUG();
	if (align < 0 || align >= t->size)
		BUG();
	if (len <= 0 || len > t->size)
		BUG();
	color &= align1;

	spin_lock(&t->lock);
	/* Rotating start point: small requests restart at the lowest
	 * known free bit, others continue past the previous allocation
	 * (rounded down to the alignment) to limit fragmentation. */
	if (len < t->last_size)
		offset = t->first_free;
	else
		offset = t->last_off & ~align1;
	count = 0;	/* total bits examined; detects full wrap-around */
	for (;;) {
		/* next aligned, color-adjusted zero bit */
		off_new = find_next_zero_bit(t->map, t->size, offset);
		off_new = ((off_new + align1) & ~align1) + color;
		count += off_new - offset;
		offset = off_new;
		if (offset >= t->size)
			offset = 0;
		if (count + len > t->size) {
			/* scanned everything once around: no room */
			spin_unlock(&t->lock);
			/* P3 */ printk(KERN_ERR
			"bitmap out: size %d used %d off %d len %d align %d count %d\n",
				t->size, t->used, offset, len, align, count);
			return -1;
		}

		if (offset + len > t->size) {
			/* string would run off the end: wrap and retry */
			count += t->size - offset;
			offset = 0;
			continue;
		}

		/* check that all len bits starting at offset are clear */
		i = 0;
		while (test_bit(offset + i, t->map) == 0) {
			i++;
			if (i == len) {
				/* success: claim the string and update
				 * the rotating-search bookkeeping */
				for (i = 0; i < len; i++)
					__set_bit(offset + i, t->map);
				if (offset == t->first_free)
					t->first_free = find_next_zero_bit
							(t->map, t->size,
							 t->first_free + len);
				if ((t->last_off = offset + len) >= t->size)
					t->last_off = 0;
				t->used += len;
				t->last_size = len;
				spin_unlock(&t->lock);
				return offset;
			}
		}
		/* hit a set bit at offset+i: skip past it and rescan */
		count += i + 1;
		if ((offset += i + 1) >= t->size)
			offset = 0;
	}
}
| 102 | |||
| 103 | void bit_map_clear(struct bit_map *t, int offset, int len) | ||
| 104 | { | ||
| 105 | int i; | ||
| 106 | |||
| 107 | if (t->used < len) | ||
| 108 | BUG(); /* Much too late to do any good, but alas... */ | ||
| 109 | spin_lock(&t->lock); | ||
| 110 | for (i = 0; i < len; i++) { | ||
| 111 | if (test_bit(offset + i, t->map) == 0) | ||
| 112 | BUG(); | ||
| 113 | __clear_bit(offset + i, t->map); | ||
| 114 | } | ||
| 115 | if (offset < t->first_free) | ||
| 116 | t->first_free = offset; | ||
| 117 | t->used -= len; | ||
| 118 | spin_unlock(&t->lock); | ||
| 119 | } | ||
| 120 | |||
| 121 | void bit_map_init(struct bit_map *t, unsigned long *map, int size) | ||
| 122 | { | ||
| 123 | |||
| 124 | if ((size & 07) != 0) | ||
| 125 | BUG(); | ||
| 126 | memset(map, 0, size>>3); | ||
| 127 | |||
| 128 | memset(t, 0, sizeof *t); | ||
| 129 | spin_lock_init(&t->lock); | ||
| 130 | t->map = map; | ||
| 131 | t->size = size; | ||
| 132 | } | ||
diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S new file mode 100644 index 000000000000..3e9399769075 --- /dev/null +++ b/arch/sparc/lib/bitops.S | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | /* bitops.S: Low level assembler bit operations. | ||
| 2 | * | ||
| 3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <linux/config.h> | ||
| 7 | #include <asm/ptrace.h> | ||
| 8 | #include <asm/psr.h> | ||
| 9 | |||
| 10 | .text | ||
| 11 | .align 4 | ||
| 12 | |||
	.globl  __bitops_begin
__bitops_begin:

/* Take bits in %g2 and set them in word at %g1,
 * return whether bits were set in original value
 * in %g2. %g4 holds value to restore into %o7
 * in delay slot of jmpl return, %g3 + %g5 + %g7 can be
 * used as temporaries and thus is considered clobbered
 * by all callers.
 */
	.globl  ___set_bit
___set_bit:
	rd	%psr, %g3
	nop; nop; nop;			! let the rd settle
	or	%g3, PSR_PIL, %g5
	wr	%g5, 0x0, %psr		! mask interrupts
	nop; nop; nop
#ifdef CONFIG_SMP
	set	bitops_spinlock, %g5
2:	ldstub	[%g5], %g7		! Spin on the byte lock for SMP.
	orcc	%g7, 0x0, %g0		! Did we get it?
	bne	2b			! Nope...
#endif
	ld	[%g1], %g7
	or	%g7, %g2, %g5		! new word with requested bits set
	and	%g7, %g2, %g2		! old state of those bits (retval)
#ifdef CONFIG_SMP
	st	%g5, [%g1]
	set	bitops_spinlock, %g5
	stb	%g0, [%g5]		! release the byte lock
#else
	st	%g5, [%g1]
#endif
	wr	%g3, 0x0, %psr		! restore original %psr
	nop; nop; nop
	jmpl	%o7, %g0		! non-standard linkage, see header
	mov	%g4, %o7		! (delay) restore %o7
| 50 | |||
/* Same as above, but clears the bits from %g2 instead.
 * (andn replaces or; everything else is identical to ___set_bit.)
 */
	.globl  ___clear_bit
___clear_bit:
	rd	%psr, %g3
	nop; nop; nop			! let the rd settle
	or	%g3, PSR_PIL, %g5
	wr	%g5, 0x0, %psr		! mask interrupts
	nop; nop; nop
#ifdef CONFIG_SMP
	set	bitops_spinlock, %g5
2:	ldstub	[%g5], %g7		! Spin on the byte lock for SMP.
	orcc	%g7, 0x0, %g0		! Did we get it?
	bne	2b			! Nope...
#endif
	ld	[%g1], %g7
	andn	%g7, %g2, %g5		! new word with requested bits cleared
	and	%g7, %g2, %g2		! old state of those bits (retval)
#ifdef CONFIG_SMP
	st	%g5, [%g1]
	set	bitops_spinlock, %g5
	stb	%g0, [%g5]		! release the byte lock
#else
	st	%g5, [%g1]
#endif
	wr	%g3, 0x0, %psr		! restore original %psr
	nop; nop; nop
	jmpl	%o7, %g0		! non-standard linkage, see header
	mov	%g4, %o7		! (delay) restore %o7
| 79 | |||
/* Same thing again, but this time toggles the bits from %g2.
 * (xor replaces or; everything else is identical to ___set_bit.)
 */
	.globl  ___change_bit
___change_bit:
	rd	%psr, %g3
	nop; nop; nop			! let the rd settle
	or	%g3, PSR_PIL, %g5
	wr	%g5, 0x0, %psr		! mask interrupts
	nop; nop; nop
#ifdef CONFIG_SMP
	set	bitops_spinlock, %g5
2:	ldstub	[%g5], %g7		! Spin on the byte lock for SMP.
	orcc	%g7, 0x0, %g0		! Did we get it?
	bne	2b			! Nope...
#endif
	ld	[%g1], %g7
	xor	%g7, %g2, %g5		! new word with requested bits toggled
	and	%g7, %g2, %g2		! old state of those bits (retval)
#ifdef CONFIG_SMP
	st	%g5, [%g1]
	set	bitops_spinlock, %g5
	stb	%g0, [%g5]		! release the byte lock
#else
	st	%g5, [%g1]
#endif
	wr	%g3, 0x0, %psr		! restore original %psr
	nop; nop; nop
	jmpl	%o7, %g0		! non-standard linkage, see header
	mov	%g4, %o7		! (delay) restore %o7

	.globl  __bitops_end
__bitops_end:
diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S new file mode 100644 index 000000000000..a7c7ffaa4a94 --- /dev/null +++ b/arch/sparc/lib/blockops.S | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | /* $Id: blockops.S,v 1.8 1998/01/30 10:58:44 jj Exp $ | ||
| 2 | * blockops.S: Common block zero optimized routines. | ||
| 3 | * | ||
| 4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <asm/page.h> | ||
| 8 | |||
/* Zero out 64 bytes of memory at (buf + offset).
 * Assumes %g1 contains zero: "std %g0" stores the %g0/%g1 register
 * pair as a doubleword, so the odd word written is whatever %g1
 * happens to hold.
 */
#define BLAST_BLOCK(buf, offset) \
	std	%g0, [buf + offset + 0x38]; \
	std	%g0, [buf + offset + 0x30]; \
	std	%g0, [buf + offset + 0x28]; \
	std	%g0, [buf + offset + 0x20]; \
	std	%g0, [buf + offset + 0x18]; \
	std	%g0, [buf + offset + 0x10]; \
	std	%g0, [buf + offset + 0x08]; \
	std	%g0, [buf + offset + 0x00];

/* Copy 32 bytes of memory at (src + offset) to
 * (dst + offset).  t0..t7 must name four even/odd register pairs,
 * since ldd/std move 8 bytes through a register pair.  All loads
 * are issued before the stores.
 */
#define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[src + offset + 0x18], t0; \
	ldd	[src + offset + 0x10], t2; \
	ldd	[src + offset + 0x08], t4; \
	ldd	[src + offset + 0x00], t6; \
	std	t0, [dst + offset + 0x18]; \
	std	t2, [dst + offset + 0x10]; \
	std	t4, [dst + offset + 0x08]; \
	std	t6, [dst + offset + 0x00];
| 34 | |||
/* Profiling evidence indicates that memset() is
 * commonly called for blocks of size PAGE_SIZE,
 * and (2 * PAGE_SIZE) (for kernel stacks)
 * and with a second arg of zero. We assume in
 * all of these cases that the buffer is aligned
 * on at least an 8 byte boundary.
 *
 * Therefore we special case them to make them
 * as fast as possible.
 */

	.text
	.align	4
	.globl	bzero_1page, __copy_1page

bzero_1page:
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	/* %o0 = buf.  Zeroes one PAGE_SIZE block, 256 bytes per loop
	 * iteration (four BLAST_BLOCKs of 64 bytes each). */
	or %g0, %g0, %g1		! %g1 = 0; BLAST_BLOCK stds %g0/%g1
	or %o0, %g0, %o1		! copy of buf in %o1 -- kept for the
					!  hypersparc variant? TODO confirm
	or %g0, (PAGE_SIZE >> 8), %g2	! loop count: page / 256
1:
	BLAST_BLOCK(%o0, 0x00)
	BLAST_BLOCK(%o0, 0x40)
	BLAST_BLOCK(%o0, 0x80)
	BLAST_BLOCK(%o0, 0xc0)
	subcc %g2, 1, %g2
	bne 1b
	add %o0, 0x100, %o0		! (delay) next 256-byte chunk

	retl
	nop
| 68 | |||
__copy_1page:
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	/* %o0 = dst, %o1 = src.  Copies one PAGE_SIZE block, 256 bytes
	 * per loop iteration (eight MIRROR_BLOCKs of 32 bytes each). */
	or %g0, (PAGE_SIZE >> 8), %g1	! loop count: page / 256
1:
	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	subcc %g1, 1, %g1		! condition codes set here...
	add %o0, 0x100, %o0		! ...and add does not disturb them
	bne 1b
	add %o1, 0x100, %o1		! (delay) bump src pointer

	retl
	nop
diff --git a/arch/sparc/lib/checksum.S b/arch/sparc/lib/checksum.S new file mode 100644 index 000000000000..77f228533d47 --- /dev/null +++ b/arch/sparc/lib/checksum.S | |||
| @@ -0,0 +1,583 @@ | |||
| 1 | /* checksum.S: Sparc optimized checksum code. | ||
| 2 | * | ||
| 3 | * Copyright(C) 1995 Linus Torvalds | ||
| 4 | * Copyright(C) 1995 Miguel de Icaza | ||
| 5 | * Copyright(C) 1996 David S. Miller | ||
| 6 | * Copyright(C) 1997 Jakub Jelinek | ||
| 7 | * | ||
| 8 | * derived from: | ||
| 9 | * Linux/Alpha checksum c-code | ||
| 10 | * Linux/ix86 inline checksum assembly | ||
| 11 | * RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code) | ||
| 12 | * David Mosberger-Tang for optimized reference c-code | ||
| 13 | * BSD4.4 portable checksum routine | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <asm/errno.h> | ||
| 17 | |||
/* Fold 32 bytes at (buf + offset) into the one's-complement sum.
 * addxcc both consumes the previous carry and latches a new one,
 * giving the end-around-carry behavior the Internet checksum needs;
 * loads are interleaved with the adds, and t0/t1 are reused for the
 * final doubleword.  Entered/left with carry meaningful.
 */
#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
	ldd	[buf + offset + 0x00], t0;			\
	ldd	[buf + offset + 0x08], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	ldd	[buf + offset + 0x10], t4;			\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;					\
	ldd	[buf + offset + 0x18], t0;			\
	addxcc	t4, sum, sum;					\
	addxcc	t5, sum, sum;					\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;

/* Fold the 16 bytes ending at (buf - offset + 8) into the sum.
 * Used as a jump-table entry: csum_partial computes an entry point
 * into a run of these so that exactly (len & 0x70) bytes are eaten.
 */
#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
	ldd	[buf - offset - 0x08], t0;			\
	ldd	[buf - offset - 0x00], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;
| 39 | |||
/* Do end cruft out of band to get better cache patterns. */
	/* On entry: %o0 = buf, %o1 = len (only bits 0-3 still matter),
	 * %o2 = running sum; condition codes reflect "len & 8".
	 * Folds the final 0-15 bytes into the sum and returns it in %o0.
	 */
csum_partial_end_cruft:
	be 1f				! caller asks %o1 & 0x8
	andcc %o1, 4, %g0		! nope, check for word remaining
	ldd [%o0], %g2			! load two
	addcc %g2, %o2, %o2		! add first word to sum
	addxcc %g3, %o2, %o2		! add second word as well
	add %o0, 8, %o0			! advance buf ptr
	addx %g0, %o2, %o2		! add in final carry
	andcc %o1, 4, %g0		! check again for word remaining
1:	be 1f				! nope, skip this code
	andcc %o1, 3, %o1		! check for trailing bytes
	ld [%o0], %g2			! load it
	addcc %g2, %o2, %o2		! add to sum
	add %o0, 4, %o0			! advance buf ptr
	addx %g0, %o2, %o2		! add in final carry
	andcc %o1, 3, %g0		! check again for trailing bytes
1:	be 1f				! no trailing bytes, return
	addcc %o1, -1, %g0		! only one byte remains?
	bne 2f				! at least two bytes more
	subcc %o1, 2, %o1		! only two bytes more?
	b 4f				! only one byte remains
	or %g0, %g0, %o4		! clear fake hword value
2:	lduh [%o0], %o4			! get hword
	be 6f				! jmp if only hword remains
	add %o0, 2, %o0			! advance buf ptr either way
	sll %o4, 16, %o4		! create upper hword
4:	ldub [%o0], %o5			! get final byte
	sll %o5, 8, %o5			! put into place
	or %o5, %o4, %o4		! coalesce with hword (if any)
6:	addcc %o4, %o2, %o2		! add to sum
1:	retl				! get outta here
	addx %g0, %o2, %o0		! add final carry into retval
| 73 | |||
/* Also do alignment out of band to get better cache patterns. */
	/* Eat the leading halfword and/or word so %o0 becomes 8-byte
	 * aligned before re-entering csum_partial at cpa.  Buffers
	 * shorter than 6 bytes go straight to the end-cruft tail.
	 * The srl/sll shuffle after the halfword add appears to rotate
	 * sum+carry by 16 bits to keep the one's-complement sum
	 * byte-order correct for the misaligned prefix -- verify
	 * against RFC 1071's byte-order discussion.
	 */
csum_partial_fix_alignment:
	cmp %o1, 6
	bl cpte - 0x4			! too short: handle as end cruft
	andcc %o0, 0x2, %g0		! (delay) halfword-misaligned?
	be 1f
	andcc %o0, 0x4, %g0		! (delay) word-misaligned?
	lduh [%o0 + 0x00], %g2		! eat leading halfword
	sub %o1, 2, %o1
	add %o0, 2, %o0
	sll %g2, 16, %g2
	addcc %g2, %o2, %o2
	srl %o2, 16, %g3		! begin 16-bit rotate of sum+carry
	addx %g0, %g3, %g2
	sll %o2, 16, %o2
	sll %g2, 16, %g3
	srl %o2, 16, %o2
	andcc %o0, 0x4, %g0		! recheck word alignment
	or %g3, %o2, %o2		! rotated sum
1:	be cpa				! already 8-byte aligned
	andcc %o1, 0xffffff80, %o3	! (delay) main loop iteration count
	ld [%o0 + 0x00], %g2		! eat leading word
	sub %o1, 4, %o1
	addcc %g2, %o2, %o2
	add %o0, 4, %o0
	addx %g0, %o2, %o2		! fold carry back in
	b cpa				! rejoin the main routine
	andcc %o1, 0xffffff80, %o3	! (delay) recompute iteration count
| 102 | |||
/* The common case is to get called with a nicely aligned
 * buffer of size 0x20. Follow the code path for that case.
 */
	/* csum_partial: fold buf into a 32-bit one's-complement sum.
	 * Bulk work is 128 bytes/iteration of CSUM_BIGCHUNK, then a
	 * computed jump into the CSUM_LASTCHUNK table for len & 0x70,
	 * then csum_partial_end_cruft for the last len & 0xf bytes.
	 */
	.globl csum_partial
csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
	andcc %o0, 0x7, %g0				! alignment problems?
	bne csum_partial_fix_alignment			! yep, handle it
	sethi %hi(cpte - 8), %g7			! prepare table jmp ptr
	andcc %o1, 0xffffff80, %o3			! num loop iterations
cpa:	be 3f						! none to do
	andcc %o1, 0x70, %g1				! clears carry flag too
5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	addx %g0, %o2, %o2				! sink in final carry
	subcc %o3, 128, %o3				! detract from loop iters
	bne 5b						! more to do
	add %o0, 128, %o0				! advance buf ptr
	andcc %o1, 0x70, %g1				! clears carry flag too
3:	be cpte						! nope
	andcc %o1, 0xf, %g0				! anything left at all?
	! Each CSUM_LASTCHUNK is 6 insns (24 bytes) and eats 16 data
	! bytes, so the table entry point is cpte - 8 - (%g1 + %g1/2).
	srl %g1, 1, %o4					! compute offset
	sub %g7, %g1, %g7				! adjust jmp ptr
	sub %g7, %o4, %g7				! final jmp ptr adjust
	jmp %g7 + %lo(cpte - 8)				! enter the table
	add %o0, %g1, %o0				! advance buf ptr
cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
	addx %g0, %o2, %o2				! fetch final carry
	andcc %o1, 0xf, %g0				! anything left at all?
cpte:	bne csum_partial_end_cruft			! yep, handle it
	andcc %o1, 8, %g0				! check how much
cpout:	retl						! get outta here
	mov %o2, %o0					! return computed csum
| 143 | |||
	.globl __csum_partial_copy_start, __csum_partial_copy_end
__csum_partial_copy_start:

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr

/* EX: execute "x,y"; if it faults, the __ex_table entry sends the
 * trap handler to the .fixup stub, which computes "a, b, %o3"
 * (bytes left uncopied) and branches to local label 30.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba 30f;					\
	a, b, %o3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/* EX2: like EX, but a fault jumps straight to label 30 with no
 * adjustment of %o3.
 */
#define EX2(x,y)				\
98: 	x,y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 30f;			\
	.text;					\
	.align	4

/* EX3: like EX2, but faults are routed to label 96 instead. */
#define EX3(x,y)				\
98: 	x,y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 96f;			\
	.text;					\
	.align	4

/* EXT: ranged __ex_table entry -- a fault anywhere in [start,end)
 * goes to handler.  The 4-word format (start, 0, end, handler) is
 * presumably what sparc's extable search expects -- verify against
 * arch/sparc/mm/fault.c.
 */
#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4
| 184 | |||
/* This aligned version executes typically in 8.5 superscalar cycles, this
 * is the best I can do. I say 8.5 because the final add will pair with
 * the next ldd in the main unrolled loop. Thus the pipe is always full.
 * If you change these macros (including order of instructions),
 * please check the fixup code below as well.
 */
/* Copy + checksum 32 bytes from (src + off) to (dst + off) using
 * doubleword loads and stores; t0..t7 must be even/odd pairs. */
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	ldd	[src + off + 0x00], t0;							\
	ldd	[src + off + 0x08], t2;							\
	addxcc	t0, sum, sum;								\
	ldd	[src + off + 0x10], t4;							\
	addxcc	t1, sum, sum;								\
	ldd	[src + off + 0x18], t6;							\
	addxcc	t2, sum, sum;								\
	std	t0, [dst + off + 0x00];							\
	addxcc	t3, sum, sum;								\
	std	t2, [dst + off + 0x08];							\
	addxcc	t4, sum, sum;								\
	std	t4, [dst + off + 0x10];							\
	addxcc	t5, sum, sum;								\
	std	t6, [dst + off + 0x18];							\
	addxcc	t6, sum, sum;								\
	addxcc	t7, sum, sum;

/* 12 superscalar cycles seems to be the limit for this case,
 * because of this we thus do all the ldd's together to get
 * Viking MXCC into streaming mode. Ho hum...
 */
/* Same 32-byte copy+sum, but dst may only be word aligned, so the
 * stores are single words. */
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	ldd	[src + off + 0x00], t0;						\
	ldd	[src + off + 0x08], t2;						\
	ldd	[src + off + 0x10], t4;						\
	ldd	[src + off + 0x18], t6;						\
	st	t0, [dst + off + 0x00];						\
	addxcc	t0, sum, sum;							\
	st	t1, [dst + off + 0x04];						\
	addxcc	t1, sum, sum;							\
	st	t2, [dst + off + 0x08];						\
	addxcc	t2, sum, sum;							\
	st	t3, [dst + off + 0x0c];						\
	addxcc	t3, sum, sum;							\
	st	t4, [dst + off + 0x10];						\
	addxcc	t4, sum, sum;							\
	st	t5, [dst + off + 0x14];						\
	addxcc	t5, sum, sum;							\
	st	t6, [dst + off + 0x18];						\
	addxcc	t6, sum, sum;							\
	st	t7, [dst + off + 0x1c];						\
	addxcc	t7, sum, sum;

/* Yuck, 6 superscalar cycles... */
/* Copy + sum the 16 bytes ending at (src - off + 8); jump-table
 * entry like CSUM_LASTCHUNK in csum_partial above. */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
	ldd	[src - off - 0x08], t0;				\
	ldd	[src - off - 0x00], t2;				\
	addxcc	t0, sum, sum;					\
	st	t0, [dst - off - 0x08];				\
	addxcc	t1, sum, sum;					\
	st	t1, [dst - off - 0x04];				\
	addxcc	t2, sum, sum;					\
	st	t2, [dst - off - 0x00];				\
	addxcc	t3, sum, sum;					\
	st	t3, [dst - off + 0x04];
| 247 | |||
/* Handle the end cruft code out of band for better cache patterns. */
	/* Copy-and-sum tail: %o0 = src, %o1 = dst, %o3 = remaining
	 * length (low 4 bits), %g7 = running sum; condition codes
	 * reflect "%o3 & 8" on entry.  Every access is wrapped in
	 * EX/EX2 so user-space faults unwind via the fixup at 30.
	 * Returns the sum in %o0.
	 */
cc_end_cruft:
	be 1f					! no doubleword remains
	andcc %o3, 4, %g0			! (delay) word remaining?
	EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf)
	add %o1, 8, %o1
	addcc %g2, %g7, %g7			! fold both words into sum
	add %o0, 8, %o0
	addxcc %g3, %g7, %g7
	EX2(st %g2, [%o1 - 0x08])
	addx %g0, %g7, %g7			! sink carry
	andcc %o3, 4, %g0
	EX2(st %g3, [%o1 - 0x04])
1:	be 1f					! no word remains
	andcc %o3, 3, %o3			! (delay) trailing bytes?
	EX(ld [%o0 + 0x00], %g2, add %o3, 4)
	add %o1, 4, %o1
	addcc %g2, %g7, %g7
	EX2(st %g2, [%o1 - 0x04])
	addx %g0, %g7, %g7
	andcc %o3, 3, %g0
	add %o0, 4, %o0
1:	be 1f					! no trailing bytes, return
	addcc %o3, -1, %g0			! (delay) exactly one byte?
	bne 2f					! at least two bytes more
	subcc %o3, 2, %o3			! (delay) exactly two?
	b 4f					! single byte remains
	or %g0, %g0, %o4			! (delay) clear fake hword
2:	EX(lduh [%o0 + 0x00], %o4, add %o3, 2)
	add %o0, 2, %o0
	EX2(sth %o4, [%o1 + 0x00])
	be 6f					! only the hword remained
	add %o1, 2, %o1
	sll %o4, 16, %o4			! hword into upper half
4:	EX(ldub [%o0 + 0x00], %o5, add %g0, 1)
	EX2(stb %o5, [%o1 + 0x00])
	sll %o5, 8, %o5				! final byte into place
	or %o5, %o4, %o4			! merge with hword (if any)
6:	addcc %o4, %g7, %g7			! fold tail into sum
1:	retl
	addx %g0, %g7, %o0			! final carry into retval
| 289 | |||
/* Also, handle the alignment code out of band. */
/* Align src to an 8-byte boundary before the unrolled loops.  Very
 * short lengths (< 6) go straight to the tail code (ccte); an odd src
 * address falls back to the byte-wise ccslow path.  After the 2-byte
 * step the 16-bit halves of the partial sum must be swapped (with the
 * pending carry folded in) so later word-sized folds remain correct.
 * %o0 = src, %o1 = dst, %g1 = remaining length, %g7 = running sum.
 */
cc_dword_align:
	cmp %g1, 6
	bl,a ccte			! too short for alignment games
	 andcc %g1, 0xf, %o3		! (annulled delay slot) tail length bits
	andcc %o0, 0x1, %g0		! odd src address?
	bne ccslow			! yes: give up on the fast path
	 andcc %o0, 0x2, %g0		! (delay slot) halfword-aligned only?
	be 1f
	 andcc %o0, 0x4, %g0		! (delay slot) word- but not dword-aligned?
	EX(lduh [%o0 + 0x00], %g4, add %g1, 0)
	sub %g1, 2, %g1			! consumed two bytes
	EX2(sth %g4, [%o1 + 0x00])
	add %o0, 2, %o0			! advance src
	sll %g4, 16, %g4		! halfword belongs in the high lane
	addcc %g4, %g7, %g7		! fold it in
	add %o1, 2, %o1			! advance dst
	srl %g7, 16, %g3		! now swap the 16-bit halves of the
	addx %g0, %g3, %g4		! running sum, folding the pending
	sll %g7, 16, %g7		! carry into the high half, so the
	sll %g4, 16, %g3		! byte lanes line up again after the
	srl %g7, 16, %g7		! odd halfword step
	andcc %o0, 0x4, %g0		! dword-aligned now?
	or %g3, %g7, %g7		! recombine the swapped halves
1:	be 3f
	 andcc %g1, 0xffffff80, %g0	! (delay slot) any full 128B chunks?
	EX(ld [%o0 + 0x00], %g4, add %g1, 0)
	sub %g1, 4, %g1			! consumed four bytes
	EX2(st %g4, [%o1 + 0x00])
	add %o0, 4, %o0			! advance src
	addcc %g4, %g7, %g7		! fold the word in
	add %o1, 4, %o1			! advance dst
	addx %g0, %g7, %g7		! absorb carry
	b 3f				! rejoin the main routine
	 andcc %g1, 0xffffff80, %g0	! (delay slot) recompute the chunk test
| 325 | |||
/* Sun, you just can't beat me, you just can't.  Stop trying,
 * give up.  I'm serious, I am going to kick the living shit
 * out of you, game over, lights out.
 */
	.align 8
	.globl __csum_partial_copy_sparc_generic
	/* Copy src to dst while accumulating a 32-bit ones-complement
	 * checksum of the data.  Buffers with matching word alignment
	 * take the unrolled 128-byte loops (word stores, or ccdbl's
	 * doubleword stores when dst is 8-byte aligned), then a
	 * Duff-style jump table for 16..112 bytes, then the end-cruft
	 * code for the last 0..15.  Mismatched alignment -> ccslow.
	 */
__csum_partial_copy_sparc_generic:
					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
	xor %o0, %o1, %o4		! get changing bits
	andcc %o4, 3, %g0		! check for mismatched alignment
	bne ccslow			! better this than unaligned/fixups
	 andcc %o0, 7, %g0		! need to align things?
	bne cc_dword_align		! yes, we check for short lengths there
	 andcc %g1, 0xffffff80, %g0	! can we use unrolled loop?
3:	be 3f				! nope, less than one loop remains
	 andcc %o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
	be ccdbl + 4			! 8 byte aligned, kick ass
5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
10:	EXT(5b, 10b, 20f)		! note for exception handling
	sub %g1, 128, %g1		! detract from length
	addx %g0, %g7, %g7		! add in last carry bit
	andcc %g1, 0xffffff80, %g0	! more to csum?
	add %o0, 128, %o0		! advance src ptr
	bne 5b				! we did not go negative, continue looping
	 add %o1, 128, %o1		! advance dest ptr
3:	andcc %g1, 0x70, %o2		! can use table?
ccmerge:be ccte				! nope, go and check for end cruft
	 andcc %g1, 0xf, %o3		! get low bits of length (clears carry btw)
	srl %o2, 1, %o4			! begin negative offset computation
	sethi %hi(12f), %o5		! set up table ptr end
	add %o0, %o2, %o0		! advance src ptr
	sub %o5, %o4, %o5		! continue table calculation
	sll %o2, 1, %g2			! constant multiplies are fun...
	sub %o5, %g2, %o5		! some more adjustments
	jmp %o5 + %lo(12f)		! jump into it, duff style, wheee...
	 add %o1, %o2, %o1		! advance dest ptr (carry is clear btw)
	/* Each CSUMCOPY_LASTCHUNK is 10 insns = 40 bytes and handles 16
	 * bytes, so the computed offset above is (o2/16)*40 = o2*2.5.
	 */
cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:	EXT(cctbl, 12b, 22f)		! note for exception table handling
	addx %g0, %g7, %g7
	andcc %o3, 0xf, %g0		! check for low bits set
ccte:	bne cc_end_cruft		! something left, handle it out of band
	 andcc %o3, 8, %g0		! begin checks for that code
	retl				! return
	 mov %g7, %o0			! give em the computed checksum
	/* Same 128-byte loop as above but with doubleword stores; only
	 * reachable when dst is 8-byte aligned (entered at ccdbl + 4 to
	 * skip the first insn, which matches the MOVE loop's first).
	 */
ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
11:	EXT(ccdbl, 11b, 21f)		! note for exception table handling
	sub %g1, 128, %g1		! detract from length
	addx %g0, %g7, %g7		! add in last carry bit
	andcc %g1, 0xffffff80, %g0	! more to csum?
	add %o0, 128, %o0		! advance src ptr
	bne ccdbl			! we did not go negative, continue looping
	 add %o1, 128, %o1		! advance dest ptr
	b ccmerge			! finish it off, above
	 andcc %g1, 0x70, %o2		! can use table? (clears carry btw)
| 392 | |||
	/* Slow path: used whenever src and dst word alignments differ
	 * (or src is odd).  Stores are byte-wide; sums are taken from
	 * aligned halfword/word loads.  %o5 remembers whether src
	 * started on an odd address -- if so, the accumulated 16-bit
	 * sum is byte-swapped back just before the final fold at 4:.
	 * %g4 = number of whole words/halfwords left, %g5 = partial sum.
	 */
ccslow:	cmp %g1, 0
	mov 0, %g5			! clear the partial sum
	bleu 4f				! nothing to do for len <= 0
	 andcc %o0, 1, %o5		! (delay slot) remember "src is odd"
	be,a 1f
	 srl %g1, 1, %g4		! (annulled delay slot) halfword count
	sub %g1, 1, %g1			! odd src: peel one byte first
	EX(ldub [%o0], %g5, add %g1, 1)
	add %o0, 1, %o0
	EX2(stb %g5, [%o1])
	srl %g1, 1, %g4			! halfword count of the rest
	add %o1, 1, %o1
1:	cmp %g4, 0
	be,a 3f				! fewer than two bytes left
	 andcc %g1, 1, %g0		! (annulled delay slot) trailing byte?
	andcc %o0, 2, %g0		! src halfword- but not word-aligned?
	be,a 1f
	 srl %g4, 1, %g4		! (annulled delay slot) word count
	EX(lduh [%o0], %o4, add %g1, 0)	! peel one halfword to word-align src
	sub %g1, 2, %g1
	srl %o4, 8, %g2			! high byte of the halfword
	sub %g4, 1, %g4
	EX2(stb %g2, [%o1])
	add %o4, %g5, %g5		! fold the halfword into the sum
	EX2(stb %o4, [%o1 + 1])		! low byte (stb stores the low 8 bits)
	add %o0, 2, %o0
	srl %g4, 1, %g4			! word count of the rest
	add %o1, 2, %o1
1:	cmp %g4, 0
	be,a 2f				! no whole words left
	 andcc %g1, 2, %g0		! (annulled delay slot) halfword left?
	EX3(ld [%o0], %o4)
5:	srl %o4, 24, %g2		! word loop: store the four bytes
	srl %o4, 16, %g3		! individually (dst alignment unknown)
	EX2(stb %g2, [%o1])
	srl %o4, 8, %g2
	EX2(stb %g3, [%o1 + 1])
	add %o0, 4, %o0
	EX2(stb %g2, [%o1 + 2])
	addcc %o4, %g5, %g5		! fold whole word into the sum
	EX2(stb %o4, [%o1 + 3])
	addx %g5, %g0, %g5	! I am now too lazy to optimize this (question it
	add %o1, 4, %o1		! is worthy). Maybe some day - with the sll/srl
	subcc %g4, 1, %g4	! tricks
	bne,a 5b
	 EX3(ld [%o0], %o4)		! (annulled delay slot) next word
	sll %g5, 16, %g2		! fold the 32-bit sum down to 16 bits
	srl %g5, 16, %g5
	srl %g2, 16, %g2
	andcc %g1, 2, %g0		! trailing halfword?
	add %g2, %g5, %g5
2:	be,a 3f
	 andcc %g1, 1, %g0		! (annulled delay slot) trailing byte?
	EX(lduh [%o0], %o4, and %g1, 3)	! trailing halfword
	andcc %g1, 1, %g0
	srl %o4, 8, %g2
	add %o0, 2, %o0
	EX2(stb %g2, [%o1])
	add %g5, %o4, %g5		! fold it in
	EX2(stb %o4, [%o1 + 1])
	add %o1, 2, %o1
3:	be,a 1f
	 sll %g5, 16, %o4		! (annulled delay slot) no trailing byte
	EX(ldub [%o0], %g2, add %g0, 1)	! trailing byte
	sll %g2, 8, %o4			! byte counts in the high lane
	EX2(stb %g2, [%o1])
	add %g5, %o4, %g5
	sll %g5, 16, %o4
1:	addcc %o4, %g5, %g5		! fold high half into low half...
	srl %g5, 16, %o4
	addx %g0, %o4, %g5		! ...with end-around carry
	orcc %o5, %g0, %g0		! did src start on an odd address?
	be 4f
	 srl %g5, 8, %o4		! (delay slot) begin the byte swap
	and %g5, 0xff, %g2		! odd start: swap the two sum bytes
	and %o4, 0xff, %o4		! back into their proper lanes
	sll %g2, 8, %g2
	or %g2, %o4, %g5
4:	addcc %g7, %g5, %g7		! merge with the incoming sum
	retl
	 addx %g0, %g7, %o0		! (delay slot) result plus final carry
__csum_partial_copy_end:
| 475 | |||
/* We do these strange calculations for the csum_*_from_user case only, ie.
 * we only bother with faults on loads... */

/* Fault handlers for the EXT() ranges above.  On entry the exception
 * code has set %g2 = (faulting_insn - first_insn_of_range) >> 2; each
 * handler converts that instruction index into %o2 = bytes still to
 * copy and %o3 = bytes to zero, then falls into the common code at 31:.
 */

/* Handler for the word-store 128-byte loop (EXT at 10:).
 * o2 = ((g2%20)&3)*8
 * o3 = g1 - (g2/20)*32 - o2 */
20:
	cmp %g2, 20
	blu,a 1f
	 and %g2, 3, %o2		! (annulled delay slot) g2 % 20, low bits
	sub %g1, 32, %g1		! one whole 32-byte group completed
	b 20b				! divide g2 by 20 via repeated subtraction
	 sub %g2, 20, %g2
1:
	sll %o2, 3, %o2			! *8 -> bytes already loaded, not stored
	b 31f
	 sub %g1, %o2, %o3		! (delay slot) bytes to zero

/* Handler for the ldd/std loop (EXT at 11:).
 * o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
 * o3 = g1 - (g2/16)*32 - o2 */
21:
	andcc %g2, 15, %o3
	srl %g2, 4, %g2			! g2 / 16 = completed 32-byte groups
	be,a 1f
	 clr %o2			! (annulled delay slot) fault on first insn
	add %o3, 1, %o3			! round the insn index up to an
	and %o3, 14, %o3		! even count of loaded doublewords
	sll %o3, 3, %o2			! *8 -> bytes loaded but not stored
1:
	sll %g2, 5, %g2			! *32 bytes per completed group
	sub %g1, %g2, %o3
	b 31f
	 sub %o3, %o2, %o3		! (delay slot) bytes to zero

/* Handler for the cctbl jump table (EXT at 12:).
 * o0 += (g2/10)*16 - 0x70
 * o1 += (g2/10)*16 - 0x70
 * o2 = (g2 % 10) ? 8 : 0
 * o3 += 0x70 - (g2/10)*16 - o2 */
22:
	cmp %g2, 10
	blu,a 1f
	 sub %o0, 0x70, %o0		! (annulled delay slot) undo the table
	add %o0, 16, %o0		! pointer pre-advance done before jmp
	add %o1, 16, %o1
	sub %o3, 16, %o3
	b 22b				! each table entry copied 16 bytes
	 sub %g2, 10, %g2		! and is 10 instructions long
1:
	sub %o1, 0x70, %o1
	add %o3, 0x70, %o3
	clr %o2
	tst %g2				! mid-entry fault: first ldd finished,
	bne,a 1f			! so 8 bytes are still copyable
	 mov 8, %o2
1:
	b 31f
	 sub %o3, %o2, %o3		! (delay slot) bytes to zero
96:
	and %g1, 3, %g1			! leftover bytes below a word
	sll %g4, 2, %g4			! whole words not yet copied
	add %g1, %g4, %o3		! total bytes to zero
30:
/* %o1 is dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
	clr %o2				! nothing left that can still be copied
31:
/* %o0 is src
 * %o1 is dst
 * %o2 is # of bytes to copy from src to dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
	save %sp, -104, %sp		! new window for the C calls below
	mov %i5, %o0			! fault PC
	mov %i7, %o1
	mov %i4, %o2			! fault address
	call lookup_fault		! classify the fault
	 mov %g7, %i4			! (delay slot) preserve csum across call
	cmp %o0, 2			! 2 == fault on a user-space load
	bne 1f
	 add %g0, -EFAULT, %i5		! (delay slot) error code for the caller
	tst %i2				! any bytes still copyable?
	be 2f
	 mov %i0, %o1			! (delay slot) src for __memcpy
	mov %i1, %o0			! dst
5:
	call __memcpy			! copy what is still readable
	 mov %i2, %o2			! (delay slot) length
	tst %o0				! nonzero -> memcpy itself faulted
	bne,a 2f
	 add %i3, %i2, %i3		! (annulled) zero that part too, then
	add %i1, %i2, %i1		! advance dst past the copied part
2:
	mov %i1, %o0
6:
	call __bzero			! clear the unreadable tail
	 mov %i3, %o1			! (delay slot) length to clear
1:
	ld [%sp + 168], %o2		! struct_ptr of parent
	st %i5, [%o2]			! report -EFAULT through it
	ret
	 restore

	/* memcpy/bzero above may themselves fault; route them to the
	 * generic handler (type 2) via the exception table. */
	.section __ex_table,#alloc
	.align 4
	.word 5b,2
	.word 6b,2
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S new file mode 100644 index 000000000000..577505b692ae --- /dev/null +++ b/arch/sparc/lib/copy_user.S | |||
| @@ -0,0 +1,492 @@ | |||
| 1 | /* copy_user.S: Sparc optimized copy_from_user and copy_to_user code. | ||
| 2 | * | ||
| 3 | * Copyright(C) 1995 Linus Torvalds | ||
| 4 | * Copyright(C) 1996 David S. Miller | ||
| 5 | * Copyright(C) 1996 Eddie C. Dost | ||
| 6 | * Copyright(C) 1996,1998 Jakub Jelinek | ||
| 7 | * | ||
| 8 | * derived from: | ||
| 9 | * e-mail between David and Eddie. | ||
| 10 | * | ||
| 11 | * Returns 0 if successful, otherwise count of bytes not copied yet | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <asm/ptrace.h> | ||
| 15 | #include <asm/asmmacro.h> | ||
| 16 | #include <asm/page.h> | ||
| 17 | |||
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr

/* EX(insn, operands, a, b): execute a possibly-faulting user access.
 * On a fault, control goes to a generated .fixup stub that computes
 * the residual byte count into %g3 with the single ALU op "a, b, %g3"
 * (placed in the branch delay slot) and branches to fixupretl.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align 4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align 4;				\
	.word 98b, 99b;				\
	.text;					\
	.align 4

/* EX2: like EX but the fixup stub runs an extra op "c, d, e" before
 * computing %g3 (used when a pointer must be rewound first).
 */
#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align 4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align 4;				\
	.word 98b, 99b;				\
	.text;					\
	.align 4

/* EXO2: faults branch to local label 97 (start of .fixup), where %o2
 * -- the untouched remaining length -- becomes the residual count.
 */
#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align 4;				\
	.word 98b, 97f;				\
	.text;					\
	.align 4

/* EXT(start, end, handler): register a whole instruction RANGE in the
 * exception table (marker word 0 distinguishes it from single-entry
 * records).  The handler receives the faulting instruction's index
 * within the range in %g2.
 */
#define EXT(start,end,handler) 			\
	.section __ex_table,ALLOC;		\
	.align 4;				\
	.word start, 0, end, handler;		\
	.text;					\
	.align 4
| 60 | |||
/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */

/* Both these macros have to start with exactly the same insn */
/* Copy 32 bytes: four ldd loads, then eight word stores.  The .fixup
 * handlers (50:/52:) decode the faulting instruction index assuming
 * exactly this 12-insn layout.
 */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd [%src + (offset) + 0x00], %t0; \
	ldd [%src + (offset) + 0x08], %t2; \
	ldd [%src + (offset) + 0x10], %t4; \
	ldd [%src + (offset) + 0x18], %t6; \
	st %t0, [%dst + (offset) + 0x00]; \
	st %t1, [%dst + (offset) + 0x04]; \
	st %t2, [%dst + (offset) + 0x08]; \
	st %t3, [%dst + (offset) + 0x0c]; \
	st %t4, [%dst + (offset) + 0x10]; \
	st %t5, [%dst + (offset) + 0x14]; \
	st %t6, [%dst + (offset) + 0x18]; \
	st %t7, [%dst + (offset) + 0x1c];

/* Copy 32 bytes with doubleword stores; requires 8-byte aligned dst. */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd [%src + (offset) + 0x00], %t0; \
	ldd [%src + (offset) + 0x08], %t2; \
	ldd [%src + (offset) + 0x10], %t4; \
	ldd [%src + (offset) + 0x18], %t6; \
	std %t0, [%dst + (offset) + 0x00]; \
	std %t2, [%dst + (offset) + 0x08]; \
	std %t4, [%dst + (offset) + 0x10]; \
	std %t6, [%dst + (offset) + 0x18];

/* Copy 16 bytes at a negative offset; one jump-table entry. */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd [%src - (offset) - 0x10], %t0; \
	ldd [%src - (offset) - 0x08], %t2; \
	st %t0, [%dst - (offset) - 0x10]; \
	st %t1, [%dst - (offset) - 0x0c]; \
	st %t2, [%dst - (offset) - 0x08]; \
	st %t3, [%dst - (offset) - 0x04];

/* Copy 8 bytes as four halfwords (alignments differ by 2). */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh [%src + (offset) + 0x00], %t0; \
	lduh [%src + (offset) + 0x02], %t1; \
	lduh [%src + (offset) + 0x04], %t2; \
	lduh [%src + (offset) + 0x06], %t3; \
	sth %t0, [%dst + (offset) + 0x00]; \
	sth %t1, [%dst + (offset) + 0x02]; \
	sth %t2, [%dst + (offset) + 0x04]; \
	sth %t3, [%dst + (offset) + 0x06];

/* Copy 2 bytes, byte-at-a-time, at a negative offset. */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub [%src - (offset) - 0x02], %t0; \
	ldub [%src - (offset) - 0x01], %t1; \
	stb %t0, [%dst - (offset) - 0x02]; \
	stb %t1, [%dst - (offset) - 0x01];
| 113 | |||
	.text
	.align 4

	.globl __copy_user_begin
__copy_user_begin:

	.globl __copy_user
	/* Out-of-line helper for __copy_user: move 1-3 bytes so src
	 * (%o1) reaches a word boundary, adjusting the length in %o2,
	 * then rejoin the main path at 3:.  Entered with cc set from
	 * "andcc %o1, 3" -- Z clear guaranteed (src is misaligned).
	 */
dword_align:
	andcc %o1, 1, %g0		! src on an odd address?
	be 4f
	 andcc %o1, 2, %g0		! (delay slot) halfword-misaligned?

	EXO2(ldub [%o1], %g2)		! move the odd leading byte
	add %o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub %o2, 1, %o2
	bne 3f				! byte was enough: word-aligned now
	 add %o0, 1, %o0		! (delay slot)

	EXO2(lduh [%o1], %g2)		! byte + halfword case
	add %o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub %o2, 2, %o2
	b 3f
	 add %o0, 2, %o0		! (delay slot)
4:
	EXO2(lduh [%o1], %g2)		! halfword-only case
	add %o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub %o2, 2, %o2
	b 3f
	 add %o0, 2, %o0		! (delay slot)
| 146 | |||
	/* __copy_user: copy len bytes between kernel and user space.
	 * Returns 0 on success, else the count of bytes NOT copied.
	 * Matching word alignment takes the unrolled 128-byte loops
	 * (word stores, or ldd_std's doubleword stores when dst is
	 * 8-byte aligned), then a jump table for 16..112 bytes, then
	 * the 8/4/2/1-byte tail.  Mismatched alignment goes to
	 * cannot_optimize.
	 */
__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor %o0, %o1, %o4		! bits in which dst and src differ
1:
	andcc %o4, 3, %o5		! same word-alignment phase?
2:
	bne cannot_optimize		! no: halfword/byte paths
	 cmp %o2, 15			! (delay slot)

	bleu short_aligned_end		! tiny copy: skip the alignment work
	 andcc %o1, 3, %g0		! (delay slot) src word-aligned?

	bne dword_align			! word-align src first
3:
	 andcc %o1, 4, %g0		! (delay slot) word- but not dword-aligned?

	be 2f
	 mov %o2, %g1			! (delay slot) %g1 = remaining length

	EXO2(ld [%o1], %o4)		! move one word to reach dword alignment
	sub %g1, 4, %g1
	EXO2(st %o4, [%o0])
	add %o1, 4, %o1
	add %o0, 4, %o0
2:
	andcc %g1, 0xffffff80, %g7	! %g7 = bytes for the 128-byte loops
	be 3f
	 andcc %o0, 4, %g0		! (delay slot) dst dword-aligned too?

	be ldd_std + 4			! yes: ldd/std loop (its first insn
5:					! is identical, so skip it)
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)		! 128-byte loop fault handler
	subcc %g7, 128, %g7
	add %o1, 128, %o1
	bne 5b
	 add %o0, 128, %o0		! (delay slot)
3:
	andcc %g1, 0x70, %g7		! 16..112 bytes via the jump table
	be copy_user_table_end
	 andcc %g1, 8, %g0		! (delay slot) cc for copy_user_last7

	/* Each MOVE_LASTCHUNK entry is 6 insns = 24 bytes and copies
	 * 16 bytes, so the entry offset is (g7/16)*24 = g7*1.5.
	 */
	sethi %hi(copy_user_table_end), %o5
	srl %g7, 1, %o4
	add %g7, %o4, %o4		! o4 = g7 + g7/2
	add %o1, %g7, %o1		! pre-advance past the table's work
	sub %o5, %o4, %o5		! back up from the table's end
	jmpl %o5 + %lo(copy_user_table_end), %g0
	 add %o0, %g7, %o0		! (delay slot)

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)	! table fault handler
	be copy_user_last7
	 andcc %g1, 4, %g0		! (delay slot)

	EX(ldd [%o1], %g2, and %g1, 0xf)	! trailing 8-byte chunk
	add %o0, 8, %o0
	add %o1, 8, %o1
	EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:			! 4/2/1-byte tail; %g1 = length bits
	be 1f
	 andcc %g1, 2, %g0		! (delay slot)

	EX(ld [%o1], %g2, and %g1, 7)
	add %o1, 4, %o1
	EX(st %g2, [%o0], and %g1, 7)
	add %o0, 4, %o0
1:
	be 1f
	 andcc %g1, 1, %g0		! (delay slot)

	EX(lduh [%o1], %g2, and %g1, 3)
	add %o1, 2, %o1
	EX(sth %g2, [%o0], and %g1, 3)
	add %o0, 2, %o0
1:
	be 1f
	 nop

	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr %o0			! (delay slot) success: 0 bytes left
| 243 | |||
	/* 128-byte loop with doubleword stores; entered at ldd_std + 4
	 * (the first insn matches the MOVE_BIGCHUNK loop's first and is
	 * skipped).  Requires 8-byte aligned src AND dst.
	 */
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)		! aligned-loop fault handler
	subcc %g7, 128, %g7
	add %o1, 128, %o1
	bne ldd_std
	 add %o0, 128, %o0		! (delay slot)

	andcc %g1, 0x70, %g7		! same table dispatch as the main path
	be copy_user_table_end
	 andcc %g1, 8, %g0		! (delay slot)

	sethi %hi(copy_user_table_end), %o5
	srl %g7, 1, %o4
	add %g7, %o4, %o4		! entry offset = g7 * 1.5 (24B/entry)
	add %o1, %g7, %o1
	sub %o5, %o4, %o5
	jmpl %o5 + %lo(copy_user_table_end), %g0
	 add %o0, %g7, %o0		! (delay slot)
| 267 | |||
	/* Paths for mismatched src/dst word alignment.  %o5 holds the
	 * alignment difference (from "andcc %o4, 3, %o5" in
	 * __copy_user); cc still reflects the "len <= 15" compare.
	 */
cannot_optimize:
	bleu short_end			! fewer than 16 bytes: straight to tail
	 cmp %o5, 2			! (delay slot) do alignments differ by 2?

	bne byte_chunk			! differ by 1 or 3: pure byte copy
	 and %o2, 0xfffffff0, %o3	! (delay slot) bytes for the 16B loops

	/* Alignments differ by exactly 2: halfword-at-a-time copy. */
	andcc %o1, 1, %g0		! src on an odd address?
	be 10f
	 nop

	EXO2(ldub [%o1], %g2)		! leading byte to halfword-align src
	add %o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub %o2, 1, %o2
	andcc %o2, 0xfffffff0, %o3	! recompute the loop byte count
	be short_end
	 add %o0, 1, %o0		! (delay slot)
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)		! halfword-loop fault handler
	subcc %o3, 0x10, %o3
	add %o1, 0x10, %o1
	bne 10b
	 add %o0, 0x10, %o0		! (delay slot)
	b 2f
	 and %o2, 0xe, %o3		! (delay slot) even tail byte count

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)	! byte-loop fault handler
	subcc %o3, 0x10, %o3
	add %o1, 0x10, %o1
	bne byte_chunk
	 add %o0, 0x10, %o0		! (delay slot)

short_end:
	and %o2, 0xe, %o3		! even tail byte count (0..14)
2:
	/* Each MOVE_SHORTCHUNK entry is 4 insns = 16 bytes and copies
	 * 2 bytes, so the entry offset is (o3/2)*16 = o3*8.
	 */
	sethi %hi(short_table_end), %o5
	sll %o3, 3, %o4
	add %o0, %o3, %o0		! pre-advance past the table's work
	sub %o5, %o4, %o5
	add %o1, %o3, %o1
	jmpl %o5 + %lo(short_table_end), %g0
	 andcc %o2, 1, %g0		! (delay slot) odd trailing byte?
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)	! short-table fault handler
	be 1f
	 nop
	EX(ldub [%o1], %g2, add %g0, 1)	! odd trailing byte
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr %o0			! (delay slot) success
| 341 | |||
	/* Copies of at most 15 bytes with matching alignment.  Entered
	 * with cc from "andcc %o1, 3": if src is not word-aligned, the
	 * byte-wise short_end tail handles it instead.
	 */
short_aligned_end:
	bne short_end			! src not word-aligned: byte tail
	 andcc %o2, 8, %g0		! (delay slot)

	be 1f
	 andcc %o2, 4, %g0		! (delay slot) cc for copy_user_last7

	EXO2(ld [%o1 + 0x00], %g2)	! move an 8-byte chunk as two words
	EXO2(ld [%o1 + 0x04], %g3)
	add %o1, 8, %o1
	EXO2(st %g2, [%o0 + 0x00])
	EX(st %g3, [%o0 + 0x04], sub %o2, 4)
	add %o0, 8, %o0
1:
	b copy_user_last7		! share the 4/2/1-byte tail code
	 mov %o2, %g1			! (delay slot) tail expects len in %g1
| 358 | |||
	.section .fixup,#alloc,#execinstr
	.align 4
97:
	mov %o2, %g3			! EXO2 fault: %o2 still holds the full
					! remaining count
fixupretl:
	/* Common fault exit.  %g3 = bytes not copied.  If this was a
	 * copy FROM user space (dst in kernel space, src in user
	 * space), zero the uncopied tail of the kernel buffer before
	 * returning the residual count in %o0.
	 */
	sethi %hi(PAGE_OFFSET), %g1
	cmp %o0, %g1			! dst in kernel space...
	blu 1f
	 cmp %o1, %g1			! (delay slot) ...and src in user space?
	bgeu 1f
	 nop
	save %sp, -64, %sp
	mov %i0, %o0			! start of the uncopied tail
	call __bzero
	 mov %g3, %o1			! (delay slot) bytes to clear
	restore
1:	retl
	 mov %g3, %o0			! (delay slot) return residual count

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
 * o0 += (g2 / 12) * 32;
 */
	cmp %g2, 12			! divide g2 by 12 (insns per chunk)
	add %o0, %g7, %o0		! via compare ladder, 0..3 quotient
	bcs 1f
	 cmp %g2, 24			! (delay slot)
	bcs 2f
	 cmp %g2, 36			! (delay slot)
	bcs 3f
	 nop
	sub %g2, 12, %g2
	sub %g7, 32, %g7		! each completed chunk moved 32 bytes
3:	sub %g2, 12, %g2
	sub %g7, 32, %g7
2:	sub %g2, 12, %g2
	sub %g7, 32, %g7
1:	cmp %g2, 4			! fault within the loads (first 4 insns)?
	bcs,a 60f
	 clr %g2			! (annulled delay slot) nothing stored yet
	sub %g2, 4, %g2			! else (insn - 4) stores of 4 bytes each
	sll %g2, 2, %g2
60:	and %g1, 0x7f, %g3		! bytes outside the 128-byte loops
	sub %o0, %g7, %o0		! rewind dst to the chunk start
	add %g3, %g7, %g3		! + bytes remaining in the loops
	ba fixupretl
	 sub %g3, %g2, %g3		! (delay slot) minus bytes already stored
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
 * o0 -= (i / 6) * 16 + 16;
 */
	neg %g2				! count insns from the END of the table
	and %g1, 0xf, %g1		! bytes beyond the table's coverage
	add %g2, 41, %g2
	add %o0, %g1, %o0
1:	cmp %g2, 6			! 6 insns per 16-byte table entry
	bcs,a 2f
	 cmp %g2, 4			! (annulled delay slot)
	add %g1, 16, %g1		! whole entries not yet executed
	b 1b
	 sub %g2, 6, %g2		! (delay slot)
2:	bcc,a 2f
	 mov 16, %g2			! (annulled) fault in the loads: all 16
	inc %g2				! fault in store j: (j+1)*4 bytes left
	sll %g2, 2, %g2
2:	add %g1, %g2, %g3
	ba fixupretl
	 sub %o0, %g3, %o0		! (delay slot) rewind dst accordingly
52:
/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
 * o0 += (g2 / 8) * 32 */
	andn %g2, 7, %g4		! completed chunks (8 insns each)
	add %o0, %g7, %o0
	andcc %g2, 4, %g0		! fault in the stds (last 4 insns)?
	and %g2, 3, %g2
	sll %g4, 2, %g4			! *4 -> bytes moved by completed chunks
	sll %g2, 3, %g2			! *8 -> bytes per completed std
	bne 60b				! share handler 50's common exit
	 sub %g7, %g4, %g7		! (delay slot)
	ba 60b
	 clr %g2			! (delay slot) fault in the ldds
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
 * o0 += (g2 & 8) */
	and %g2, 3, %g4			! store index within a HALFCHUNK
	andcc %g2, 4, %g0		! fault in the stores?
	and %g2, 8, %g2			! bytes done by a completed HALFCHUNK
	sll %g4, 1, %g4			! *2 bytes per completed sth
	be 1f
	 add %o0, %g2, %o0		! (delay slot)
	add %g2, %g4, %g2
1:	and %o2, 0xf, %g3		! tail bytes outside the loops
	add %g3, %o3, %g3		! + bytes remaining in the loops
	ba fixupretl
	 sub %g3, %g2, %g3		! (delay slot) minus bytes stored
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
 * o0 += (g2 / 4) * 2 */
	srl %g2, 2, %o4			! completed SHORTCHUNKs (4 insns each)
	and %g2, 1, %o5
	srl %g2, 1, %g2
	add %o4, %o4, %o4		! *2 bytes per completed chunk
	and %o5, %g2, %o5		! one extra byte if the 2nd stb faulted
	and %o2, 0xf, %o2
	add %o0, %o4, %o0
	sub %o3, %o5, %o3
	sub %o2, %o4, %o2
	ba fixupretl
	 add %o2, %o3, %g3		! (delay slot) total residual
55:
/* i = 27 - g2;
 * g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
 * o0 -= i / 4 * 2 + 1 */
	neg %g2				! count insns from the END of the table
	and %o2, 1, %o2			! possible odd trailing byte
	add %g2, 27, %g2
	srl %g2, 2, %o5			! whole 2-byte entries still to run
	andcc %g2, 3, %g0
	mov 1, %g2			! mid-entry fault: one byte stored
	add %o5, %o5, %o5		! *2 bytes per entry
	be,a 1f
	 clr %g2			! (annulled) fault on an entry boundary
1:	add %g2, %o5, %g3
	sub %o0, %g3, %o0		! rewind dst past the unstored bytes
	ba fixupretl
	 add %g3, %o2, %g3		! (delay slot) plus the odd byte

	.globl __copy_user_end
__copy_user_end:
diff --git a/arch/sparc/lib/debuglocks.c b/arch/sparc/lib/debuglocks.c new file mode 100644 index 000000000000..fb182352782c --- /dev/null +++ b/arch/sparc/lib/debuglocks.c | |||
| @@ -0,0 +1,202 @@ | |||
| 1 | /* $Id: debuglocks.c,v 1.11 2001/09/20 00:35:31 davem Exp $ | ||
| 2 | * debuglocks.c: Debugging versions of SMP locking primitives. | ||
| 3 | * | ||
| 4 | * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) | ||
| 5 | * Copyright (C) 1998-99 Anton Blanchard (anton@progsoc.uts.edu.au) | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/sched.h> | ||
| 10 | #include <linux/threads.h> /* For NR_CPUS */ | ||
| 11 | #include <linux/spinlock.h> | ||
| 12 | #include <asm/psr.h> | ||
| 13 | #include <asm/system.h> | ||
| 14 | |||
| 15 | #ifdef CONFIG_SMP | ||
| 16 | |||
| 17 | /* Some notes on how these debugging routines work. When a lock is acquired | ||
| 18 | * an extra debugging member lock->owner_pc is set to the caller of the lock | ||
| 19 | * acquisition routine. Right before releasing a lock, the debugging program | ||
| 20 | * counter is cleared to zero. | ||
| 21 | * | ||
| 22 | * Furthermore, since PC's are 4 byte aligned on Sparc, we stuff the CPU | ||
| 23 | * number of the owner in the lowest two bits. | ||
| 24 | */ | ||
| 25 | |||
/* Capture our caller's PC: on sparc32 %i7 holds the return address, so
 * this identifies who asked for the lock in the stuck-lock reports. */
#define STORE_CALLER(A) __asm__ __volatile__("mov %%i7, %0" : "=r" (A));
| 27 | |||
| 28 | static inline void show(char *str, spinlock_t *lock, unsigned long caller) | ||
| 29 | { | ||
| 30 | int cpu = smp_processor_id(); | ||
| 31 | |||
| 32 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n",str, | ||
| 33 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); | ||
| 34 | } | ||
| 35 | |||
| 36 | static inline void show_read(char *str, rwlock_t *lock, unsigned long caller) | ||
| 37 | { | ||
| 38 | int cpu = smp_processor_id(); | ||
| 39 | |||
| 40 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", str, | ||
| 41 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); | ||
| 42 | } | ||
| 43 | |||
| 44 | static inline void show_write(char *str, rwlock_t *lock, unsigned long caller) | ||
| 45 | { | ||
| 46 | int cpu = smp_processor_id(); | ||
| 47 | int i; | ||
| 48 | |||
| 49 | printk("%s(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)", str, | ||
| 50 | lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); | ||
| 51 | |||
| 52 | for(i = 0; i < NR_CPUS; i++) | ||
| 53 | printk(" reader[%d]=%08lx", i, lock->reader_pc[i]); | ||
| 54 | |||
| 55 | printk("\n"); | ||
| 56 | } | ||
| 57 | |||
| 58 | #undef INIT_STUCK | ||
| 59 | #define INIT_STUCK 100000000 | ||
| 60 | |||
/*
 * Debugging spin_lock: acquire the byte lock with ldstub (atomic
 * test-and-set), spinning with plain reads between attempts, and yell
 * via show() if we appear to be stuck.  On success, record the caller
 * PC and our CPU number in owner_pc for post-mortem inspection.
 */
void _do_spin_lock(spinlock_t *lock, char *str)
{
	unsigned long caller;		/* our caller's PC, for owner_pc */
	unsigned long val;		/* previous value of the lock byte */
	int cpu = smp_processor_id();
	int stuck = INIT_STUCK;		/* spins before we complain */

	STORE_CALLER(caller);

again:
	/* ldstub atomically reads the lock byte and sets it to 0xff. */
	__asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock)));
	if(val) {
		/* Lock was held: spin with ordinary loads (no atomic
		 * bus traffic) until it looks free, then retry. */
		while(lock->lock) {
			if (!--stuck) {
				show(str, lock, caller);
				stuck = INIT_STUCK;
			}
			barrier();	/* force lock->lock to be re-read */
		}
		goto again;
	}
	/* Got it: CPU number in the low two bits, caller PC above. */
	lock->owner_pc = (cpu & 3) | (caller & ~3);
}
| 84 | |||
| 85 | int _spin_trylock(spinlock_t *lock) | ||
| 86 | { | ||
| 87 | unsigned long val; | ||
| 88 | unsigned long caller; | ||
| 89 | int cpu = smp_processor_id(); | ||
| 90 | |||
| 91 | STORE_CALLER(caller); | ||
| 92 | |||
| 93 | __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); | ||
| 94 | if(!val) { | ||
| 95 | /* We got it, record our identity for debugging. */ | ||
| 96 | lock->owner_pc = (cpu & 3) | (caller & ~3); | ||
| 97 | } | ||
| 98 | return val == 0; | ||
| 99 | } | ||
| 100 | |||
/*
 * Debugging spin_unlock: clear the ownership record BEFORE releasing
 * the lock byte; barrier() stops the compiler from reordering the two
 * stores (an owner_pc write drifting past the release would clobber
 * the next owner's record).
 */
void _do_spin_unlock(spinlock_t *lock)
{
	lock->owner_pc = 0;
	barrier();
	lock->lock = 0;
}
| 107 | |||
/*
 * Debugging read_lock.  The rwlock is one 32-bit word: its lowest-
 * addressed-last byte (offset 3 -- the LSB on big-endian sparc) is a
 * spin byte-lock, and the upper bits count readers.  We grab the byte
 * lock with ldstub, record ourselves, then increment the whole word:
 * 0xff + 1 carries out of the low byte, bumping the reader count and
 * releasing the byte lock in the same store.
 */
void _do_read_lock(rwlock_t *rw, char *str)
{
	unsigned long caller;		/* our caller's PC for reader_pc[] */
	unsigned long val;		/* previous value of the byte lock */
	int cpu = smp_processor_id();
	int stuck = INIT_STUCK;		/* spins before we complain */

	STORE_CALLER(caller);

wlock_again:
	/* Atomically set the byte at offset 3 (the word's low byte). */
	__asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock)));
	if(val) {
		while(rw->lock & 0xff) {
			if (!--stuck) {
				show_read(str, rw, caller);
				stuck = INIT_STUCK;
			}
			barrier();	/* force rw->lock to be re-read */
		}
		goto wlock_again;
	}

	rw->reader_pc[cpu] = caller;	/* record us for stuck reports */
	barrier();			/* record before becoming visible */
	rw->lock++;			/* +1 carries: count++ AND unlock */
}
| 134 | |||
| 135 | void _do_read_unlock(rwlock_t *rw, char *str) | ||
| 136 | { | ||
| 137 | unsigned long caller; | ||
| 138 | unsigned long val; | ||
| 139 | int cpu = smp_processor_id(); | ||
| 140 | int stuck = INIT_STUCK; | ||
| 141 | |||
| 142 | STORE_CALLER(caller); | ||
| 143 | |||
| 144 | wlock_again: | ||
| 145 | __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); | ||
| 146 | if(val) { | ||
| 147 | while(rw->lock & 0xff) { | ||
| 148 | if (!--stuck) { | ||
| 149 | show_read(str, rw, caller); | ||
| 150 | stuck = INIT_STUCK; | ||
| 151 | } | ||
| 152 | barrier(); | ||
| 153 | } | ||
| 154 | goto wlock_again; | ||
| 155 | } | ||
| 156 | |||
| 157 | rw->reader_pc[cpu] = 0; | ||
| 158 | barrier(); | ||
| 159 | rw->lock -= 0x1ff; | ||
| 160 | } | ||
| 161 | |||
| 162 | void _do_write_lock(rwlock_t *rw, char *str) | ||
| 163 | { | ||
| 164 | unsigned long caller; | ||
| 165 | unsigned long val; | ||
| 166 | int cpu = smp_processor_id(); | ||
| 167 | int stuck = INIT_STUCK; | ||
| 168 | |||
| 169 | STORE_CALLER(caller); | ||
| 170 | |||
| 171 | wlock_again: | ||
| 172 | __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); | ||
| 173 | if(val) { | ||
| 174 | wlock_wait: | ||
| 175 | while(rw->lock) { | ||
| 176 | if (!--stuck) { | ||
| 177 | show_write(str, rw, caller); | ||
| 178 | stuck = INIT_STUCK; | ||
| 179 | } | ||
| 180 | barrier(); | ||
| 181 | } | ||
| 182 | goto wlock_again; | ||
| 183 | } | ||
| 184 | |||
| 185 | if (rw->lock & ~0xff) { | ||
| 186 | *(((unsigned char *)&rw->lock)+3) = 0; | ||
| 187 | barrier(); | ||
| 188 | goto wlock_wait; | ||
| 189 | } | ||
| 190 | |||
| 191 | barrier(); | ||
| 192 | rw->owner_pc = (cpu & 3) | (caller & ~3); | ||
| 193 | } | ||
| 194 | |||
| 195 | void _do_write_unlock(rwlock_t *rw) | ||
| 196 | { | ||
| 197 | rw->owner_pc = 0; | ||
| 198 | barrier(); | ||
| 199 | rw->lock = 0; | ||
| 200 | } | ||
| 201 | |||
| 202 | #endif /* SMP */ | ||
diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S new file mode 100644 index 000000000000..681b3683da9e --- /dev/null +++ b/arch/sparc/lib/divdi3.S | |||
| @@ -0,0 +1,295 @@ | |||
| 1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
| 2 | |||
| 3 | This file is part of GNU CC. | ||
| 4 | |||
| 5 | GNU CC is free software; you can redistribute it and/or modify | ||
| 6 | it under the terms of the GNU General Public License as published by | ||
| 7 | the Free Software Foundation; either version 2, or (at your option) | ||
| 8 | any later version. | ||
| 9 | |||
| 10 | GNU CC is distributed in the hope that it will be useful, | ||
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | GNU General Public License for more details. | ||
| 14 | |||
| 15 | You should have received a copy of the GNU General Public License | ||
| 16 | along with GNU CC; see the file COPYING. If not, write to | ||
| 17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
| 18 | Boston, MA 02111-1307, USA. */ | ||
| 19 | |||
| 20 | .data | ||
| 21 | .align 8 | ||
| 22 | .globl __clz_tab | ||
| 23 | __clz_tab: | ||
| 24 | .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 | ||
| 25 | .byte 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 | ||
| 26 | .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 | ||
| 27 | .byte 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 | ||
| 28 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 | ||
| 29 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 | ||
| 30 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 | ||
| 31 | .byte 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 | ||
| 32 | .size __clz_tab,256 | ||
| 33 | .global .udiv | ||
| 34 | |||
| 35 | .text | ||
| 36 | .align 4 | ||
| 37 | .globl __divdi3 | ||
| 38 | __divdi3: | ||
| 39 | save %sp,-104,%sp | ||
| 40 | cmp %i0,0 | ||
| 41 | bge .LL40 | ||
| 42 | mov 0,%l4 | ||
| 43 | mov -1,%l4 | ||
| 44 | sub %g0,%i1,%o0 | ||
| 45 | mov %o0,%o5 | ||
| 46 | subcc %g0,%o0,%g0 | ||
| 47 | sub %g0,%i0,%o0 | ||
| 48 | subx %o0,0,%o4 | ||
| 49 | mov %o4,%i0 | ||
| 50 | mov %o5,%i1 | ||
| 51 | .LL40: | ||
| 52 | cmp %i2,0 | ||
| 53 | bge .LL84 | ||
| 54 | mov %i3,%o4 | ||
| 55 | xnor %g0,%l4,%l4 | ||
| 56 | sub %g0,%i3,%o0 | ||
| 57 | mov %o0,%o3 | ||
| 58 | subcc %g0,%o0,%g0 | ||
| 59 | sub %g0,%i2,%o0 | ||
| 60 | subx %o0,0,%o2 | ||
| 61 | mov %o2,%i2 | ||
| 62 | mov %o3,%i3 | ||
| 63 | mov %i3,%o4 | ||
| 64 | .LL84: | ||
| 65 | cmp %i2,0 | ||
| 66 | bne .LL45 | ||
| 67 | mov %i1,%i3 | ||
| 68 | cmp %o4,%i0 | ||
| 69 | bleu .LL46 | ||
| 70 | mov %i3,%o1 | ||
| 71 | mov 32,%g1 | ||
| 72 | subcc %i0,%o4,%g0 | ||
| 73 | 1: bcs 5f | ||
| 74 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 75 | sub %i0,%o4,%i0 ! this kills msb of n | ||
| 76 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
| 77 | subcc %g1,1,%g1 | ||
| 78 | 2: bne 1b | ||
| 79 | subcc %i0,%o4,%g0 | ||
| 80 | bcs 3f | ||
| 81 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 82 | b 3f | ||
| 83 | sub %i0,%o4,%i0 ! this kills msb of n | ||
| 84 | 4: sub %i0,%o4,%i0 | ||
| 85 | 5: addxcc %i0,%i0,%i0 | ||
| 86 | bcc 2b | ||
| 87 | subcc %g1,1,%g1 | ||
| 88 | ! Got carry from n. Subtract next step to cancel this carry. | ||
| 89 | bne 4b | ||
| 90 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
| 91 | sub %i0,%o4,%i0 | ||
| 92 | 3: xnor %o1,0,%o1 | ||
| 93 | b .LL50 | ||
| 94 | mov 0,%o2 | ||
| 95 | .LL46: | ||
| 96 | cmp %o4,0 | ||
| 97 | bne .LL85 | ||
| 98 | mov %i0,%o2 | ||
| 99 | mov 1,%o0 | ||
| 100 | call .udiv,0 | ||
| 101 | mov 0,%o1 | ||
| 102 | mov %o0,%o4 | ||
| 103 | mov %i0,%o2 | ||
| 104 | .LL85: | ||
| 105 | mov 0,%g3 | ||
| 106 | mov 32,%g1 | ||
| 107 | subcc %g3,%o4,%g0 | ||
| 108 | 1: bcs 5f | ||
| 109 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
| 110 | sub %g3,%o4,%g3 ! this kills msb of n | ||
| 111 | addx %g3,%g3,%g3 ! so this cannot give carry | ||
| 112 | subcc %g1,1,%g1 | ||
| 113 | 2: bne 1b | ||
| 114 | subcc %g3,%o4,%g0 | ||
| 115 | bcs 3f | ||
| 116 | addxcc %o2,%o2,%o2 ! shift n1n0 and a q-bit in lsb | ||
| 117 | b 3f | ||
| 118 | sub %g3,%o4,%g3 ! this kills msb of n | ||
| 119 | 4: sub %g3,%o4,%g3 | ||
| 120 | 5: addxcc %g3,%g3,%g3 | ||
| 121 | bcc 2b | ||
| 122 | subcc %g1,1,%g1 | ||
| 123 | ! Got carry from n. Subtract next step to cancel this carry. | ||
| 124 | bne 4b | ||
| 125 | addcc %o2,%o2,%o2 ! shift n1n0 and a 0-bit in lsb | ||
| 126 | sub %g3,%o4,%g3 | ||
| 127 | 3: xnor %o2,0,%o2 | ||
| 128 | mov %g3,%i0 | ||
| 129 | mov %i3,%o1 | ||
| 130 | mov 32,%g1 | ||
| 131 | subcc %i0,%o4,%g0 | ||
| 132 | 1: bcs 5f | ||
| 133 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 134 | sub %i0,%o4,%i0 ! this kills msb of n | ||
| 135 | addx %i0,%i0,%i0 ! so this cannot give carry | ||
| 136 | subcc %g1,1,%g1 | ||
| 137 | 2: bne 1b | ||
| 138 | subcc %i0,%o4,%g0 | ||
| 139 | bcs 3f | ||
| 140 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 141 | b 3f | ||
| 142 | sub %i0,%o4,%i0 ! this kills msb of n | ||
| 143 | 4: sub %i0,%o4,%i0 | ||
| 144 | 5: addxcc %i0,%i0,%i0 | ||
| 145 | bcc 2b | ||
| 146 | subcc %g1,1,%g1 | ||
| 147 | ! Got carry from n. Subtract next step to cancel this carry. | ||
| 148 | bne 4b | ||
| 149 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
| 150 | sub %i0,%o4,%i0 | ||
| 151 | 3: xnor %o1,0,%o1 | ||
| 152 | b .LL86 | ||
| 153 | mov %o1,%l1 | ||
| 154 | .LL45: | ||
| 155 | cmp %i2,%i0 | ||
| 156 | bleu .LL51 | ||
| 157 | sethi %hi(65535),%o0 | ||
| 158 | b .LL78 | ||
| 159 | mov 0,%o1 | ||
| 160 | .LL51: | ||
| 161 | or %o0,%lo(65535),%o0 | ||
| 162 | cmp %i2,%o0 | ||
| 163 | bgu .LL58 | ||
| 164 | mov %i2,%o1 | ||
| 165 | cmp %i2,256 | ||
| 166 | addx %g0,-1,%o0 | ||
| 167 | b .LL64 | ||
| 168 | and %o0,8,%o2 | ||
| 169 | .LL58: | ||
| 170 | sethi %hi(16777215),%o0 | ||
| 171 | or %o0,%lo(16777215),%o0 | ||
| 172 | cmp %i2,%o0 | ||
| 173 | bgu .LL64 | ||
| 174 | mov 24,%o2 | ||
| 175 | mov 16,%o2 | ||
| 176 | .LL64: | ||
| 177 | srl %o1,%o2,%o0 | ||
| 178 | sethi %hi(__clz_tab),%o1 | ||
| 179 | or %o1,%lo(__clz_tab),%o1 | ||
| 180 | ldub [%o0+%o1],%o0 | ||
| 181 | add %o0,%o2,%o0 | ||
| 182 | mov 32,%o1 | ||
| 183 | subcc %o1,%o0,%o3 | ||
| 184 | bne,a .LL72 | ||
| 185 | sub %o1,%o3,%o1 | ||
| 186 | cmp %i0,%i2 | ||
| 187 | bgu .LL74 | ||
| 188 | cmp %i3,%o4 | ||
| 189 | blu .LL78 | ||
| 190 | mov 0,%o1 | ||
| 191 | .LL74: | ||
| 192 | b .LL78 | ||
| 193 | mov 1,%o1 | ||
| 194 | .LL72: | ||
| 195 | sll %i2,%o3,%o2 | ||
| 196 | srl %o4,%o1,%o0 | ||
| 197 | or %o2,%o0,%i2 | ||
| 198 | sll %o4,%o3,%o4 | ||
| 199 | srl %i0,%o1,%o2 | ||
| 200 | sll %i0,%o3,%o0 | ||
| 201 | srl %i3,%o1,%o1 | ||
| 202 | or %o0,%o1,%i0 | ||
| 203 | sll %i3,%o3,%i3 | ||
| 204 | mov %i0,%o1 | ||
| 205 | mov 32,%g1 | ||
| 206 | subcc %o2,%i2,%g0 | ||
| 207 | 1: bcs 5f | ||
| 208 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 209 | sub %o2,%i2,%o2 ! this kills msb of n | ||
| 210 | addx %o2,%o2,%o2 ! so this cannot give carry | ||
| 211 | subcc %g1,1,%g1 | ||
| 212 | 2: bne 1b | ||
| 213 | subcc %o2,%i2,%g0 | ||
| 214 | bcs 3f | ||
| 215 | addxcc %o1,%o1,%o1 ! shift n1n0 and a q-bit in lsb | ||
| 216 | b 3f | ||
| 217 | sub %o2,%i2,%o2 ! this kills msb of n | ||
| 218 | 4: sub %o2,%i2,%o2 | ||
| 219 | 5: addxcc %o2,%o2,%o2 | ||
| 220 | bcc 2b | ||
| 221 | subcc %g1,1,%g1 | ||
| 222 | ! Got carry from n. Subtract next step to cancel this carry. | ||
| 223 | bne 4b | ||
| 224 | addcc %o1,%o1,%o1 ! shift n1n0 and a 0-bit in lsb | ||
| 225 | sub %o2,%i2,%o2 | ||
| 226 | 3: xnor %o1,0,%o1 | ||
| 227 | mov %o2,%i0 | ||
| 228 | wr %g0,%o1,%y ! SPARC has 0-3 delay insn after a wr | ||
| 229 | sra %o4,31,%g2 ! Do not move this insn | ||
| 230 | and %o1,%g2,%g2 ! Do not move this insn | ||
| 231 | andcc %g0,0,%g1 ! Do not move this insn | ||
| 232 | mulscc %g1,%o4,%g1 | ||
| 233 | mulscc %g1,%o4,%g1 | ||
| 234 | mulscc %g1,%o4,%g1 | ||
| 235 | mulscc %g1,%o4,%g1 | ||
| 236 | mulscc %g1,%o4,%g1 | ||
| 237 | mulscc %g1,%o4,%g1 | ||
| 238 | mulscc %g1,%o4,%g1 | ||
| 239 | mulscc %g1,%o4,%g1 | ||
| 240 | mulscc %g1,%o4,%g1 | ||
| 241 | mulscc %g1,%o4,%g1 | ||
| 242 | mulscc %g1,%o4,%g1 | ||
| 243 | mulscc %g1,%o4,%g1 | ||
| 244 | mulscc %g1,%o4,%g1 | ||
| 245 | mulscc %g1,%o4,%g1 | ||
| 246 | mulscc %g1,%o4,%g1 | ||
| 247 | mulscc %g1,%o4,%g1 | ||
| 248 | mulscc %g1,%o4,%g1 | ||
| 249 | mulscc %g1,%o4,%g1 | ||
| 250 | mulscc %g1,%o4,%g1 | ||
| 251 | mulscc %g1,%o4,%g1 | ||
| 252 | mulscc %g1,%o4,%g1 | ||
| 253 | mulscc %g1,%o4,%g1 | ||
| 254 | mulscc %g1,%o4,%g1 | ||
| 255 | mulscc %g1,%o4,%g1 | ||
| 256 | mulscc %g1,%o4,%g1 | ||
| 257 | mulscc %g1,%o4,%g1 | ||
| 258 | mulscc %g1,%o4,%g1 | ||
| 259 | mulscc %g1,%o4,%g1 | ||
| 260 | mulscc %g1,%o4,%g1 | ||
| 261 | mulscc %g1,%o4,%g1 | ||
| 262 | mulscc %g1,%o4,%g1 | ||
| 263 | mulscc %g1,%o4,%g1 | ||
| 264 | mulscc %g1,0,%g1 | ||
| 265 | add %g1,%g2,%o0 | ||
| 266 | rd %y,%o2 | ||
| 267 | cmp %o0,%i0 | ||
| 268 | bgu,a .LL78 | ||
| 269 | add %o1,-1,%o1 | ||
| 270 | bne,a .LL50 | ||
| 271 | mov 0,%o2 | ||
| 272 | cmp %o2,%i3 | ||
| 273 | bleu .LL50 | ||
| 274 | mov 0,%o2 | ||
| 275 | add %o1,-1,%o1 | ||
| 276 | .LL78: | ||
| 277 | mov 0,%o2 | ||
| 278 | .LL50: | ||
| 279 | mov %o1,%l1 | ||
| 280 | .LL86: | ||
| 281 | mov %o2,%l0 | ||
| 282 | mov %l0,%i0 | ||
| 283 | mov %l1,%i1 | ||
| 284 | cmp %l4,0 | ||
| 285 | be .LL81 | ||
| 286 | sub %g0,%i1,%o0 | ||
| 287 | mov %o0,%l3 | ||
| 288 | subcc %g0,%o0,%g0 | ||
| 289 | sub %g0,%i0,%o0 | ||
| 290 | subx %o0,0,%l2 | ||
| 291 | mov %l2,%i0 | ||
| 292 | mov %l3,%i1 | ||
| 293 | .LL81: | ||
| 294 | ret | ||
| 295 | restore | ||
diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S new file mode 100644 index 000000000000..95fa48424967 --- /dev/null +++ b/arch/sparc/lib/locks.S | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | /* $Id: locks.S,v 1.16 2000/02/26 11:02:47 anton Exp $ | ||
| 2 | * locks.S: SMP low-level lock primitives on Sparc. | ||
| 3 | * | ||
| 4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 5 | * Copyright (C) 1998 Anton Blanchard (anton@progsoc.uts.edu.au) | ||
| 6 | * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <asm/ptrace.h> | ||
| 10 | #include <asm/psr.h> | ||
| 11 | #include <asm/smp.h> | ||
| 12 | #include <asm/spinlock.h> | ||
| 13 | |||
| 14 | .text | ||
| 15 | .align 4 | ||
| 16 | |||
| 17 | /* Read/writer locks, as usual this is overly clever to make it | ||
| 18 | * as fast as possible. | ||
| 19 | */ | ||
| 20 | |||
| 21 | /* caches... */ | ||
| 22 | ___rw_read_enter_spin_on_wlock: | ||
| 23 | orcc %g2, 0x0, %g0 | ||
| 24 | be,a ___rw_read_enter | ||
| 25 | ldstub [%g1 + 3], %g2 | ||
| 26 | b ___rw_read_enter_spin_on_wlock | ||
| 27 | ldub [%g1 + 3], %g2 | ||
| 28 | ___rw_read_exit_spin_on_wlock: | ||
| 29 | orcc %g2, 0x0, %g0 | ||
| 30 | be,a ___rw_read_exit | ||
| 31 | ldstub [%g1 + 3], %g2 | ||
| 32 | b ___rw_read_exit_spin_on_wlock | ||
| 33 | ldub [%g1 + 3], %g2 | ||
| 34 | ___rw_write_enter_spin_on_wlock: | ||
| 35 | orcc %g2, 0x0, %g0 | ||
| 36 | be,a ___rw_write_enter | ||
| 37 | ldstub [%g1 + 3], %g2 | ||
| 38 | b ___rw_write_enter_spin_on_wlock | ||
| 39 | ld [%g1], %g2 | ||
| 40 | |||
| 41 | .globl ___rw_read_enter | ||
| 42 | ___rw_read_enter: | ||
| 43 | orcc %g2, 0x0, %g0 | ||
| 44 | bne,a ___rw_read_enter_spin_on_wlock | ||
| 45 | ldub [%g1 + 3], %g2 | ||
| 46 | ld [%g1], %g2 | ||
| 47 | add %g2, 1, %g2 | ||
| 48 | st %g2, [%g1] | ||
| 49 | retl | ||
| 50 | mov %g4, %o7 | ||
| 51 | |||
| 52 | .globl ___rw_read_exit | ||
| 53 | ___rw_read_exit: | ||
| 54 | orcc %g2, 0x0, %g0 | ||
| 55 | bne,a ___rw_read_exit_spin_on_wlock | ||
| 56 | ldub [%g1 + 3], %g2 | ||
| 57 | ld [%g1], %g2 | ||
| 58 | sub %g2, 0x1ff, %g2 | ||
| 59 | st %g2, [%g1] | ||
| 60 | retl | ||
| 61 | mov %g4, %o7 | ||
| 62 | |||
| 63 | .globl ___rw_write_enter | ||
| 64 | ___rw_write_enter: | ||
| 65 | orcc %g2, 0x0, %g0 | ||
| 66 | bne ___rw_write_enter_spin_on_wlock | ||
| 67 | ld [%g1], %g2 | ||
| 68 | andncc %g2, 0xff, %g0 | ||
| 69 | bne,a ___rw_write_enter_spin_on_wlock | ||
| 70 | stb %g0, [%g1 + 3] | ||
| 71 | retl | ||
| 72 | mov %g4, %o7 | ||
diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S new file mode 100644 index 000000000000..35abf5b2bd15 --- /dev/null +++ b/arch/sparc/lib/lshrdi3.S | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* $Id: lshrdi3.S,v 1.1 1999/03/21 06:37:45 davem Exp $ */ | ||
| 2 | |||
| 3 | .globl __lshrdi3 | ||
| 4 | __lshrdi3: | ||
| 5 | cmp %o2, 0 | ||
| 6 | be 3f | ||
| 7 | mov 0x20, %g2 | ||
| 8 | |||
| 9 | sub %g2, %o2, %g2 | ||
| 10 | cmp %g2, 0 | ||
| 11 | bg 1f | ||
| 12 | srl %o0, %o2, %o4 | ||
| 13 | |||
| 14 | clr %o4 | ||
| 15 | neg %g2 | ||
| 16 | b 2f | ||
| 17 | srl %o0, %g2, %o5 | ||
| 18 | 1: | ||
| 19 | sll %o0, %g2, %g3 | ||
| 20 | srl %o1, %o2, %g2 | ||
| 21 | or %g2, %g3, %o5 | ||
| 22 | 2: | ||
| 23 | mov %o4, %o0 | ||
| 24 | mov %o5, %o1 | ||
| 25 | 3: | ||
| 26 | retl | ||
| 27 | nop | ||
diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S new file mode 100644 index 000000000000..cb4bdb0cc2af --- /dev/null +++ b/arch/sparc/lib/memcmp.S | |||
| @@ -0,0 +1,312 @@ | |||
| 1 | .text | ||
| 2 | .align 4 | ||
| 3 | .global __memcmp, memcmp | ||
| 4 | __memcmp: | ||
| 5 | memcmp: | ||
| 6 | #if 1 | ||
| 7 | cmp %o2, 0 | ||
| 8 | ble L3 | ||
| 9 | mov 0, %g3 | ||
| 10 | L5: | ||
| 11 | ldub [%o0], %g2 | ||
| 12 | ldub [%o1], %g3 | ||
| 13 | sub %g2, %g3, %g2 | ||
| 14 | mov %g2, %g3 | ||
| 15 | sll %g2, 24, %g2 | ||
| 16 | |||
| 17 | cmp %g2, 0 | ||
| 18 | bne L3 | ||
| 19 | add %o0, 1, %o0 | ||
| 20 | |||
| 21 | add %o2, -1, %o2 | ||
| 22 | |||
| 23 | cmp %o2, 0 | ||
| 24 | bg L5 | ||
| 25 | add %o1, 1, %o1 | ||
| 26 | L3: | ||
| 27 | sll %g3, 24, %o0 | ||
| 28 | sra %o0, 24, %o0 | ||
| 29 | |||
| 30 | retl | ||
| 31 | nop | ||
| 32 | #else | ||
| 33 | save %sp, -104, %sp | ||
| 34 | mov %i2, %o4 | ||
| 35 | mov %i0, %o0 | ||
| 36 | |||
| 37 | cmp %o4, 15 | ||
| 38 | ble L72 | ||
| 39 | mov %i1, %i2 | ||
| 40 | |||
| 41 | andcc %i2, 3, %g0 | ||
| 42 | be L161 | ||
| 43 | andcc %o0, 3, %g2 | ||
| 44 | L75: | ||
| 45 | ldub [%o0], %g3 | ||
| 46 | ldub [%i2], %g2 | ||
| 47 | add %o0,1, %o0 | ||
| 48 | |||
| 49 | subcc %g3, %g2, %i0 | ||
| 50 | bne L156 | ||
| 51 | add %i2, 1, %i2 | ||
| 52 | |||
| 53 | andcc %i2, 3, %g0 | ||
| 54 | bne L75 | ||
| 55 | add %o4, -1, %o4 | ||
| 56 | |||
| 57 | andcc %o0, 3, %g2 | ||
| 58 | L161: | ||
| 59 | bne,a L78 | ||
| 60 | mov %i2, %i1 | ||
| 61 | |||
| 62 | mov %o0, %i5 | ||
| 63 | mov %i2, %i3 | ||
| 64 | srl %o4, 2, %i4 | ||
| 65 | |||
| 66 | cmp %i4, 0 | ||
| 67 | bge L93 | ||
| 68 | mov %i4, %g2 | ||
| 69 | |||
| 70 | add %i4, 3, %g2 | ||
| 71 | L93: | ||
| 72 | sra %g2, 2, %g2 | ||
| 73 | sll %g2, 2, %g2 | ||
| 74 | sub %i4, %g2, %g2 | ||
| 75 | |||
| 76 | cmp %g2, 1 | ||
| 77 | be,a L88 | ||
| 78 | add %o0, 4, %i5 | ||
| 79 | |||
| 80 | bg L94 | ||
| 81 | cmp %g2, 2 | ||
| 82 | |||
| 83 | cmp %g2, 0 | ||
| 84 | be,a L86 | ||
| 85 | ld [%o0], %g3 | ||
| 86 | |||
| 87 | b L162 | ||
| 88 | ld [%i5], %g3 | ||
| 89 | L94: | ||
| 90 | be L81 | ||
| 91 | cmp %g2, 3 | ||
| 92 | |||
| 93 | be,a L83 | ||
| 94 | add %o0, -4, %i5 | ||
| 95 | |||
| 96 | b L162 | ||
| 97 | ld [%i5], %g3 | ||
| 98 | L81: | ||
| 99 | add %o0, -8, %i5 | ||
| 100 | ld [%o0], %g3 | ||
| 101 | add %i2, -8, %i3 | ||
| 102 | ld [%i2], %g2 | ||
| 103 | |||
| 104 | b L82 | ||
| 105 | add %i4, 2, %i4 | ||
| 106 | L83: | ||
| 107 | ld [%o0], %g4 | ||
| 108 | add %i2, -4, %i3 | ||
| 109 | ld [%i2], %g1 | ||
| 110 | |||
| 111 | b L84 | ||
| 112 | add %i4, 1, %i4 | ||
| 113 | L86: | ||
| 114 | b L87 | ||
| 115 | ld [%i2], %g2 | ||
| 116 | L88: | ||
| 117 | add %i2, 4, %i3 | ||
| 118 | ld [%o0], %g4 | ||
| 119 | add %i4, -1, %i4 | ||
| 120 | ld [%i2], %g1 | ||
| 121 | L95: | ||
| 122 | ld [%i5], %g3 | ||
| 123 | L162: | ||
| 124 | cmp %g4, %g1 | ||
| 125 | be L87 | ||
| 126 | ld [%i3], %g2 | ||
| 127 | |||
| 128 | cmp %g4, %g1 | ||
| 129 | L163: | ||
| 130 | bleu L114 | ||
| 131 | mov -1, %i0 | ||
| 132 | |||
| 133 | b L114 | ||
| 134 | mov 1, %i0 | ||
| 135 | L87: | ||
| 136 | ld [%i5 + 4], %g4 | ||
| 137 | cmp %g3, %g2 | ||
| 138 | bne L163 | ||
| 139 | ld [%i3 + 4], %g1 | ||
| 140 | L84: | ||
| 141 | ld [%i5 + 8], %g3 | ||
| 142 | |||
| 143 | cmp %g4, %g1 | ||
| 144 | bne L163 | ||
| 145 | ld [%i3 + 8], %g2 | ||
| 146 | L82: | ||
| 147 | ld [%i5 + 12], %g4 | ||
| 148 | cmp %g3, %g2 | ||
| 149 | bne L163 | ||
| 150 | ld [%i3 + 12], %g1 | ||
| 151 | |||
| 152 | add %i5, 16, %i5 | ||
| 153 | |||
| 154 | addcc %i4, -4, %i4 | ||
| 155 | bne L95 | ||
| 156 | add %i3, 16, %i3 | ||
| 157 | |||
| 158 | cmp %g4, %g1 | ||
| 159 | bne L163 | ||
| 160 | nop | ||
| 161 | |||
| 162 | b L114 | ||
| 163 | mov 0, %i0 | ||
| 164 | L78: | ||
| 165 | srl %o4, 2, %i0 | ||
| 166 | and %o0, -4, %i3 | ||
| 167 | orcc %i0, %g0, %g3 | ||
| 168 | sll %g2, 3, %o7 | ||
| 169 | mov 32, %g2 | ||
| 170 | |||
| 171 | bge L129 | ||
| 172 | sub %g2, %o7, %o1 | ||
| 173 | |||
| 174 | add %i0, 3, %g3 | ||
| 175 | L129: | ||
| 176 | sra %g3, 2, %g2 | ||
| 177 | sll %g2, 2, %g2 | ||
| 178 | sub %i0, %g2, %g2 | ||
| 179 | |||
| 180 | cmp %g2, 1 | ||
| 181 | be,a L124 | ||
| 182 | ld [%i3], %o3 | ||
| 183 | |||
| 184 | bg L130 | ||
| 185 | cmp %g2, 2 | ||
| 186 | |||
| 187 | cmp %g2, 0 | ||
| 188 | be,a L122 | ||
| 189 | ld [%i3], %o2 | ||
| 190 | |||
| 191 | b L164 | ||
| 192 | sll %o3, %o7, %g3 | ||
| 193 | L130: | ||
| 194 | be L117 | ||
| 195 | cmp %g2, 3 | ||
| 196 | |||
| 197 | be,a L119 | ||
| 198 | ld [%i3], %g1 | ||
| 199 | |||
| 200 | b L164 | ||
| 201 | sll %o3, %o7, %g3 | ||
| 202 | L117: | ||
| 203 | ld [%i3], %g4 | ||
| 204 | add %i2, -8, %i1 | ||
| 205 | ld [%i3 + 4], %o3 | ||
| 206 | add %i0, 2, %i0 | ||
| 207 | ld [%i2], %i4 | ||
| 208 | |||
| 209 | b L118 | ||
| 210 | add %i3, -4, %i3 | ||
| 211 | L119: | ||
| 212 | ld [%i3 + 4], %g4 | ||
| 213 | add %i2, -4, %i1 | ||
| 214 | ld [%i2], %i5 | ||
| 215 | |||
| 216 | b L120 | ||
| 217 | add %i0, 1, %i0 | ||
| 218 | L122: | ||
| 219 | ld [%i3 + 4], %g1 | ||
| 220 | ld [%i2], %i4 | ||
| 221 | |||
| 222 | b L123 | ||
| 223 | add %i3, 4, %i3 | ||
| 224 | L124: | ||
| 225 | add %i2, 4, %i1 | ||
| 226 | ld [%i3 + 4], %o2 | ||
| 227 | add %i0, -1, %i0 | ||
| 228 | ld [%i2], %i5 | ||
| 229 | add %i3, 8, %i3 | ||
| 230 | L131: | ||
| 231 | sll %o3, %o7, %g3 | ||
| 232 | L164: | ||
| 233 | srl %o2, %o1, %g2 | ||
| 234 | ld [%i3], %g1 | ||
| 235 | or %g3, %g2, %g3 | ||
| 236 | |||
| 237 | cmp %g3, %i5 | ||
| 238 | bne L163 | ||
| 239 | ld [%i1], %i4 | ||
| 240 | L123: | ||
| 241 | sll %o2, %o7, %g3 | ||
| 242 | srl %g1, %o1, %g2 | ||
| 243 | ld [%i3 + 4], %g4 | ||
| 244 | or %g3, %g2, %g3 | ||
| 245 | |||
| 246 | cmp %g3, %i4 | ||
| 247 | bne L163 | ||
| 248 | ld [%i1 + 4], %i5 | ||
| 249 | L120: | ||
| 250 | sll %g1, %o7, %g3 | ||
| 251 | srl %g4, %o1, %g2 | ||
| 252 | ld [%i3 + 8], %o3 | ||
| 253 | or %g3, %g2, %g3 | ||
| 254 | |||
| 255 | cmp %g3, %i5 | ||
| 256 | bne L163 | ||
| 257 | ld [%i1 + 8], %i4 | ||
| 258 | L118: | ||
| 259 | sll %g4, %o7, %g3 | ||
| 260 | srl %o3, %o1, %g2 | ||
| 261 | ld [%i3 + 12], %o2 | ||
| 262 | or %g3, %g2, %g3 | ||
| 263 | |||
| 264 | cmp %g3, %i4 | ||
| 265 | bne L163 | ||
| 266 | ld [%i1 + 12], %i5 | ||
| 267 | |||
| 268 | add %i3, 16, %i3 | ||
| 269 | addcc %i0, -4, %i0 | ||
| 270 | bne L131 | ||
| 271 | add %i1, 16, %i1 | ||
| 272 | |||
| 273 | sll %o3, %o7, %g3 | ||
| 274 | srl %o2, %o1, %g2 | ||
| 275 | or %g3, %g2, %g3 | ||
| 276 | |||
| 277 | cmp %g3, %i5 | ||
| 278 | be,a L114 | ||
| 279 | mov 0, %i0 | ||
| 280 | |||
| 281 | b,a L163 | ||
| 282 | L114: | ||
| 283 | cmp %i0, 0 | ||
| 284 | bne L156 | ||
| 285 | and %o4, -4, %g2 | ||
| 286 | |||
| 287 | add %o0, %g2, %o0 | ||
| 288 | add %i2, %g2, %i2 | ||
| 289 | and %o4, 3, %o4 | ||
| 290 | L72: | ||
| 291 | cmp %o4, 0 | ||
| 292 | be L156 | ||
| 293 | mov 0, %i0 | ||
| 294 | |||
| 295 | ldub [%o0], %g3 | ||
| 296 | L165: | ||
| 297 | ldub [%i2], %g2 | ||
| 298 | add %o0, 1, %o0 | ||
| 299 | |||
| 300 | subcc %g3, %g2, %i0 | ||
| 301 | bne L156 | ||
| 302 | add %i2, 1, %i2 | ||
| 303 | |||
| 304 | addcc %o4, -1, %o4 | ||
| 305 | bne,a L165 | ||
| 306 | ldub [%o0], %g3 | ||
| 307 | |||
| 308 | mov 0, %i0 | ||
| 309 | L156: | ||
| 310 | ret | ||
| 311 | restore | ||
| 312 | #endif | ||
diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S new file mode 100644 index 000000000000..ce10bc869af9 --- /dev/null +++ b/arch/sparc/lib/memcpy.S | |||
| @@ -0,0 +1,1150 @@ | |||
| 1 | /* memcpy.S: Sparc optimized memcpy and memmove code | ||
| 2 | * Hand optimized from GNU libc's memcpy and memmove | ||
| 3 | * Copyright (C) 1991,1996 Free Software Foundation | ||
| 4 | * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) | ||
| 5 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 6 | * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) | ||
| 7 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifdef __KERNEL__ | ||
| 11 | |||
| 12 | #define FUNC(x) \ | ||
| 13 | .globl x; \ | ||
| 14 | .type x,@function; \ | ||
| 15 | .align 4; \ | ||
| 16 | x: | ||
| 17 | |||
| 18 | #undef FASTER_REVERSE | ||
| 19 | #undef FASTER_NONALIGNED | ||
| 20 | #define FASTER_ALIGNED | ||
| 21 | |||
| 22 | /* In kernel these functions don't return a value. | ||
| 23 | * One should use macros in asm/string.h for that purpose. | ||
| 24 | * We return 0, so that bugs are more apparent. | ||
| 25 | */ | ||
| 26 | #define SETUP_RETL | ||
| 27 | #define RETL_INSN clr %o0 | ||
| 28 | |||
| 29 | #else | ||
| 30 | |||
| 31 | /* libc */ | ||
| 32 | |||
| 33 | #include "DEFS.h" | ||
| 34 | |||
| 35 | #define FASTER_REVERSE | ||
| 36 | #define FASTER_NONALIGNED | ||
| 37 | #define FASTER_ALIGNED | ||
| 38 | |||
| 39 | #define SETUP_RETL mov %o0, %g6 | ||
| 40 | #define RETL_INSN mov %g6, %o0 | ||
| 41 | |||
| 42 | #endif | ||
| 43 | |||
| 44 | /* Both these macros have to start with exactly the same insn */ | ||
| 45 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
| 46 | ldd [%src + (offset) + 0x00], %t0; \ | ||
| 47 | ldd [%src + (offset) + 0x08], %t2; \ | ||
| 48 | ldd [%src + (offset) + 0x10], %t4; \ | ||
| 49 | ldd [%src + (offset) + 0x18], %t6; \ | ||
| 50 | st %t0, [%dst + (offset) + 0x00]; \ | ||
| 51 | st %t1, [%dst + (offset) + 0x04]; \ | ||
| 52 | st %t2, [%dst + (offset) + 0x08]; \ | ||
| 53 | st %t3, [%dst + (offset) + 0x0c]; \ | ||
| 54 | st %t4, [%dst + (offset) + 0x10]; \ | ||
| 55 | st %t5, [%dst + (offset) + 0x14]; \ | ||
| 56 | st %t6, [%dst + (offset) + 0x18]; \ | ||
| 57 | st %t7, [%dst + (offset) + 0x1c]; | ||
| 58 | |||
| 59 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
| 60 | ldd [%src + (offset) + 0x00], %t0; \ | ||
| 61 | ldd [%src + (offset) + 0x08], %t2; \ | ||
| 62 | ldd [%src + (offset) + 0x10], %t4; \ | ||
| 63 | ldd [%src + (offset) + 0x18], %t6; \ | ||
| 64 | std %t0, [%dst + (offset) + 0x00]; \ | ||
| 65 | std %t2, [%dst + (offset) + 0x08]; \ | ||
| 66 | std %t4, [%dst + (offset) + 0x10]; \ | ||
| 67 | std %t6, [%dst + (offset) + 0x18]; | ||
| 68 | |||
| 69 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | ||
| 70 | ldd [%src - (offset) - 0x10], %t0; \ | ||
| 71 | ldd [%src - (offset) - 0x08], %t2; \ | ||
| 72 | st %t0, [%dst - (offset) - 0x10]; \ | ||
| 73 | st %t1, [%dst - (offset) - 0x0c]; \ | ||
| 74 | st %t2, [%dst - (offset) - 0x08]; \ | ||
| 75 | st %t3, [%dst - (offset) - 0x04]; | ||
| 76 | |||
| 77 | #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ | ||
| 78 | ldd [%src - (offset) - 0x10], %t0; \ | ||
| 79 | ldd [%src - (offset) - 0x08], %t2; \ | ||
| 80 | std %t0, [%dst - (offset) - 0x10]; \ | ||
| 81 | std %t2, [%dst - (offset) - 0x08]; | ||
| 82 | |||
| 83 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ | ||
| 84 | ldub [%src - (offset) - 0x02], %t0; \ | ||
| 85 | ldub [%src - (offset) - 0x01], %t1; \ | ||
| 86 | stb %t0, [%dst - (offset) - 0x02]; \ | ||
| 87 | stb %t1, [%dst - (offset) - 0x01]; | ||
| 88 | |||
| 89 | /* Both these macros have to start with exactly the same insn */ | ||
| 90 | #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
| 91 | ldd [%src - (offset) - 0x20], %t0; \ | ||
| 92 | ldd [%src - (offset) - 0x18], %t2; \ | ||
| 93 | ldd [%src - (offset) - 0x10], %t4; \ | ||
| 94 | ldd [%src - (offset) - 0x08], %t6; \ | ||
| 95 | st %t0, [%dst - (offset) - 0x20]; \ | ||
| 96 | st %t1, [%dst - (offset) - 0x1c]; \ | ||
| 97 | st %t2, [%dst - (offset) - 0x18]; \ | ||
| 98 | st %t3, [%dst - (offset) - 0x14]; \ | ||
| 99 | st %t4, [%dst - (offset) - 0x10]; \ | ||
| 100 | st %t5, [%dst - (offset) - 0x0c]; \ | ||
| 101 | st %t6, [%dst - (offset) - 0x08]; \ | ||
| 102 | st %t7, [%dst - (offset) - 0x04]; | ||
| 103 | |||
| 104 | #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | ||
| 105 | ldd [%src - (offset) - 0x20], %t0; \ | ||
| 106 | ldd [%src - (offset) - 0x18], %t2; \ | ||
| 107 | ldd [%src - (offset) - 0x10], %t4; \ | ||
| 108 | ldd [%src - (offset) - 0x08], %t6; \ | ||
| 109 | std %t0, [%dst - (offset) - 0x20]; \ | ||
| 110 | std %t2, [%dst - (offset) - 0x18]; \ | ||
| 111 | std %t4, [%dst - (offset) - 0x10]; \ | ||
| 112 | std %t6, [%dst - (offset) - 0x08]; | ||
| 113 | |||
| 114 | #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | ||
| 115 | ldd [%src + (offset) + 0x00], %t0; \ | ||
| 116 | ldd [%src + (offset) + 0x08], %t2; \ | ||
| 117 | st %t0, [%dst + (offset) + 0x00]; \ | ||
| 118 | st %t1, [%dst + (offset) + 0x04]; \ | ||
| 119 | st %t2, [%dst + (offset) + 0x08]; \ | ||
| 120 | st %t3, [%dst + (offset) + 0x0c]; | ||
| 121 | |||
| 122 | #define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ | ||
| 123 | ldub [%src + (offset) + 0x00], %t0; \ | ||
| 124 | ldub [%src + (offset) + 0x01], %t1; \ | ||
| 125 | stb %t0, [%dst + (offset) + 0x00]; \ | ||
| 126 | stb %t1, [%dst + (offset) + 0x01]; | ||
| 127 | |||
| 128 | #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ | ||
| 129 | ldd [%src + (offset) + 0x00], %t0; \ | ||
| 130 | ldd [%src + (offset) + 0x08], %t2; \ | ||
| 131 | srl %t0, shir, %t5; \ | ||
| 132 | srl %t1, shir, %t6; \ | ||
| 133 | sll %t0, shil, %t0; \ | ||
| 134 | or %t5, %prev, %t5; \ | ||
| 135 | sll %t1, shil, %prev; \ | ||
| 136 | or %t6, %t0, %t0; \ | ||
| 137 | srl %t2, shir, %t1; \ | ||
| 138 | srl %t3, shir, %t6; \ | ||
| 139 | sll %t2, shil, %t2; \ | ||
| 140 | or %t1, %prev, %t1; \ | ||
| 141 | std %t4, [%dst + (offset) + (offset2) - 0x04]; \ | ||
| 142 | std %t0, [%dst + (offset) + (offset2) + 0x04]; \ | ||
| 143 | sll %t3, shil, %prev; \ | ||
| 144 | or %t6, %t2, %t4; | ||
| 145 | |||
| 146 | #define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ | ||
| 147 | ldd [%src + (offset) + 0x00], %t0; \ | ||
| 148 | ldd [%src + (offset) + 0x08], %t2; \ | ||
| 149 | srl %t0, shir, %t4; \ | ||
| 150 | srl %t1, shir, %t5; \ | ||
| 151 | sll %t0, shil, %t6; \ | ||
| 152 | or %t4, %prev, %t0; \ | ||
| 153 | sll %t1, shil, %prev; \ | ||
| 154 | or %t5, %t6, %t1; \ | ||
| 155 | srl %t2, shir, %t4; \ | ||
| 156 | srl %t3, shir, %t5; \ | ||
| 157 | sll %t2, shil, %t6; \ | ||
| 158 | or %t4, %prev, %t2; \ | ||
| 159 | sll %t3, shil, %prev; \ | ||
| 160 | or %t5, %t6, %t3; \ | ||
| 161 | std %t0, [%dst + (offset) + (offset2) + 0x00]; \ | ||
| 162 | std %t2, [%dst + (offset) + (offset2) + 0x08]; | ||
| 163 | |||
| 164 | .text | ||
| 165 | .align 4 | ||
| 166 | |||
| 167 | #ifdef FASTER_REVERSE | ||
| 168 | |||
| 169 | 70: /* rdword_align */ | ||
| 170 | |||
| 171 | andcc %o1, 1, %g0 | ||
| 172 | be 4f | ||
| 173 | andcc %o1, 2, %g0 | ||
| 174 | |||
| 175 | ldub [%o1 - 1], %g2 | ||
| 176 | sub %o1, 1, %o1 | ||
| 177 | stb %g2, [%o0 - 1] | ||
| 178 | sub %o2, 1, %o2 | ||
| 179 | be 3f | ||
| 180 | sub %o0, 1, %o0 | ||
| 181 | 4: | ||
| 182 | lduh [%o1 - 2], %g2 | ||
| 183 | sub %o1, 2, %o1 | ||
| 184 | sth %g2, [%o0 - 2] | ||
| 185 | sub %o2, 2, %o2 | ||
| 186 | b 3f | ||
| 187 | sub %o0, 2, %o0 | ||
| 188 | |||
| 189 | #endif /* FASTER_REVERSE */ | ||
| 190 | |||
| 191 | 0: | ||
| 192 | retl | ||
| 193 | nop ! Only bcopy returns here and it retuns void... | ||
| 194 | |||
| 195 | #ifdef __KERNEL__ | ||
| 196 | FUNC(amemmove) | ||
| 197 | FUNC(__memmove) | ||
| 198 | #endif | ||
| 199 | FUNC(memmove) | ||
| 200 | cmp %o0, %o1 | ||
| 201 | SETUP_RETL | ||
| 202 | bleu 9f | ||
| 203 | sub %o0, %o1, %o4 | ||
| 204 | |||
| 205 | add %o1, %o2, %o3 | ||
| 206 | cmp %o3, %o0 | ||
| 207 | bleu 0f | ||
| 208 | andcc %o4, 3, %o5 | ||
| 209 | |||
| 210 | #ifndef FASTER_REVERSE | ||
| 211 | |||
| 212 | add %o1, %o2, %o1 | ||
| 213 | add %o0, %o2, %o0 | ||
| 214 | sub %o1, 1, %o1 | ||
| 215 | sub %o0, 1, %o0 | ||
| 216 | |||
| 217 | 1: /* reverse_bytes */ | ||
| 218 | |||
| 219 | ldub [%o1], %o4 | ||
| 220 | subcc %o2, 1, %o2 | ||
| 221 | stb %o4, [%o0] | ||
| 222 | sub %o1, 1, %o1 | ||
| 223 | bne 1b | ||
| 224 | sub %o0, 1, %o0 | ||
| 225 | |||
| 226 | retl | ||
| 227 | RETL_INSN | ||
| 228 | |||
#else /* FASTER_REVERSE */

	/* Optimized backward (top-down) copy for overlapping moves.
	 * Mirrors the forward memcpy below: align the source, move
	 * 128-byte chunks with RMOVE_BIGCHUNK / RMOVE_BIGALIGNCHUNK,
	 * dispatch the 16..112-byte remainder by computing a jump into
	 * the RMOVE_LASTCHUNK table, then mop up the final 0..7 bytes.
	 * %g1 holds the working length; condition codes are frequently
	 * set in one delay slot and consumed after the next label.
	 */
	add	%o1, %o2, %o1	! point src/dst past the last byte
	add	%o0, %o2, %o0
	bne	77f		! (dst-src) not word-congruent -> shifter path
	cmp	%o2, 15		! (delay slot)
	bleu	91f		! short copy (<= 15 bytes)
	andcc	%o1, 3, %g0	! (delay slot) src word aligned?
	bne	70b		! no -> byte/halfword align first (rdword_align)
3:
	andcc	%o1, 4, %g0	! src 8-byte aligned?

	be	2f
	mov	%o2, %g1	! (delay slot) %g1 = remaining length

	ld	[%o1 - 4], %o4	! copy one word to reach 8-byte alignment
	sub	%g1, 4, %g1
	st	%o4, [%o0 - 4]
	sub	%o1, 4, %o1
	sub	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7	! %g7 = length in whole 128-byte blocks
	be	3f
	andcc	%o0, 4, %g0	! (delay slot) dst 8-byte aligned?

	be	74f + 4		! dst doubleword-aligned: use ldd/std loop,
				! skipping its first (already done) insn
5:
	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	sub	%o1, 128, %o1
	bne	5b
	sub	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7	! 16-byte groups remaining (0..7 of them)
	be	72f
	andcc	%g1, 8, %g0	! (delay slot) consumed at 72f

	/* Computed jump into the RMOVE_LASTCHUNK table: each 16-byte
	 * entry is 6 instructions (24 bytes), so offset = %g7 + %g7/2.
	 */
	sethi	%hi(72f), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4	! %o4 = %g7 * 1.5
	sub	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(72f), %g0
	sub	%o0, %g7, %o0

71:	/* rmemcpy_table */
	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

72:	/* rmemcpy_table_end */

	be	73f		! tests "andcc %g1, 8" set before the jump
	andcc	%g1, 4, %g0	! (delay slot)

	ldd	[%o1 - 0x08], %g2	! move remaining 8-byte unit (%g2/%g3 pair)
	sub	%o0, 8, %o0
	sub	%o1, 8, %o1
	st	%g2, [%o0]
	st	%g3, [%o0 + 0x04]

73:	/* rmemcpy_last7: final 0..7 bytes, flags chained through delay slots */

	be	1f
	andcc	%g1, 2, %g0

	ld	[%o1 - 4], %g2
	sub	%o1, 4, %o1
	st	%g2, [%o0 - 4]
	sub	%o0, 4, %o0
1:
	be	1f
	andcc	%g1, 1, %g0

	lduh	[%o1 - 2], %g2
	sub	%o1, 2, %o1
	sth	%g2, [%o0 - 2]
	sub	%o0, 2, %o0
1:
	be	1f
	nop

	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:
	retl
	RETL_INSN

74:	/* rldd_std: 128-byte reverse loop using doubleword ldd/std */
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	sub	%o1, 128, %o1
	bne	74b
	sub	%o0, 128, %o0

	andcc	%g1, 0x70, %g7	! same remainder dispatch as above
	be	72b
	andcc	%g1, 8, %g0

	sethi	%hi(72b), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	sub	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(72b), %g0
	sub	%o0, %g7, %o0

75:	/* rshort_end: <= 15 bytes, reverse; jump-table of 2-byte moves */

	and	%o2, 0xe, %o3	! halfword count * 2
2:
	sethi	%hi(76f), %o5
	sll	%o3, 3, %o4	! each RMOVE_SHORTCHUNK entry is 8 bytes
	sub	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	sub	%o1, %o3, %o1
	jmpl	%o5 + %lo(76f), %g0
	andcc	%o2, 1, %g0	! (delay slot) odd byte left? tested at 76f

	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

76:	/* rshort_table_end */

	be	1f
	nop
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:
	retl
	RETL_INSN

91:	/* rshort_aligned_end: <= 15 bytes with src word-aligned */

	bne	75b		! src not word aligned after all -> byte table
	andcc	%o2, 8, %g0

	be	1f
	andcc	%o2, 4, %g0

	ld	[%o1 - 0x08], %g2	! move 8 bytes as two words
	ld	[%o1 - 0x04], %g3
	sub	%o1, 8, %o1
	st	%g2, [%o0 - 0x08]
	st	%g3, [%o0 - 0x04]
	sub	%o0, 8, %o0
1:
	b	73b		! finish the last 0..7 bytes
	mov	%o2, %g1

77:	/* rnon_aligned: dst-src not word-congruent; shift-and-merge copy */
	cmp	%o2, 15
	bleu	75b		! too short to be worth it -> byte table
	andcc	%o0, 3, %g0	! (delay slot) dst word aligned?
	be	64f
	andcc	%o0, 1, %g0
	be	63f
	andcc	%o0, 2, %g0
	ldub	[%o1 - 1], %g5	! align dst: one byte down
	sub	%o1, 1, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 1, %o0
	be	64f
	sub	%o2, 1, %o2
63:
	ldub	[%o1 - 1], %g5	! align dst: two more bytes down
	sub	%o1, 2, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 2, %o0
	ldub	[%o1], %g5
	sub	%o2, 2, %o2
	stb	%g5, [%o0]
64:
	/* Set up the shifter: %g4 = 8*(src&3) left-shift, %g7 = 32-%g4
	 * right-shift; words are read aligned and spliced with sll/srl/or.
	 * Entry point (4:/3:/2:/fall-through) chosen from len&0xc so the
	 * 4-word software pipeline below starts at the right stage.
	 */
	and	%o1, 3, %g2
	and	%o1, -4, %o1
	and	%o2, 0xc, %g3
	add	%o1, 4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	sub	%g2, %g4, %g7

	blu	3f
	cmp	%g3, 8

	be	2f
	srl	%o2, 2, %g3

	ld	[%o1 - 4], %o3
	add	%o0, -8, %o0
	ld	[%o1 - 8], %o4
	add	%o1, -16, %o1
	b	7f
	add	%g3, 1, %g3
2:
	ld	[%o1 - 4], %o4
	add	%o0, -4, %o0
	ld	[%o1 - 8], %g1
	add	%o1, -12, %o1
	b	8f
	add	%g3, 2, %g3
3:
	ld	[%o1 - 4], %o5
	add	%o0, -12, %o0
	ld	[%o1 - 8], %o3
	add	%o1, -20, %o1
	b	6f
	srl	%o2, 2, %g3
4:
	ld	[%o1 - 4], %g1
	srl	%o2, 2, %g3
	ld	[%o1 - 8], %o5
	add	%o1, -24, %o1
	add	%o0, -16, %o0
	add	%g3, -1, %g3

	ld	[%o1 + 12], %o3
5:
	/* 4-words-per-iteration splice loop (reverse direction). */
	sll	%o5, %g4, %g2
	srl	%g1, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 12]
6:
	ld	[%o1 + 8], %o4
	sll	%o3, %g4, %g2
	srl	%o5, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
7:
	ld	[%o1 + 4], %g1
	sll	%o4, %g4, %g2
	srl	%o3, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:
	ld	[%o1], %o5
	sll	%g1, %g4, %g2
	srl	%o4, %g7, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, -16, %o1
	st	%g2, [%o0]
	add	%o0, -16, %o0
	bne,a	5b		! annulled: reload %o3 only if looping
	ld	[%o1 + 12], %o3
	sll	%o5, %g4, %g2	! drain the pipeline: last spliced word
	srl	%g1, %g7, %g5
	srl	%g4, 3, %g3	! %g3 = src&3 again (bytes of misalignment)
	or	%g2, %g5, %g2
	add	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0 + 12]
	be	1f
	andcc	%o2, 1, %g0

	ldub	[%o1 + 15], %g5	! trailing 2 bytes
	add	%o1, -2, %o1
	stb	%g5, [%o0 + 11]
	add	%o0, -2, %o0
	ldub	[%o1 + 16], %g5
	stb	%g5, [%o0 + 12]
1:
	be	1f
	nop
	ldub	[%o1 + 15], %g5	! trailing odd byte
	stb	%g5, [%o0 + 11]
1:
	retl
	RETL_INSN

#endif /* FASTER_REVERSE */
| 516 | |||
/* NOTE: This code is executed just for the cases,
   where %src (=%o1) & 3 is != 0.
   We need to align it to 4. So, for (%src & 3)
   1 we need to do ldub,lduh
   2 lduh
   3 just ldub
   so even if it looks weird, the branches
   are correct here. -jj
 */
78:	/* dword_align */

	/* Forward-copy twin of 70b: consume 1 and/or 2 leading bytes
	 * until %o1 is word aligned, then rejoin memcpy at label 3.
	 */
	andcc	%o1, 1, %g0
	be	4f
	andcc	%o1, 2, %g0	! (delay slot)

	ldub	[%o1], %g2	! one byte forward
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f		! src was (…&3)==3: byte alone sufficed? no ->
	add	%o0, 1, %o0	! see NOTE above; branch sense is deliberate
4:
	lduh	[%o1], %g2	! one halfword forward
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f		! rejoin aligned memcpy path
	add	%o0, 2, %o0
| 545 | |||
#ifdef __KERNEL__
FUNC(__memcpy)
#endif
/* void *memcpy(void *dst (%o0), const void *src (%o1), size_t n (%o2))
 *
 * Forward copy.  Strategy, once src and dst are word-congruent:
 * align src to 8 bytes, move 128-byte blocks (MOVE_BIGCHUNK, or
 * ldd/std via 82f when dst is doubleword aligned), dispatch the
 * 16..112-byte remainder through the MOVE_LASTCHUNK jump table,
 * then finish the last 0..7 bytes at memcpy_last7.  memmove enters
 * at 9f/0f when a forward copy is safe.  %g1 = working length.
 */
FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */

	sub	%o0, %o1, %o4
	SETUP_RETL
9:
	andcc	%o4, 3, %o5	! (dst-src) & 3: can both be word-aligned?
0:
	bne	86f		! no -> non-aligned shifter path
	cmp	%o2, 15		! (delay slot)

	bleu	90f		! short copy (<= 15 bytes)
	andcc	%o1, 3, %g0	! (delay slot) src word aligned?

	bne	78b		! no -> dword_align above
3:
	andcc	%o1, 4, %g0	! src 8-byte aligned?

	be	2f
	mov	%o2, %g1	! (delay slot) %g1 = remaining length

	ld	[%o1], %o4	! one word to reach 8-byte alignment
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7	! whole 128-byte blocks
	be	3f
	andcc	%o0, 4, %g0	! (delay slot) dst 8-byte aligned?

	be	82f + 4		! yes -> ldd/std loop, skipping its 1st insn
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7	! 16-byte groups remaining
	be	80f
	andcc	%g1, 8, %g0	! (delay slot) consumed at 80f

	/* Jump into the MOVE_LASTCHUNK table; each entry is 6 insns
	 * (24 bytes) per 16 bytes copied, hence offset = %g7 * 1.5.
	 */
	sethi	%hi(80f), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80f), %g0
	add	%o0, %g7, %o0

79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be	81f		! tests "andcc %g1, 8" from before the jump
	andcc	%g1, 4, %g0	! (delay slot)

	ldd	[%o1], %g2	! move 8 bytes (register pair %g2/%g3)
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]

81:	/* memcpy_last7: final 0..7 bytes */

	be	1f
	andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	RETL_INSN
| 648 | |||
82:	/* ldd_std: 128-byte forward loop with doubleword ldd/std
	 * (both src and dst 8-byte aligned; entered at 82f+4 with the
	 * first load already issued by the branch's fall-through). */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	82b
	add	%o0, 128, %o0

#ifndef FASTER_ALIGNED

	/* Remainder handled by the generic word-wise table at 80b. */
	andcc	%g1, 0x70, %g7
	be	80b
	andcc	%g1, 8, %g0

	sethi	%hi(80b), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4	! table entries are 24 bytes per 16 copied
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80b), %g0
	add	%o0, %g7, %o0

#else /* FASTER_ALIGNED */

	/* Doubleword remainder table: each MOVE_LASTALIGNCHUNK entry is
	 * 16 bytes of code per 16 bytes copied, so the byte count %g7
	 * itself is the table offset (no *1.5 scaling here). */
	andcc	%g1, 0x70, %g7
	be	84f
	andcc	%g1, 8, %g0

	sethi	%hi(84f), %o5
	add	%o1, %g7, %o1
	sub	%o5, %g7, %o5
	jmpl	%o5 + %lo(84f), %g0
	add	%o0, %g7, %o0

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be	85f
	andcc	%g1, 4, %g0

	ldd	[%o1], %g2	! 8 bytes in one std
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	/* amemcpy_last7: final 0..7 bytes */
	be	1f
	andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	RETL_INSN

#endif /* FASTER_ALIGNED */
| 730 | |||
86:	/* non_aligned: (dst-src) & 3 != 0, so the two pointers can never
	 * both be word aligned.  Words are loaded from the aligned-down
	 * source and spliced with sll/srl/or (shift-and-merge).
	 * Note the delay slot of "bleu 88f" is whichever instruction
	 * follows after preprocessing -- harmless on both paths. */
	cmp	%o2, 6
	bleu	88f		! <= 6 bytes: plain short copy

#ifdef FASTER_NONALIGNED

	cmp	%o2, 256	! (delay slot of bleu)
	bcc	87f		! >= 256 bytes: SMOVE bulk path below

#endif /* FASTER_NONALIGNED */

	andcc	%o0, 3, %g0	! (delay slot) dst word aligned?
	be	61f
	andcc	%o0, 1, %g0
	be	60f
	andcc	%o0, 2, %g0

	ldub	[%o1], %g5	! align dst: leading byte
	add	%o1, 1, %o1
	stb	%g5, [%o0]
	sub	%o2, 1, %o2
	bne	61f
	add	%o0, 1, %o0
60:
	ldub	[%o1], %g3	! align dst: two leading bytes
	add	%o1, 2, %o1
	stb	%g3, [%o0]
	sub	%o2, 2, %o2
	ldub	[%o1 - 1], %g3
	add	%o0, 2, %o0
	stb	%g3, [%o0 - 1]
61:
	/* Shifter setup: %g4 = 8*(src&3), %g7 = 32-%g4; choose the
	 * pipeline entry stage (4:/3:/2:/fall-through) from len & 0xc. */
	and	%o1, 3, %g2
	and	%o2, 0xc, %g3
	and	%o1, -4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	sub	%g2, %g4, %g7

	blu	3f
	cmp	%g3, 0x8

	be	2f
	srl	%o2, 2, %g3

	ld	[%o1], %o3
	add	%o0, -8, %o0
	ld	[%o1 + 4], %o4
	b	8f
	add	%g3, 1, %g3
2:
	ld	[%o1], %o4
	add	%o0, -12, %o0
	ld	[%o1 + 4], %o5
	add	%g3, 2, %g3
	b	9f
	add	%o1, -4, %o1
3:
	ld	[%o1], %g1
	add	%o0, -4, %o0
	ld	[%o1 + 4], %o3
	srl	%o2, 2, %g3
	b	7f
	add	%o1, 4, %o1
4:
	ld	[%o1], %o5
	cmp	%o2, 7
	ld	[%o1 + 4], %g1
	srl	%o2, 2, %g3
	bleu	10f		! not enough left for a full pipeline pass
	add	%o1, 8, %o1

	ld	[%o1], %o3
	add	%g3, -1, %g3
5:
	/* 4-words-per-iteration splice loop (forward direction). */
	sll	%o5, %g4, %g2
	srl	%g1, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0]
7:
	ld	[%o1 + 4], %o4
	sll	%g1, %g4, %g2
	srl	%o3, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:
	ld	[%o1 + 8], %o5
	sll	%o3, %g4, %g2
	srl	%o4, %g7, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
9:
	ld	[%o1 + 12], %g1
	sll	%o4, %g4, %g2
	srl	%o5, %g7, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, 16, %o1
	st	%g2, [%o0 + 12]
	add	%o0, 16, %o0
	bne,a	5b		! annulled: reload %o3 only if looping
	ld	[%o1], %o3
10:
	sll	%o5, %g4, %g2	! drain: last spliced word
	srl	%g1, %g7, %g5
	srl	%g7, 3, %g3	! bytes NOT consumed from the last word
	or	%g2, %g5, %g2
	sub	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0]
	be	1f
	andcc	%o2, 1, %g0

	ldub	[%o1], %g2	! trailing 2 bytes
	add	%o1, 2, %o1
	stb	%g2, [%o0 + 4]
	add	%o0, 2, %o0
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 + 3]
1:
	be	1f
	nop
	ldub	[%o1], %g2	! trailing odd byte
	stb	%g2, [%o0 + 4]
1:
	retl
	RETL_INSN
| 860 | |||
#ifdef FASTER_NONALIGNED

87:	/* faster_nonaligned: bulk (>= 256 byte) mutually-misaligned copy.
	 * Align the source to 8 bytes by hand, then split on the shift
	 * amount implied by (dst-src)&3 (%o5, set at memcpy entry):
	 *   %o5 == 1 -> 31: (shift  8/24), %o5 == 2 -> 32: (16/16),
	 *   %o5 == 3 -> 33: (24/ 8).
	 * Each case streams 64-byte chunks via SMOVE_CHUNK, or
	 * SMOVE_ALIGNCHUNK (41:/42:/43:) when dst reaches 8-byte
	 * alignment, carrying spill bytes in %g1/%o4 between chunks.
	 */
	andcc	%o1, 3, %g0
	be	3f
	andcc	%o1, 1, %g0

	be	4f
	andcc	%o1, 2, %g0

	ldub	[%o1], %g2	! align src: leading byte
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	add	%o0, 1, %o0
4:
	lduh	[%o1], %g2	! align src: halfword, stored as two bytes
	add	%o1, 2, %o1	! (dst may be odd here)
	srl	%g2, 8, %g3
	sub	%o2, 2, %o2
	stb	%g3, [%o0]
	add	%o0, 2, %o0
	stb	%g2, [%o0 - 1]
3:
	andcc	%o1, 4, %g0	! src 8-byte aligned?

	bne	2f
	cmp	%o5, 1		! (delay slot) dispatch on shift case

	ld	[%o1], %o4	! one aligned word, stored bytewise
	srl	%o4, 24, %g2
	stb	%g2, [%o0]
	srl	%o4, 16, %g3
	stb	%g3, [%o0 + 1]
	srl	%o4, 8, %g2
	stb	%g2, [%o0 + 2]
	sub	%o2, 4, %o2
	stb	%o4, [%o0 + 3]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	be	33f		! %o5 == 1
	cmp	%o5, 2
	be	32f		! %o5 == 2
	sub	%o2, 4, %o2
31:
	/* Case %o5 == 3: shift pair (8, 24), spill offset -3. */
	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 7
	sll	%g2, 8, %g1	! %g1 carries the leftover low bits
	add	%o0, 4, %o0
	be	41f		! dst now 8-byte aligned -> ALIGNCHUNK loop
	and	%o2, 0xffffffc0, %o3	! (delay slot) 64-byte block count
	ld	[%o0 - 7], %o4	! preload the partially-written word
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3	! leftover 16-byte chunks
	be,a	1f
	srl	%g1, 16, %g2	! (annulled slot) only when skipping loop
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:
	st	%o4, [%o0 - 7]	! flush spill word + remaining carry bytes
	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	stb	%g4, [%o0 - 1]
32:
	/* Case %o5 == 2: shift pair (16, 16), spill offset -2. */
	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 16, %g3
	and	%o0, 7, %g5
	sth	%g3, [%o0]
	cmp	%g5, 6
	sll	%g2, 16, %g1
	add	%o0, 4, %o0
	be	42f
	and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 6], %o4
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:
	st	%o4, [%o0 - 6]
	b	88f
	sth	%g2, [%o0 - 2]
33:
	/* Case %o5 == 1: shift pair (24, 8), spill offset -1. */
	ld	[%o1], %g2
	sub	%o2, 4, %o2
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 5
	srl	%g2, 8, %g4
	sll	%g2, 24, %g1
	sth	%g4, [%o0 + 1]
	add	%o1, 4, %o1
	be	43f
	and	%o2, 0xffffffc0, %o3

	ld	[%o0 - 1], %o4
	add	%o0, 4, %o0
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 24, %g2
4:
	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 24, %g2
1:
	st	%o4, [%o0 - 5]
	b	88f
	stb	%g2, [%o0 - 1]
41:
	/* Aligned-destination variants (std-capable): same three shift
	 * cases, SMOVE_ALIGNCHUNK instead of SMOVE_CHUNK. */
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	41b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:
	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	stb	%g4, [%o0 - 1]
43:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	43b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 24, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 24, %g2
1:
	stb	%g2, [%o0 + 3]
	b	88f
	add	%o0, 4, %o0
42:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	42b
	add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:
	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:
	sth	%g2, [%o0 - 2]

	/* Fall through */

#endif /* FASTER_NONALIGNED */
| 1103 | |||
88:	/* short_end: forward copy of <= 15 remaining bytes via a jump
	 * table of halfword moves (MOVE_SHORTCHUNK = 8 bytes of code per
	 * 2 bytes copied), with an odd-byte fixup at 89f. */

	and	%o2, 0xe, %o3	! even part of the remainder
20:
	sethi	%hi(89f), %o5
	sll	%o3, 3, %o4	! 8 code bytes per table entry
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(89f), %g0
	andcc	%o2, 1, %g0	! (delay slot) odd byte? tested at 89f

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be	1f
	nop

	ldub	[%o1], %g2	! final odd byte
	stb	%g2, [%o0]
1:
	retl
	RETL_INSN

90:	/* short_aligned_end: <= 15 bytes, src already word aligned */
	bne	88b		! dst-src misaligned after all -> byte table
	andcc	%o2, 8, %g0

	be	1f
	andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2	! move 8 bytes as two words
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:
	b	81b		! finish last 0..7 bytes in memcpy_last7
	mov	%o2, %g1
diff --git a/arch/sparc/lib/memscan.S b/arch/sparc/lib/memscan.S new file mode 100644 index 000000000000..28e78ff090ac --- /dev/null +++ b/arch/sparc/lib/memscan.S | |||
| @@ -0,0 +1,133 @@ | |||
/* $Id: memscan.S,v 1.4 1996/09/08 02:01:20 davem Exp $
 * memscan.S: Optimized memscan for the Sparc.
 *
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 */

/* In essence, this is just a fancy strlen. */

/* Classic word-at-a-time zero-byte detector constants:
 * (word - LO_MAGIC) & HI_MAGIC is nonzero iff some byte of the word
 * may be zero (a candidate word is then rechecked byte by byte). */
#define LO_MAGIC 0x01010101
#define HI_MAGIC 0x80808080

	.text
	.align	4
	.globl	__memscan_zero, __memscan_generic
	.globl	memscan
/* void *__memscan_zero(void *addr (%o0), size_t size (%o1))
 * Returns in %o0 the address of the first zero byte, or the address
 * one past the scanned region when none is found within size bytes.
 * Scans bytewise until %o0 is word aligned, then a word at a time
 * using the LO_MAGIC/HI_MAGIC trick above.  Branches make heavy use
 * of annulled delay slots (",a") to preload the next datum. */
__memscan_zero:
	/* %o0 = addr, %o1 = size */
	cmp	%o1, 0
	bne,a	1f
	andcc	%o0, 3, %g0	! annulled unless we continue

	retl			! size == 0: return addr unchanged
	nop

1:
	be	mzero_scan_word	! already word aligned
	sethi	%hi(HI_MAGIC), %g2

	ldsb	[%o0], %g3
mzero_still_not_word_aligned:
	cmp	%g3, 0
	bne	1f
	add	%o0, 1, %o0

	retl			! found the zero during alignment
	sub	%o0, 1, %o0

1:
	subcc	%o1, 1, %o1	! budget spent on the alignment byte
	bne,a	1f
	andcc	%o0, 3, %g0

	retl			! ran out of bytes
	nop

1:
	bne,a	mzero_still_not_word_aligned
	ldsb	[%o0], %g3

	sethi	%hi(HI_MAGIC), %g2
mzero_scan_word:
	or	%g2, %lo(HI_MAGIC), %o3
	sethi	%hi(LO_MAGIC), %g3
	or	%g3, %lo(LO_MAGIC), %o2
mzero_next_word:
	ld	[%o0], %g2
mzero_next_word_preloaded:
	sub	%g2, %o2, %g2	! word - LO_MAGIC
mzero_next_word_preloaded_next:
	andcc	%g2, %o3, %g0	! & HI_MAGIC: candidate zero byte?
	bne	mzero_byte_zero
	add	%o0, 4, %o0

mzero_check_out_of_fuel:
	subcc	%o1, 4, %o1
	bg,a	1f
	ld	[%o0], %g2	! annulled preload of the next word

	retl			! scanned the whole region, no zero
	nop

1:
	b	mzero_next_word_preloaded_next
	sub	%g2, %o2, %g2

/* Check every byte. */
mzero_byte_zero:
	/* Candidate word at %o0-4: recheck its four bytes exactly
	 * (the magic test can raise false positives). */
	ldsb	[%o0 - 4], %g2
	cmp	%g2, 0
	bne	mzero_byte_one
	sub	%o0, 4, %g3

	retl
	mov	%g3, %o0

mzero_byte_one:
	ldsb	[%o0 - 3], %g2
	cmp	%g2, 0
	bne,a	mzero_byte_two_and_three
	ldsb	[%o0 - 2], %g2

	retl
	sub	%o0, 3, %o0

mzero_byte_two_and_three:
	cmp	%g2, 0
	bne,a	1f
	ldsb	[%o0 - 1], %g2

	retl
	sub	%o0, 2, %o0

1:
	cmp	%g2, 0
	bne,a	mzero_next_word_preloaded	! false alarm: keep scanning
	ld	[%o0], %g2

	retl
	sub	%o0, 1, %o0

/* NOTE(review): no visible branch targets mzero_found_it in this file;
 * it appears to be dead code kept from an earlier version -- confirm
 * against external references before removing. */
mzero_found_it:
	retl
	sub	%o0, 2, %o0
| 114 | |||
/* void *memscan(void *addr (%o0), int c (%o1), size_t size (%o2))
 * Generic variant: simple byte loop.  Returns in %o0 the address of
 * the first byte equal to c, or the address one past the region if
 * no match occurs within size bytes. */
memscan:
__memscan_generic:
	/* %o0 = addr, %o1 = c, %o2 = size */
	cmp	%o2, 0
	bne,a	0f
	ldub	[%o0], %g2	! annulled preload of the first byte

	b,a	2f		! size == 0: return addr unchanged
1:
	ldub	[%o0], %g2
0:
	cmp	%g2, %o1	! match?
	be	2f
	addcc	%o2, -1, %o2	! (delay slot) count down, sets Z for bne
	bne	1b
	add	%o0, 1, %o0
2:
	retl
	nop
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S new file mode 100644 index 000000000000..a65eba41097c --- /dev/null +++ b/arch/sparc/lib/memset.S | |||
| @@ -0,0 +1,203 @@ | |||
/* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 *
 * Returns 0, if ok, and number of bytes not yet set if exception
 * occurs and we were called as clear_user.
 */

#include <asm/ptrace.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
/* EX(insn-op, insn-args, fixup-op, fixup-arg): emit one faulting
 * instruction with an __ex_table entry whose fixup computes the
 * not-yet-set byte count into %o0 and branches to the common
 * handler at local label 30 (defined later in this file). */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba 30f;					\
	 a, b, %o0;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/* EXT(start, end, handler): range-style __ex_table entry covering all
 * instructions from start to end, resolved by the given handler. */
#define EXT(start,end,handler) 			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please don't change these macros, unless you change the logic
 * in the .fixup section below as well.
 * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
#define ZERO_BIG_BLOCK(base, offset, source)    \
	std	source, [base + offset + 0x00]; \
	std	source, [base + offset + 0x08]; \
	std	source, [base + offset + 0x10]; \
	std	source, [base + offset + 0x18]; \
	std	source, [base + offset + 0x20]; \
	std	source, [base + offset + 0x28]; \
	std	source, [base + offset + 0x30]; \
	std	source, [base + offset + 0x38];

/* Store the trailing 64 bytes ending at (BASE - OFFSET). */
#define ZERO_LAST_BLOCKS(base, offset, source)	\
	std	source, [base - offset - 0x38]; \
	std	source, [base - offset - 0x30]; \
	std	source, [base - offset - 0x28]; \
	std	source, [base - offset - 0x20]; \
	std	source, [base - offset - 0x18]; \
	std	source, [base - offset - 0x10]; \
	std	source, [base - offset - 0x08]; \
	std	source, [base - offset - 0x00];
| 56 | |||
| 57 | .text | ||
| 58 | .align 4 | ||
| 59 | |||
| 60 | .globl __bzero_begin | ||
| 61 | __bzero_begin: | ||
| 62 | |||
| 63 | .globl __bzero, __memset, | ||
| 64 | .globl memset | ||
| 65 | .globl __memset_start, __memset_end | ||
| 66 | __memset_start: | ||
| 67 | __memset: | ||
| 68 | memset: | ||
| 69 | and %o1, 0xff, %g3 | ||
| 70 | sll %g3, 8, %g2 | ||
| 71 | or %g3, %g2, %g3 | ||
| 72 | sll %g3, 16, %g2 | ||
| 73 | or %g3, %g2, %g3 | ||
| 74 | b 1f | ||
| 75 | mov %o2, %o1 | ||
| 76 | 3: | ||
| 77 | cmp %o2, 3 | ||
| 78 | be 2f | ||
| 79 | EX(stb %g3, [%o0], sub %o1, 0) | ||
| 80 | |||
| 81 | cmp %o2, 2 | ||
| 82 | be 2f | ||
| 83 | EX(stb %g3, [%o0 + 0x01], sub %o1, 1) | ||
| 84 | |||
| 85 | EX(stb %g3, [%o0 + 0x02], sub %o1, 2) | ||
| 86 | 2: | ||
| 87 | sub %o2, 4, %o2 | ||
| 88 | add %o1, %o2, %o1 | ||
| 89 | b 4f | ||
| 90 | sub %o0, %o2, %o0 | ||
| 91 | |||
| 92 | __bzero: | ||
| 93 | mov %g0, %g3 | ||
| 94 | 1: | ||
| 95 | cmp %o1, 7 | ||
| 96 | bleu 7f | ||
| 97 | andcc %o0, 3, %o2 | ||
| 98 | |||
| 99 | bne 3b | ||
| 100 | 4: | ||
| 101 | andcc %o0, 4, %g0 | ||
| 102 | |||
| 103 | be 2f | ||
| 104 | mov %g3, %g2 | ||
| 105 | |||
| 106 | EX(st %g3, [%o0], sub %o1, 0) | ||
| 107 | sub %o1, 4, %o1 | ||
| 108 | add %o0, 4, %o0 | ||
| 109 | 2: | ||
| 110 | andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run | ||
| 111 | be 9f | ||
| 112 | andcc %o1, 0x78, %o2 | ||
| 113 | 10: | ||
| 114 | ZERO_BIG_BLOCK(%o0, 0x00, %g2) | ||
| 115 | subcc %o3, 128, %o3 | ||
| 116 | ZERO_BIG_BLOCK(%o0, 0x40, %g2) | ||
| 117 | 11: | ||
| 118 | EXT(10b, 11b, 20f) | ||
| 119 | bne 10b | ||
| 120 | add %o0, 128, %o0 | ||
| 121 | |||
| 122 | orcc %o2, %g0, %g0 | ||
| 123 | 9: | ||
| 124 | be 13f | ||
| 125 | andcc %o1, 7, %o1 | ||
| 126 | |||
| 127 | srl %o2, 1, %o3 | ||
| 128 | set 13f, %o4 | ||
| 129 | sub %o4, %o3, %o4 | ||
| 130 | jmp %o4 | ||
| 131 | add %o0, %o2, %o0 | ||
| 132 | |||
| 133 | 12: | ||
| 134 | ZERO_LAST_BLOCKS(%o0, 0x48, %g2) | ||
| 135 | ZERO_LAST_BLOCKS(%o0, 0x08, %g2) | ||
| 136 | 13: | ||
| 137 | be 8f | ||
| 138 | andcc %o1, 4, %g0 | ||
| 139 | |||
| 140 | be 1f | ||
| 141 | andcc %o1, 2, %g0 | ||
| 142 | |||
| 143 | EX(st %g3, [%o0], and %o1, 7) | ||
| 144 | add %o0, 4, %o0 | ||
| 145 | 1: | ||
| 146 | be 1f | ||
| 147 | andcc %o1, 1, %g0 | ||
| 148 | |||
| 149 | EX(sth %g3, [%o0], and %o1, 3) | ||
| 150 | add %o0, 2, %o0 | ||
| 151 | 1: | ||
| 152 | bne,a 8f | ||
| 153 | EX(stb %g3, [%o0], and %o1, 1) | ||
| 154 | 8: | ||
| 155 | retl | ||
| 156 | clr %o0 | ||
| 157 | 7: | ||
| 158 | be 13b | ||
| 159 | orcc %o1, 0, %g0 | ||
| 160 | |||
| 161 | be 0f | ||
| 162 | 8: | ||
| 163 | add %o0, 1, %o0 | ||
| 164 | subcc %o1, 1, %o1 | ||
| 165 | bne,a 8b | ||
| 166 | EX(stb %g3, [%o0 - 1], add %o1, 1) | ||
| 167 | 0: | ||
| 168 | retl | ||
| 169 | clr %o0 | ||
| 170 | __memset_end: | ||
| 171 | |||
	/* Fault-recovery code for the EX/EXT entries above.  Contract
	 * (file header): on a fault, return in %o0 the number of bytes
	 * not yet set.  Entry 20 handles faults inside the unrolled
	 * 128-byte ZERO_BIG_BLOCK loop; on entry %g2 is the index of
	 * the faulting std within the pair of blocks (established by
	 * the fault lookup -- see lookup_fault), %o3 the remaining
	 * 128-byte count and %o1 the total remaining length. */
	.section .fixup,#alloc,#execinstr
	.align	4
20:
	cmp	%g2, 8
	bleu	1f			! fault in first 8-std block?
	 and	%o1, 0x7f, %o1		! (delay) keep sub-128 remainder only
	sub	%g2, 9, %g2		! second block: rebase the std index
	add	%o3, 64, %o3		! and re-credit its 64 bytes
1:
	sll	%g2, 3, %g2		! bytes already stored by this block
	add	%o3, %o1, %o0		! unfinished = big-loop rest + tail
	b 	30f
	 sub	%o0, %g2, %o0		! (delay) minus what did get stored
21:
	/* Fault within a single 8-std run: %g2 = index of the std that
	 * faulted, %o1 = remaining length; unfinished bytes =
	 * (8 - %g2) * 8 + (len & 7). */
	mov	8, %o0
	and	%o1, 7, %o1
	sub	%o0, %g2, %o0
	sll	%o0, 3, %o0
	b	30f
	 add	%o0, %o1, %o0
30:
/* %o4 is faulting address, %o5 is %pc where fault occurred */
	save	%sp, -104, %sp		! new window: %o4/%o5/%o7 -> %i4/%i5/%i7
	mov	%i5, %o0
	mov	%i7, %o1
	call	lookup_fault
	 mov	%i4, %o2
	ret				! return to the EX stub's caller
	 restore

	.globl __bzero_end
__bzero_end:
diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S new file mode 100644 index 000000000000..83dffbc2f62f --- /dev/null +++ b/arch/sparc/lib/mul.S | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | /* $Id: mul.S,v 1.4 1996/09/30 02:22:32 davem Exp $ | ||
| 2 | * mul.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* | ||
| 7 | * Signed multiply, from Appendix E of the Sparc Version 8 | ||
| 8 | * Architecture Manual. | ||
| 9 | */ | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of | ||
| 13 | * the 64-bit product). | ||
| 14 | * | ||
| 15 | * This code optimizes short (less than 13-bit) multiplies. | ||
| 16 | */ | ||
| 17 | |||
/* .mul: signed 32x32 -> 64-bit multiply (gcc/glibc millicode).
 * In:   %o0 = multiplier, %o1 = multiplicand
 * Out:  %o1:%o0 = product (%o1 = upper 32 bits, %o0 = lower 32 bits)
 * Clobbers: %o2, %o4, %o5, %y, condition codes.
 * Uses the V8 mulscc one-bit-per-step primitive; a 12-step short path
 * is taken when the multiplier fits in 13 bits.
 */
	.globl .mul
.mul:
	mov	%o0, %y		! multiplier -> Y
	andncc	%o0, 0xfff, %g0	! test bits 12..31
	be	Lmul_shortway	! if zero, can do it the short way
	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V

	/*
	 * Long multiply.  32 steps, followed by a final shift step.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift

	! If %o0 was negative, the result is
	!	(%o0 * %o1) + (%o1 << 32))
	! We fix that here.

#if 0
	/* Straightforward (disabled) compensation: branch on the sign
	 * of %o0 and subtract %o1 from the high word when negative. */
	tst	%o0
	bge	1f
	 rd	%y, %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	 sub	%o4, %o1, %o1

1:
	retl
	 mov	%o4, %o1
#else
	/* Faster code adapted from tege@sics.se's code for umul.S.
	 * Branch-free: build an all-ones/all-zeros mask from the sign
	 * of %o0 and subtract (%o1 & mask) from the high word. */
	sra	%o0, 31, %o2	! make mask from sign bit
	and	%o1, %o2, %o2	! %o2 = 0 or %o1, depending on sign of %o0
	rd	%y, %o0		! get lower half of product
	retl
	 sub	%o4, %o2, %o1	! subtract compensation
				!  and put upper half in place
#endif

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above).
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 *  %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		    %y
	 * +----------------+----------------+
	 * | -12- |   -20-  | -12- |   -20-  |
	 * +------(---------+------)---------+
	 *  --hi-- ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	 sra	%o4, 20, %o1	! ... and extract high part of result
| 129 | |||
	/* Hardware-multiply replacement for the head of .mul: same
	 * contract (%o1:%o0 = %o0 * %o1), one smul instruction instead
	 * of 33 mulscc steps.  NOTE(review): judging by the name this
	 * is copied over .mul at boot on CPUs with hardware multiply;
	 * the patching code is elsewhere -- verify against callers.
	 * The trailing nop keeps the patch region a fixed size. */
	.globl	.mul_patch
.mul_patch:
	smul	%o0, %o1, %o0		! low 32 bits -> %o0
	retl
	 rd	%y, %o1			! (delay) high 32 bits from %y
	nop
diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S new file mode 100644 index 000000000000..7f17872d0603 --- /dev/null +++ b/arch/sparc/lib/muldi3.S | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
| 2 | |||
| 3 | This file is part of GNU CC. | ||
| 4 | |||
| 5 | GNU CC is free software; you can redistribute it and/or modify | ||
| 6 | it under the terms of the GNU General Public License as published by | ||
| 7 | the Free Software Foundation; either version 2, or (at your option) | ||
| 8 | any later version. | ||
| 9 | |||
| 10 | GNU CC is distributed in the hope that it will be useful, | ||
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | GNU General Public License for more details. | ||
| 14 | |||
| 15 | You should have received a copy of the GNU General Public License | ||
| 16 | along with GNU CC; see the file COPYING. If not, write to | ||
| 17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
| 18 | Boston, MA 02111-1307, USA. */ | ||
| 19 | |||
| 20 | .text | ||
| 21 | .align 4 | ||
| 22 | .globl __muldi3 | ||
| 23 | __muldi3: | ||
| 24 | save %sp, -104, %sp | ||
| 25 | wr %g0, %i1, %y | ||
| 26 | sra %i3, 0x1f, %g2 | ||
| 27 | and %i1, %g2, %g2 | ||
| 28 | andcc %g0, 0, %g1 | ||
| 29 | mulscc %g1, %i3, %g1 | ||
| 30 | mulscc %g1, %i3, %g1 | ||
| 31 | mulscc %g1, %i3, %g1 | ||
| 32 | mulscc %g1, %i3, %g1 | ||
| 33 | mulscc %g1, %i3, %g1 | ||
| 34 | mulscc %g1, %i3, %g1 | ||
| 35 | mulscc %g1, %i3, %g1 | ||
| 36 | mulscc %g1, %i3, %g1 | ||
| 37 | mulscc %g1, %i3, %g1 | ||
| 38 | mulscc %g1, %i3, %g1 | ||
| 39 | mulscc %g1, %i3, %g1 | ||
| 40 | mulscc %g1, %i3, %g1 | ||
| 41 | mulscc %g1, %i3, %g1 | ||
| 42 | mulscc %g1, %i3, %g1 | ||
| 43 | mulscc %g1, %i3, %g1 | ||
| 44 | mulscc %g1, %i3, %g1 | ||
| 45 | mulscc %g1, %i3, %g1 | ||
| 46 | mulscc %g1, %i3, %g1 | ||
| 47 | mulscc %g1, %i3, %g1 | ||
| 48 | mulscc %g1, %i3, %g1 | ||
| 49 | mulscc %g1, %i3, %g1 | ||
| 50 | mulscc %g1, %i3, %g1 | ||
| 51 | mulscc %g1, %i3, %g1 | ||
| 52 | mulscc %g1, %i3, %g1 | ||
| 53 | mulscc %g1, %i3, %g1 | ||
| 54 | mulscc %g1, %i3, %g1 | ||
| 55 | mulscc %g1, %i3, %g1 | ||
| 56 | mulscc %g1, %i3, %g1 | ||
| 57 | mulscc %g1, %i3, %g1 | ||
| 58 | mulscc %g1, %i3, %g1 | ||
| 59 | mulscc %g1, %i3, %g1 | ||
| 60 | mulscc %g1, %i3, %g1 | ||
| 61 | mulscc %g1, 0, %g1 | ||
| 62 | add %g1, %g2, %l2 | ||
| 63 | rd %y, %o1 | ||
| 64 | mov %o1, %l3 | ||
| 65 | mov %i1, %o0 | ||
| 66 | call .umul | ||
| 67 | mov %i2, %o1 | ||
| 68 | mov %o0, %l0 | ||
| 69 | mov %i0, %o0 | ||
| 70 | call .umul | ||
| 71 | mov %i3, %o1 | ||
| 72 | add %l0, %o0, %l0 | ||
| 73 | mov %l2, %i0 | ||
| 74 | add %l2, %l0, %i0 | ||
| 75 | ret | ||
| 76 | restore %g0, %l3, %o1 | ||
diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S new file mode 100644 index 000000000000..44508148d055 --- /dev/null +++ b/arch/sparc/lib/rem.S | |||
| @@ -0,0 +1,382 @@ | |||
| 1 | /* $Id: rem.S,v 1.7 1996/09/30 02:22:34 davem Exp $ | ||
| 2 | * rem.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | |||
| 7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
| 8 | /* | ||
| 9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
| 10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
| 11 | */ | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
| 15 | * | ||
| 16 | * m4 parameters: | ||
| 17 | * .rem name of function to generate | ||
| 18 | * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 | ||
| 19 | * true true=true => signed; true=false => unsigned | ||
| 20 | * | ||
| 21 | * Algorithm parameters: | ||
| 22 | * N how many bits per iteration we try to get (4) | ||
| 23 | * WORDSIZE total number of bits (32) | ||
| 24 | * | ||
| 25 | * Derived constants: | ||
| 26 | * TOPBITS number of bits in the top decade of a number | ||
| 27 | * | ||
| 28 | * Important variables: | ||
| 29 | * Q the partial quotient under development (initially 0) | ||
| 30 | * R the remainder so far, initially the dividend | ||
| 31 | * ITER number of main division loop iterations required; | ||
| 32 | * equal to ceil(log2(quotient) / N). Note that this | ||
| 33 | * is the log base (2^N) of the quotient. | ||
| 34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
| 35 | * | ||
| 36 | * Cost: | ||
| 37 | * Current estimate for non-large dividend is | ||
| 38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
| 39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
| 40 | * different path, as the upper bits of the quotient must be developed | ||
| 41 | * one bit at a time. | ||
| 42 | */ | ||
| 43 | |||
| 44 | |||
/* .rem: signed 32-bit remainder, %o0 % %o1 -> %o0.
 * Non-restoring division developing N=4 quotient bits per main-loop
 * iteration (see the file header for the algorithm parameters).
 * %g2 remembers the sign of the dividend: the remainder takes the
 * dividend's sign.  Clobbers %o2..%o5, %g1, %g2, %g7, flags.
 * This body is generated from divrem.m4 -- keep edits to comments only.
 */
	.globl .rem
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	2f			! no, go do the divide
	 mov	%o0, %g2	! compute sign in any case

	tst	%o1
	bge	1f
	 tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	 sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5
	bne	1f
	 mov	%o0, %o3

		! Divide by zero trap.  If it returns, return 0 (about as
		! wrong as possible, but that is what SunOS does...).
		ta	ST_DIV0
		retl
		 clr	%o0

1:
	cmp	%o3, %o5			! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2

	sethi	%hi(1 << (32 - 4 - 1)), %g1	! 2^(31-N): "large dividend" bound

	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g7

	sll	%o5, 4, %o5	! scale comparand by 2^N per iteration

	b	1b
	 add	%o4, 1, %o4	! %o4 = ITER

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5	! one-bit-at-a-time scaling for the top decade

	bcc	Lnot_too_big
	 add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1	! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	 sub	%g7, 1, %g7

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b
	 nop

	be	Ldo_single_div
	 nop
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	 nop

	sub	%o3, %o5, %o3	! unconditional first step (see above)
	mov	1, %o2

	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	sll	%o2, 1, %o2

	bl	1f
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3

	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3	! non-restoring: add back instead of restore
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	 tst	%o3

	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5	! scale comparand until it covers the dividend
	cmp	%o5, %o3
	bleu	1b
	 addcc	%o4, 1, %o4	! count iterations
	be	Lgot_result
	 sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
Ldivloop:
	! Each pass develops 4 quotient bits; the L.d.b labels below are
	! the m4-generated decision tree (depth d, accumulated bits b).
	sll	%o2, 4, %o2
		! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3

	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
		! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2


L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	 tst	%o3

	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	 add	%o3, %o1, %o3

Lgot_result:
	! check to see if answer should be < 0
	tst	%g2
	bl,a	1f
	 sub	%g0, %o3, %o3
1:
	retl
	 mov	%o3, %o0
| 368 | |||
	/* Hardware-divide replacement for .rem: rem = a - (a/b)*b.
	 * NOTE(review): like .mul_patch, presumably copied over .rem at
	 * boot on CPUs with hardware divide -- patching code elsewhere.
	 * The three nops after "wr %y" cover the V8 write-%y delay
	 * before sdivcc may read it; the trailing nop pads the region. */
	.globl	.rem_patch
.rem_patch:
	sra	%o0, 0x1f, %o4		! sign-extend dividend ...
	wr	%o4, 0x0, %y		! ... into %y for 64/32 divide
	nop
	nop
	nop
	sdivcc	%o0, %o1, %o2		! quotient, V set on overflow
	bvs,a	1f
	 xnor	%o2, %g0, %o2		! (annulled) overflow: force 0x7fffffff
1:	smul	%o2, %o1, %o2		! quotient * divisor
	retl
	 sub	%o0, %o2, %o0		! remainder = dividend - q*d
	nop
diff --git a/arch/sparc/lib/rwsem.S b/arch/sparc/lib/rwsem.S new file mode 100644 index 000000000000..e7578dc600b8 --- /dev/null +++ b/arch/sparc/lib/rwsem.S | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | /* $Id: rwsem.S,v 1.5 2000/05/09 17:40:13 davem Exp $ | ||
| 2 | * Assembly part of rw semaphores. | ||
| 3 | * | ||
| 4 | * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/config.h> | ||
| 8 | #include <asm/ptrace.h> | ||
| 9 | #include <asm/psr.h> | ||
| 10 | |||
| 11 | .section .sched.text | ||
| 12 | .align 4 | ||
| 13 | |||
	/* ___down_read: out-of-line slow-path stub for down_read().
	 * Special calling convention (no register window of its own on
	 * the fast path): %g1 = rwsem pointer, %g4 = value to restore
	 * into %o7; returns with "jmpl %o7" while putting %g4 back.
	 * Interrupts are masked by raising PIL in %psr around the
	 * counter update; each "wr %psr" is followed by the 3
	 * instructions of its architectural write delay (nops or
	 * useful work).  On SMP the byte at [%g1 + 4] is a ldstub
	 * spinlock protecting the count word at [%g1]. */
	.globl	___down_read
___down_read:
	rd	%psr, %g3		! save current PSR
	nop
	nop
	nop
	or	%g3, PSR_PIL, %g7	! mask interrupts
	wr	%g7, 0, %psr
	nop				! wr %psr delay
	nop
	nop
#ifdef CONFIG_SMP
1:	ldstub	[%g1 + 4], %g7		! spin on the per-sem lock byte
	tst	%g7
	bne	1b
	 ld	[%g1], %g7		! (delay) fetch count
	sub	%g7, 1, %g7		! one more reader
	st	%g7, [%g1]
	stb	%g0, [%g1 + 4]		! release lock byte
#else
	ld	[%g1], %g7
	sub	%g7, 1, %g7		! one more reader
	st	%g7, [%g1]
#endif
	wr	%g3, 0, %psr		! restore PSR ...
	add	%g7, 1, %g7		! ... delay: recompute flags on the
	nop
	nop
	subcc	%g7, 1, %g7		!     decrement (N, C for below)
	bneg	3f			! went negative: contended
	 nop
2:	jmpl	%o7, %g0		! fast exit: jump to addr in %o7 ...
	 mov	%g4, %o7		! ... (delay) restoring caller's %o7
3:	save	%sp, -64, %sp		! slow path: get a window, save globals
	mov	%g1, %l1
	mov	%g4, %l4
	bcs	4f			! carry from subcc picks biased variant
	 mov	%g5, %l5
	call	down_read_failed
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	___down_read		! retry the whole acquire
	 restore	%l5, %g0, %g5
4:	call	down_read_failed_biased
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	2b			! acquired: take the fast exit
	 restore	%l5, %g0, %g5
| 64 | |||
	/* ___down_write: out-of-line slow-path stub for down_write().
	 * Same calling convention and PSR/spinlock discipline as
	 * ___down_read above; a writer subtracts the whole write bias
	 * 0x01000000 (%g2) from the count instead of 1, and the sem is
	 * ours only if the count was exactly 0 beforehand (bne -> slow
	 * path when the pre-decrement value was nonzero). */
	.globl	___down_write
___down_write:
	rd	%psr, %g3		! save current PSR
	nop
	nop
	nop
	or	%g3, PSR_PIL, %g7	! mask interrupts
	wr	%g7, 0, %psr
	sethi	%hi(0x01000000), %g2	! write bias (fills wr delay)
	nop
	nop
#ifdef CONFIG_SMP
1:	ldstub	[%g1 + 4], %g7		! spin on the per-sem lock byte
	tst	%g7
	bne	1b
	 ld	[%g1], %g7		! (delay) fetch count
	sub	%g7, %g2, %g7		! subtract write bias
	st	%g7, [%g1]
	stb	%g0, [%g1 + 4]		! release lock byte
#else
	ld	[%g1], %g7
	sub	%g7, %g2, %g7		! subtract write bias
	st	%g7, [%g1]
#endif
	wr	%g3, 0, %psr		! restore PSR ...
	add	%g7, %g2, %g7		! ... delay: recompute flags on the
	nop
	nop
	subcc	%g7, %g2, %g7		!     biased decrement (Z, C below)
	bne	3f			! old count != 0: contended
	 nop
2:	jmpl	%o7, %g0		! fast exit: jump to addr in %o7 ...
	 mov	%g4, %o7		! ... (delay) restoring caller's %o7
3:	save	%sp, -64, %sp		! slow path: get a window, save globals
	mov	%g1, %l1
	mov	%g4, %l4
	bcs	4f			! carry picks the biased variant
	 mov	%g5, %l5
	call	down_write_failed
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	___down_write		! retry the whole acquire
	 restore	%l5, %g0, %g5
4:	call	down_write_failed_biased
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	2b			! acquired: take the fast exit
	 restore	%l5, %g0, %g5
| 115 | |||
	.text
	/* ___up_read: release a reader.  Same convention as above:
	 * %g1 = rwsem pointer, %g4 = caller's %o7, interrupts masked
	 * via PIL around the counter update (wr %psr delay filled with
	 * nops).  If the incremented count reaches exactly 0, the last
	 * reader just left with waiters recorded: call
	 * __rwsem_wake(sem, 0). */
	.globl	___up_read
___up_read:
	rd	%psr, %g3		! save current PSR
	nop
	nop
	nop
	or	%g3, PSR_PIL, %g7	! mask interrupts
	wr	%g7, 0, %psr
	nop				! wr %psr delay
	nop
	nop
#ifdef CONFIG_SMP
1:	ldstub	[%g1 + 4], %g7		! spin on the per-sem lock byte
	tst	%g7
	bne	1b
	 ld	[%g1], %g7		! (delay) fetch count
	add	%g7, 1, %g7		! one reader gone
	st	%g7, [%g1]
	stb	%g0, [%g1 + 4]		! release lock byte
#else
	ld	[%g1], %g7
	add	%g7, 1, %g7		! one reader gone
	st	%g7, [%g1]
#endif
	wr	%g3, 0, %psr		! restore PSR
	nop				! wr %psr delay
	nop
	nop
	cmp	%g7, 0			! new count == 0?
	be	3f			! yes: someone may need waking
	 nop
2:	jmpl	%o7, %g0		! fast exit: jump to addr in %o7 ...
	 mov	%g4, %o7		! ... (delay) restoring caller's %o7
3:	save	%sp, -64, %sp		! slow path: get a window, save globals
	mov	%g1, %l1
	mov	%g4, %l4
	mov	%g5, %l5
	clr	%o1			! arg1 = 0
	call	__rwsem_wake
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	2b			! done: take the fast exit
	 restore	%l5, %g0, %g5
| 161 | |||
	/* ___up_write: release the writer by adding the write bias
	 * 0x01000000 back.  Same convention and locking discipline as
	 * the routines above.  The restore-PSR delay slots recompute
	 * flags on the biased increment; carry from the final addcc
	 * detects that waiters accumulated while we held the sem, in
	 * which case __rwsem_wake(sem, %g7) is called with the
	 * readjusted count. */
	.globl	___up_write
___up_write:
	rd	%psr, %g3		! save current PSR
	nop
	nop
	nop
	or	%g3, PSR_PIL, %g7	! mask interrupts
	wr	%g7, 0, %psr
	sethi	%hi(0x01000000), %g2	! write bias (fills wr delay)
	nop
	nop
#ifdef CONFIG_SMP
1:	ldstub	[%g1 + 4], %g7		! spin on the per-sem lock byte
	tst	%g7
	bne	1b
	 ld	[%g1], %g7		! (delay) fetch count
	add	%g7, %g2, %g7		! give the write bias back
	st	%g7, [%g1]
	stb	%g0, [%g1 + 4]		! release lock byte
#else
	ld	[%g1], %g7
	add	%g7, %g2, %g7		! give the write bias back
	st	%g7, [%g1]
#endif
	wr	%g3, 0, %psr		! restore PSR ...
	sub	%g7, %g2, %g7		! ... delay: recompute flags on the
	nop
	nop
	addcc	%g7, %g2, %g7		!     biased increment (C for below)
	bcs	3f			! carry: waiters showed up, wake them
	 nop
2:	jmpl	%o7, %g0		! fast exit: jump to addr in %o7 ...
	 mov	%g4, %o7		! ... (delay) restoring caller's %o7
3:	save	%sp, -64, %sp		! slow path: get a window, save globals
	mov	%g1, %l1
	mov	%g4, %l4
	mov	%g5, %l5
	mov	%g7, %o1		! arg1 = current count
	call	__rwsem_wake
	 mov	%l1, %o0		! (delay) arg0 = sem
	mov	%l1, %g1
	mov	%l4, %g4
	ba	2b			! done: take the fast exit
	 restore	%l5, %g0, %g5
diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S new file mode 100644 index 000000000000..e0ad80b6f63d --- /dev/null +++ b/arch/sparc/lib/sdiv.S | |||
| @@ -0,0 +1,379 @@ | |||
| 1 | /* $Id: sdiv.S,v 1.6 1996/10/02 17:37:00 davem Exp $ | ||
| 2 | * sdiv.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | |||
| 7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
| 8 | /* | ||
| 9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
| 10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
| 11 | */ | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
| 15 | * | ||
| 16 | * m4 parameters: | ||
| 17 | * .div name of function to generate | ||
| 18 | * div div=div => %o0 / %o1; div=rem => %o0 % %o1 | ||
| 19 | * true true=true => signed; true=false => unsigned | ||
| 20 | * | ||
| 21 | * Algorithm parameters: | ||
| 22 | * N how many bits per iteration we try to get (4) | ||
| 23 | * WORDSIZE total number of bits (32) | ||
| 24 | * | ||
| 25 | * Derived constants: | ||
| 26 | * TOPBITS number of bits in the top decade of a number | ||
| 27 | * | ||
| 28 | * Important variables: | ||
| 29 | * Q the partial quotient under development (initially 0) | ||
| 30 | * R the remainder so far, initially the dividend | ||
| 31 | * ITER number of main division loop iterations required; | ||
| 32 | * equal to ceil(log2(quotient) / N). Note that this | ||
| 33 | * is the log base (2^N) of the quotient. | ||
| 34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
| 35 | * | ||
| 36 | * Cost: | ||
| 37 | * Current estimate for non-large dividend is | ||
| 38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
| 39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
| 40 | * different path, as the upper bits of the quotient must be developed | ||
| 41 | * one bit at a time. | ||
| 42 | */ | ||
| 43 | |||
| 44 | |||
/* .div: signed 32-bit integer divide.
 *
 * In:      %o0 = dividend, %o1 = divisor
 * Out:     %o0 = quotient (0 if the divide-by-zero trap returns)
 * Scratch: %g1, %g2, %g7, %o2, %o3, %o4, %o5
 *
 * Non-restoring division developing up to 4 quotient bits per main-loop
 * iteration; see the file header above for the algorithm description.
 * Register roles below: %o3 = R (remainder), %o5 = V (comparand),
 * %o2 = Q (quotient under development), %o4 = ITER, %g2 = result sign.
 */
	.globl .div
.div:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0		! either negative?
	bge	2f			! no, go do the divide
	xor	%o1, %o0, %g2		! compute sign in any case
					! (quotient sign = XOR of operand signs)
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	sub	%g0, %o1, %o1		! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0		! make %o0 nonnegative
2:
	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; also tests divisor for zero
	bne	1f
	mov	%o0, %o3		! R = remainder, initially the dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	clr	%o2			! Q starts at 0

	sethi	%hi(1 << (32 - 4 - 1)), %g1	! 2^27: "large dividend" threshold

	cmp	%o3, %g1
	blu	Lnot_really_big
	clr	%o4			! ITER starts at 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	mov	1, %g7			! %g7 = SC, single-bit step count

	sll	%o5, 4, %o5		! scale V by 2^N per iteration

	b	1b
	add	%o4, 1, %o4

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5		! shift V left one bit, catching overflow
	bcc	Lnot_too_big
	add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	sub	%g7, 1, %g7

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b			! keep shifting V up while V < R
	nop

	be	Ldo_single_div
	nop
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	nop

	sub	%o3, %o5, %o3		! first step is unconditional: R >= V here
	mov	1, %o2

	b	Lend_single_divloop
	nop
Lsingle_divloop:
	sll	%o2, 1, %o2		! make room for the next quotient bit

	bl	1f
	srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3

	b	2f
	add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3		! non-restoring: add back instead of restore
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	tst	%o3			! set condition codes on R for next step

	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! scale V up until it covers R
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4

	be	Lgot_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
Ldivloop:
	! Each pass develops 4 quotient bits.  The branch tree below walks
	! depth 1..4; at each depth the (signed) remainder decides whether we
	! subtract or add the shifted V, and the accumulated bits encode the
	! running non-restoring quotient contribution (added as 2k+1 / 2k-1).
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2


L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	tst	%o3			! condition codes on R feed the next pass

	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2

Lgot_result:
	! check to see if answer should be < 0
	tst	%g2
	bl,a	1f
	sub	%g0, %o2, %o2		! negate quotient: operand signs differed
1:
	retl
	mov	%o2, %o0
| 367 | |||
/* .div_patch: replacement body patched over .div on CPUs with a hardware
 * signed-divide instruction.  Same contract as .div:
 * %o0 = %o0 / %o1, result in %o0.
 *
 * Sign-extends the dividend into %y (the high 32 bits for sdivcc), with
 * nops covering the delay after writing %y.  On overflow (V set by
 * sdivcc) the annulled xnor replaces the result with its bitwise
 * complement.
 */
	.globl	.div_patch
.div_patch:
	sra	%o0, 0x1f, %o2		! %o2 = sign extension of dividend
	wr	%o2, 0x0, %y		! %y:%o0 = 64-bit sign-extended dividend
	nop				! wr %y needs settling time before use
	nop
	nop
	sdivcc	%o0, %o1, %o0
	bvs,a	1f			! overflow? fix up result
	xnor	%o0, %g0, %o0		! (annulled unless overflow taken)
1:	retl
	nop
diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S new file mode 100644 index 000000000000..ed9a763368cd --- /dev/null +++ b/arch/sparc/lib/strlen.S | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | /* strlen.S: Sparc optimized strlen code | ||
| 2 | * Hand optimized from GNU libc's strlen | ||
| 3 | * Copyright (C) 1991,1996 Free Software Foundation | ||
| 4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 5 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
| 6 | */ | ||
| 7 | |||
/* Magic constants for word-at-a-time zero-byte detection:
 * (word - LO_MAGIC) & HI_MAGIC is nonzero whenever any byte of `word`
 * may be zero (with occasional false positives, filtered below).
 */
#define LO_MAGIC 0x01010101
#define HI_MAGIC 0x80808080

/* Alignment prologue, entered from strlen below when the pointer is not
 * word-aligned.  Steps byte-by-byte over the first 1-3 bytes, returning
 * immediately (length 0, 1 or 2) if a NUL is found, while building the
 * magic constants in the delay slots.  Falls into the word loop at 8:.
 */
0:
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	1f			! NUL in byte 0 -> length 0
	add	%o0, 1, %o0
	andcc	%o0, 3, %g0
	be	4f			! aligned now; finish building constants
	or	%o4, %lo(HI_MAGIC), %o3
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	2f			! NUL in byte 1 -> length 1
	add	%o0, 1, %o0
	andcc	%o0, 3, %g0
	be	5f
	sethi	%hi(LO_MAGIC), %o4
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	3f			! NUL in byte 2 -> length 2
	add	%o0, 1, %o0
	b	8f			! aligned; enter main word loop
	or	%o4, %lo(LO_MAGIC), %o2
1:
	retl
	mov	0, %o0
2:
	retl
	mov	1, %o0
3:
	retl
	mov	2, %o0

	.align	4
	.global	strlen
/* size_t strlen(const char *s)
 * In:  %o0 = s
 * Out: %o0 = length (bytes before the terminating NUL)
 * %o1 preserves the start pointer; %o2/%o3 hold the magic constants,
 * %o5 the current word, %o4/%g5 are scratch.
 */
strlen:
	mov	%o0, %o1		! remember start of string
	andcc	%o0, 3, %g0
	bne	0b			! unaligned: go do the byte prologue
	sethi	%hi(HI_MAGIC), %o4
	or	%o4, %lo(HI_MAGIC), %o3
4:
	sethi	%hi(LO_MAGIC), %o4
5:
	or	%o4, %lo(LO_MAGIC), %o2
8:
	ld	[%o0], %o5		! fetch next aligned word
2:
	sub	%o5, %o2, %o4		! zero-byte test:
	andcc	%o4, %o3, %g0		! (word - LO) & HI == 0 -> no NUL here
	be	8b
	add	%o0, 4, %o0		! advance past the word just tested

	/* Check every byte.  The magic test can fire without a real NUL,
	 * so examine the word byte-by-byte; if none is zero, resume the
	 * word loop via the annulled branch below. */
	srl	%o5, 24, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o0, -4, %o4		! %o4 = address of byte 0 of this word
	srl	%o5, 16, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o4, 1, %o4
	srl	%o5, 8, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o4, 1, %o4
	andcc	%o5, 0xff, %g0
	bne,a	2b			! false positive: reload and continue
	ld	[%o0], %o5
	add	%o4, 1, %o4		! NUL was the last byte of the word
1:
	retl
	sub	%o4, %o1, %o0		! length = NUL address - start
diff --git a/arch/sparc/lib/strlen_user.S b/arch/sparc/lib/strlen_user.S new file mode 100644 index 000000000000..8c8a371df3c9 --- /dev/null +++ b/arch/sparc/lib/strlen_user.S | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | /* strlen_user.S: Sparc optimized strlen_user code | ||
| 2 | * | ||
| 3 | * Return length of string in userspace including terminating 0 | ||
| 4 | * or 0 for error | ||
| 5 | * | ||
| 6 | * Copyright (C) 1991,1996 Free Software Foundation | ||
| 7 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
| 8 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
| 9 | */ | ||
| 10 | |||
/* Magic constants for word-at-a-time zero-byte detection:
 * (word - LO_MAGIC) & HI_MAGIC is nonzero whenever any byte of `word`
 * may be zero (with occasional false positives, filtered below).
 */
#define LO_MAGIC 0x01010101
#define HI_MAGIC 0x80808080

/* Alignment prologue (entered from __strnlen_user below when the user
 * pointer is not word-aligned).  Labels 10/11/12 mark the faulting user
 * loads listed in __ex_table; a fault lands in the fixup at 9: and
 * returns 0.  Returned lengths INCLUDE the terminating NUL, hence
 * 1/2/3 rather than 0/1/2.
 */
10:
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	1f			! NUL in byte 0 -> length 1
	add	%o0, 1, %o0
	andcc	%o0, 3, %g0
	be	4f			! aligned now; finish building constants
	or	%o4, %lo(HI_MAGIC), %o3
11:
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	2f			! NUL in byte 1 -> length 2
	add	%o0, 1, %o0
	andcc	%o0, 3, %g0
	be	5f
	sethi	%hi(LO_MAGIC), %o4
12:
	ldub	[%o0], %o5
	cmp	%o5, 0
	be	3f			! NUL in byte 2 -> length 3
	add	%o0, 1, %o0
	b	13f			! aligned; enter main word loop
	or	%o4, %lo(LO_MAGIC), %o2
1:
	retl
	mov	1, %o0
2:
	retl
	mov	2, %o0
3:
	retl
	mov	3, %o0

	.align	4
	.global	__strlen_user, __strnlen_user
/* __strlen_user(s):    length of user string including the NUL, 0 on fault.
 *                      Falls through with an implicit limit of 32768.
 * __strnlen_user(s,n): same, but caller supplies the limit in %o1.
 * In:  %o0 = user pointer, (%o1 = limit for __strnlen_user)
 * Out: %o0 = length including NUL, or 0 on fault
 * %g1 = limit, %o1 = start pointer, %o2/%o3 = magic constants,
 * %o5 = current word, %g2 = bytes scanned so far, %o4/%g5 scratch.
 */
__strlen_user:
	sethi	%hi(32768), %o1		! default limit: 32768 bytes
__strnlen_user:
	mov	%o1, %g1		! %g1 = byte limit
	mov	%o0, %o1		! %o1 = start of string
	andcc	%o0, 3, %g0
	bne	10b			! unaligned: byte prologue above
	sethi	%hi(HI_MAGIC), %o4
	or	%o4, %lo(HI_MAGIC), %o3
4:
	sethi	%hi(LO_MAGIC), %o4
5:
	or	%o4, %lo(LO_MAGIC), %o2
13:
	ld	[%o0], %o5		! faulting user load (see __ex_table)
2:
	sub	%o5, %o2, %o4		! zero-byte test:
	andcc	%o4, %o3, %g0		! (word - LO) & HI != 0 -> candidate NUL
	bne	82f
	add	%o0, 4, %o0
	sub	%o0, %o1, %g2		! bytes scanned so far
81:	cmp	%g2, %g1
	blu	13b			! under the limit: keep scanning
	mov	%o0, %o4
	ba,a	1f			! limit hit: return bytes scanned

	/* Check every byte.  The magic test can fire without a real NUL;
	 * if no byte is zero, rejoin the word loop through 81 above. */
82:	srl	%o5, 24, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o0, -3, %o4		! NUL address + 1 (length includes NUL)
	srl	%o5, 16, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o4, 1, %o4
	srl	%o5, 8, %g5
	andcc	%g5, 0xff, %g0
	be	1f
	add	%o4, 1, %o4
	andcc	%o5, 0xff, %g0
	bne	81b			! false positive: continue word loop
	sub	%o0, %o1, %g2

	add	%o4, 1, %o4		! NUL was the last byte of the word
1:
	retl
	sub	%o4, %o1, %o0		! length = (NUL address + 1) - start

	/* Fault fixup: any faulting load above returns 0. */
	.section	.fixup,#alloc,#execinstr
	.align	4
9:
	retl
	clr	%o0

	/* Exception table: map each faulting user load to the fixup. */
	.section	__ex_table,#alloc
	.align	4

	.word	10b, 9b
	.word	11b, 9b
	.word	12b, 9b
	.word	13b, 9b
diff --git a/arch/sparc/lib/strncmp.S b/arch/sparc/lib/strncmp.S new file mode 100644 index 000000000000..615626805d4b --- /dev/null +++ b/arch/sparc/lib/strncmp.S | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | /* $Id: strncmp.S,v 1.2 1996/09/09 02:47:20 davem Exp $ | ||
| 2 | * strncmp.S: Hand optimized Sparc assembly of GCC output from GNU libc | ||
| 3 | * generic strncmp routine. | ||
| 4 | */ | ||
| 5 | |||
	.text
	.align 4
	.global __strncmp, strncmp
/* int strncmp(const char *s1, const char *s2, size_t n)
 * In:  %o0 = s1, %o1 = s2, %o2 = n
 * Out: %o0 = (first differing byte of s1) - (corresponding byte of s2),
 *       or 0 if the strings are equal over the compared range.
 * %g3 walks s1 (%o0 is reused for the current s1 byte), %g2 holds the
 * current s2 byte, %o3 the raw s1 byte, %o4 the group counter.
 * The main loop is the byte-compare step unrolled four times; the tail
 * loop at 9:/7: handles the remaining n & 3 bytes.
 */
__strncmp:
strncmp:
	mov	%o0, %g3		! %g3 = s1 cursor; frees %o0 for the byte
	mov	0, %o3

	cmp	%o2, 3
	ble	7f			! n < 4: tail loop only
	mov	0, %g2

	sra	%o2, 2, %o4		! %o4 = number of 4-byte groups
	ldub	[%g3], %o3		! prime the first s1 byte

0:
	! --- unrolled step 1 of 4 ---
	ldub	[%o1], %g2
	add	%g3, 1, %g3
	and	%o3, 0xff, %o0

	cmp	%o0, 0
	be	8f			! s1 ended: return 0 - s2 byte
	add	%o1, 1, %o1

	cmp	%o0, %g2
	be,a	1f			! equal: preload next s1 byte (annulled
	ldub	[%g3], %o3		! delay slot) and continue

	retl
	sub	%o0, %g2, %o0		! differ: return byte difference

1:
	! --- unrolled step 2 of 4 ---
	ldub	[%o1], %g2
	add	%g3,1, %g3
	and	%o3, 0xff, %o0

	cmp	%o0, 0
	be	8f
	add	%o1, 1, %o1

	cmp	%o0, %g2
	be,a	1f
	ldub	[%g3], %o3

	retl
	sub	%o0, %g2, %o0

1:
	! --- unrolled step 3 of 4 ---
	ldub	[%o1], %g2
	add	%g3, 1, %g3
	and	%o3, 0xff, %o0

	cmp	%o0, 0
	be	8f
	add	%o1, 1, %o1

	cmp	%o0, %g2
	be,a	1f
	ldub	[%g3], %o3

	retl
	sub	%o0, %g2, %o0

1:
	! --- unrolled step 4 of 4: also counts down the group counter ---
	ldub	[%o1], %g2
	add	%g3, 1, %g3
	and	%o3, 0xff, %o0

	cmp	%o0, 0
	be	8f
	add	%o1, 1, %o1

	cmp	%o0, %g2
	be	1f
	add	%o4, -1, %o4

	retl
	sub	%o0, %g2, %o0

1:

	cmp	%o4, 0
	bg,a	0b			! more full groups: reload and loop
	ldub	[%g3], %o3

	b	7f			! finish with the n & 3 leftover bytes
	and	%o2, 3, %o2

9:
	! Tail loop: one byte per iteration, %o2 counts down.
	ldub	[%o1], %g2
	add	%g3, 1, %g3
	and	%o3, 0xff, %o0

	cmp	%o0, 0
	be	8f			! s1 ended: return 0 - s2 byte
	add	%o1, 1, %o1

	cmp	%o0, %g2
	be	7f			! equal: continue (falls through on differ)
	add	%o2, -1, %o2

8:
	retl
	sub	%o0, %g2, %o0		! return byte difference

7:
	cmp	%o2, 0
	bg,a	9b			! bytes remain: reload and loop
	ldub	[%g3], %o3

	! Count exhausted: last compared pair was equal, so this is 0.
	and	%g2, 0xff, %o0
	retl
	sub	%o3, %o0, %o0
diff --git a/arch/sparc/lib/strncpy_from_user.S b/arch/sparc/lib/strncpy_from_user.S new file mode 100644 index 000000000000..d77198976a66 --- /dev/null +++ b/arch/sparc/lib/strncpy_from_user.S | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | /* strncpy_from_user.S: Sparc strncpy from userspace. | ||
| 2 | * | ||
| 3 | * Copyright(C) 1996 David S. Miller | ||
| 4 | */ | ||
| 5 | |||
#include <asm/ptrace.h>
#include <asm/errno.h>

	.text
	.align	4

	/* Must return:
	 *
	 * -EFAULT		for an exception
	 * count		if we hit the buffer limit
	 * bytes copied		if we hit a null byte
	 * (Note that the NUL itself is stored but not counted.)
	 */

	.globl	__strncpy_from_user
/* long __strncpy_from_user(char *dest, const char *src, long count)
 * In:  %o0 = dest (kernel), %o1 = src (user), %o2 = count
 * Out: %o0 = result per the contract above
 * %o3 preserves the original count; label 10: is the faulting user
 * load covered by the __ex_table entry below.
 */
__strncpy_from_user:
	/* %o0=dest, %o1=src, %o2=count */
	mov	%o2, %o3		! %o3 = original count
1:
	subcc	%o2, 1, %o2
	bneg	2f			! count exhausted (%o2 goes to -1)
	nop
10:
	ldub	[%o1], %o4		! faulting user load (see __ex_table)
	add	%o0, 1, %o0
	cmp	%o4, 0
	add	%o1, 1, %o1
	bne	1b			! loop until NUL (NUL itself is stored
	stb	%o4, [%o0 - 1]		! by this delay-slot store too)
2:
	! If we stopped on a NUL:   %o3 - (%o2+1) = bytes copied before it.
	! If the count ran out:     %o2 = -1, so %o3 - 0 = count.
	add	%o2, 1, %o0
	retl
	sub	%o3, %o0, %o0

	/* Fault fixup: a faulting user load returns -EFAULT. */
	.section	.fixup,#alloc,#execinstr
	.align	4
4:
	retl
	mov	-EFAULT, %o0

	.section	__ex_table,#alloc
	.align	4
	.word	10b, 4b
diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S new file mode 100644 index 000000000000..2abfc6b0f3e9 --- /dev/null +++ b/arch/sparc/lib/udiv.S | |||
| @@ -0,0 +1,355 @@ | |||
| 1 | /* $Id: udiv.S,v 1.4 1996/09/30 02:22:38 davem Exp $ | ||
| 2 | * udiv.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | |||
| 7 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
| 8 | /* | ||
| 9 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
| 10 | * Architecture Manual, with fixes from Gordon Irlam. | ||
| 11 | */ | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
| 15 | * | ||
| 16 | * m4 parameters: | ||
| 17 | * .udiv name of function to generate | ||
| 18 | * div div=div => %o0 / %o1; div=rem => %o0 % %o1 | ||
| 19 | * false false=true => signed; false=false => unsigned | ||
| 20 | * | ||
| 21 | * Algorithm parameters: | ||
| 22 | * N how many bits per iteration we try to get (4) | ||
| 23 | * WORDSIZE total number of bits (32) | ||
| 24 | * | ||
| 25 | * Derived constants: | ||
| 26 | * TOPBITS number of bits in the top decade of a number | ||
| 27 | * | ||
| 28 | * Important variables: | ||
| 29 | * Q the partial quotient under development (initially 0) | ||
| 30 | * R the remainder so far, initially the dividend | ||
| 31 | * ITER number of main division loop iterations required; | ||
| 32 | * equal to ceil(log2(quotient) / N). Note that this | ||
| 33 | * is the log base (2^N) of the quotient. | ||
| 34 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
| 35 | * | ||
| 36 | * Cost: | ||
| 37 | * Current estimate for non-large dividend is | ||
| 38 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
| 39 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
| 40 | * different path, as the upper bits of the quotient must be developed | ||
| 41 | * one bit at a time. | ||
| 42 | */ | ||
| 43 | |||
| 44 | |||
/* .udiv: unsigned 32-bit integer divide.
 *
 * In:      %o0 = dividend, %o1 = divisor
 * Out:     %o0 = quotient (0 if the divide-by-zero trap returns)
 * Scratch: %g1, %g7, %o2, %o3, %o4, %o5
 *
 * Unsigned twin of .div (no sign preamble / no result negation).
 * Non-restoring division developing up to 4 quotient bits per main-loop
 * iteration; see the file header above for the algorithm description.
 * Register roles below: %o3 = R (remainder), %o5 = V (comparand),
 * %o2 = Q (quotient under development), %o4 = ITER.
 */
	.globl .udiv
.udiv:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; also tests divisor for zero
	bne	1f
	mov	%o0, %o3		! R = remainder, initially the dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	clr	%o2			! Q starts at 0

	sethi	%hi(1 << (32 - 4 - 1)), %g1	! 2^27: "large dividend" threshold

	cmp	%o3, %g1
	blu	Lnot_really_big
	clr	%o4			! ITER starts at 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	mov	1, %g7			! %g7 = SC, single-bit step count

	sll	%o5, 4, %o5		! scale V by 2^N per iteration

	b	1b
	add	%o4, 1, %o4

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5		! shift V left one bit, catching overflow
	bcc	Lnot_too_big
	add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1	! high order bit
	srl	%o5, 1, %o5	! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	sub	%g7, 1, %g7

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b			! keep shifting V up while V < R
	nop

	be	Ldo_single_div
	nop
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	nop

	sub	%o3, %o5, %o3		! first step is unconditional: R >= V here
	mov	1, %o2

	b	Lend_single_divloop
	nop
Lsingle_divloop:
	sll	%o2, 1, %o2		! make room for the next quotient bit
	bl	1f
	srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3		! non-restoring: add back instead of restore
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	tst	%o3			! set condition codes on R for next step

	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! scale V up until it covers R

	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4

	be	Lgot_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
Ldivloop:
	! Each pass develops 4 quotient bits.  The branch tree below walks
	! depth 1..4; at each depth the (signed) remainder decides whether we
	! subtract or add the shifted V, and the accumulated bits encode the
	! running non-restoring quotient contribution (added as 2k+1 / 2k-1).
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	tst	%o3			! condition codes on R feed the next pass

	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2

Lgot_result:

	retl
	mov	%o2, %o0
| 347 | |||
/* .udiv_patch: replacement body patched over .udiv on CPUs with a
 * hardware unsigned-divide instruction.  Same contract as .udiv:
 * %o0 = %o0 / %o1, result in %o0.
 * Zeroes %y (the high 32 bits of the 64-bit dividend for udiv), with
 * nops covering the delay after writing %y.
 */
	.globl	.udiv_patch
.udiv_patch:
	wr	%g0, 0x0, %y		! high word of dividend = 0
	nop				! wr %y needs settling time before use
	nop
	retl
	udiv	%o0, %o1, %o0		! divide in the delay slot
	nop
diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S new file mode 100644 index 000000000000..b430f1f0ef62 --- /dev/null +++ b/arch/sparc/lib/udivdi3.S | |||
| @@ -0,0 +1,258 @@ | |||
| 1 | /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | ||
| 2 | |||
| 3 | This file is part of GNU CC. | ||
| 4 | |||
| 5 | GNU CC is free software; you can redistribute it and/or modify | ||
| 6 | it under the terms of the GNU General Public License as published by | ||
| 7 | the Free Software Foundation; either version 2, or (at your option) | ||
| 8 | any later version. | ||
| 9 | |||
| 10 | GNU CC is distributed in the hope that it will be useful, | ||
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | GNU General Public License for more details. | ||
| 14 | |||
| 15 | You should have received a copy of the GNU General Public License | ||
| 16 | along with GNU CC; see the file COPYING. If not, write to | ||
| 17 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
| 18 | Boston, MA 02111-1307, USA. */ | ||
| 19 | |||
	.text
	.align 4

	!---------------------------------------------------------------
	! __udivdi3 -- 64-bit unsigned divide (gcc/libgcc support routine).
	!
	! C equivalent:
	!     unsigned long long __udivdi3(unsigned long long n,
	!                                  unsigned long long d)
	!
	! SPARC 32-bit ABI: n arrives in %i0(high):%i1(low), d in
	! %i2(high):%i3(low); the quotient is returned in %i0(high):%i1(low).
	! A register window is allocated (save/restore), so the caller's
	! registers survive.  Calls .udiv once (divide-by-zero case) and
	! reads the byte table __clz_tab for a count-leading-zeros step.
	!---------------------------------------------------------------
	.globl __udivdi3
__udivdi3:
	save %sp,-104,%sp
	mov %i3,%o3			! %o3 = d.low
	cmp %i2,0			! is the divisor's high word zero?
	bne .LL40			! no -> full 64/64 path
	mov %i1,%i3			! (delay) %i3 = n.low
	cmp %o3,%i0			! d.low > n.high ?
	bleu .LL41
	mov %i3,%o1			! (delay) %o1 = n.low
	! d.low > n.high: quotient fits in 32 bits; one 64/32 divide step.
	! Inlined udiv_qrnnd: (%i0:%o1) / %o3 -> quotient %o1, remainder %i0
	mov 32,%g1
	subcc %i0,%o3,%g0
1:	bcs 5f
	addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
	sub %i0,%o3,%i0		! this kills msb of n
	addx %i0,%i0,%i0	! so this cannot give carry
	subcc %g1,1,%g1
2:	bne 1b
	subcc %i0,%o3,%g0
	bcs 3f
	addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
	b 3f
	sub %i0,%o3,%i0		! this kills msb of n
4:	sub %i0,%o3,%i0
5:	addxcc %i0,%i0,%i0
	bcc 2b
	subcc %g1,1,%g1
	! Got carry from n. Subtract next step to cancel this carry.
	bne 4b
	addcc %o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
	sub %i0,%o3,%i0
3:	xnor %o1,0,%o1
	! End of inline udiv_qrnnd
	b .LL45			! quotient low word is in %o1 ...
	mov 0,%o2		! (delay) ... and the high word is 0
.LL41:
	! d.high == 0 but d.low <= n.high: two 64/32 divide steps needed.
	cmp %o3,0		! is d.low zero as well, i.e. d == 0?
	bne .LL77
	mov %i0,%o2		! (delay) %o2 = n.high
	! Division by zero: evaluate 1/0 through .udiv so that routine's
	! divide-by-zero trap behaviour decides the outcome, then carry on
	! using whatever it returns as the divisor.
	mov 1,%o0
	call .udiv,0
	mov 0,%o1
	mov %o0,%o3
	mov %i0,%o2
.LL77:
	mov 0,%o4
	! q.high = n.high / d.low, remainder left in %o4.
	! Inlined udiv_qrnnd: (%o4:%o2) / %o3 -> quotient %o2, remainder %o4
	mov 32,%g1
	subcc %o4,%o3,%g0
1:	bcs 5f
	addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
	sub %o4,%o3,%o4		! this kills msb of n
	addx %o4,%o4,%o4	! so this cannot give carry
	subcc %g1,1,%g1
2:	bne 1b
	subcc %o4,%o3,%g0
	bcs 3f
	addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
	b 3f
	sub %o4,%o3,%o4		! this kills msb of n
4:	sub %o4,%o3,%o4
5:	addxcc %o4,%o4,%o4
	bcc 2b
	subcc %g1,1,%g1
	! Got carry from n. Subtract next step to cancel this carry.
	bne 4b
	addcc %o2,%o2,%o2	! shift n1n0 and a 0-bit in lsb
	sub %o4,%o3,%o4
3:	xnor %o2,0,%o2
	! End of inline udiv_qrnnd
	mov %o4,%i0		! %i0 = remainder of the high-word step
	mov %i3,%o1		! %o1 = n.low
	! q.low = (remainder : n.low) / d.low
	! Inlined udiv_qrnnd: (%i0:%o1) / %o3 -> quotient %o1, remainder %i0
	mov 32,%g1
	subcc %i0,%o3,%g0
1:	bcs 5f
	addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
	sub %i0,%o3,%i0		! this kills msb of n
	addx %i0,%i0,%i0	! so this cannot give carry
	subcc %g1,1,%g1
2:	bne 1b
	subcc %i0,%o3,%g0
	bcs 3f
	addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
	b 3f
	sub %i0,%o3,%i0		! this kills msb of n
4:	sub %i0,%o3,%i0
5:	addxcc %i0,%i0,%i0
	bcc 2b
	subcc %g1,1,%g1
	! Got carry from n. Subtract next step to cancel this carry.
	bne 4b
	addcc %o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
	sub %i0,%o3,%i0
3:	xnor %o1,0,%o1
	! End of inline udiv_qrnnd
	b .LL78			! q.high is already in %o2
	mov %o1,%l1		! (delay) %l1 = q.low
.LL40:
	! d.high != 0: the quotient fits in 32 bits (possibly 0 or 1).
	cmp %i2,%i0		! d.high > n.high -> quotient is 0
	bleu .LL46
	sethi %hi(65535),%o0
	b .LL73
	mov 0,%o1
.LL46:
	! Locate the highest non-zero byte of d.high so __clz_tab (a
	! 256-entry bit-width table) can give the significant bit count.
	or %o0,%lo(65535),%o0
	cmp %i2,%o0
	bgu .LL53
	mov %i2,%o1
	cmp %i2,256
	addx %g0,-1,%o0
	b .LL59
	and %o0,8,%o2		! byte shift = 8 if d.high >= 256, else 0
.LL53:
	sethi %hi(16777215),%o0
	or %o0,%lo(16777215),%o0
	cmp %o1,%o0
	bgu .LL59
	mov 24,%o2
	mov 16,%o2
.LL59:
	srl %o1,%o2,%o1
	sethi %hi(__clz_tab),%o0
	or %o0,%lo(__clz_tab),%o0
	ldub [%o1+%o0],%o0
	add %o0,%o2,%o0		! %o0 = bit width of d.high
	mov 32,%o1
	subcc %o1,%o2,%g0	! NOTE(review): see next line; original computes
	! %o2 = 32 - width = normalizing shift count
	subcc %o1,%o0,%o2
	bne,a .LL67
	mov 32,%o0		! (annulled unless taken) %o0 = 32 - shift
	! Shift count 0: d.high already has its top bit set, so the
	! quotient is 0 or 1 -- decide with a full 64-bit compare of n, d.
	cmp %i0,%i2
	bgu .LL69
	cmp %i3,%o3
	blu .LL73
	mov 0,%o1
.LL69:
	b .LL73
	mov 1,%o1
.LL67:
	! Normalize: shift d (and n) left so d.high gets its top bit set.
	sub %o0,%o2,%o0
	sll %i2,%o2,%i2
	srl %o3,%o0,%o1
	or %i2,%o1,%i2		! %i2 = normalized d.high
	sll %o3,%o2,%o3		! %o3 = normalized d.low
	srl %i0,%o0,%o1		! %o1 = new n1 (bits shifted out of n.high)
	sll %i0,%o2,%i0
	srl %i3,%o0,%o0
	or %i0,%o0,%i0		! %i0 = new n0
	sll %i3,%o2,%i3		! %i3 = leftover low bits, for final compare
	mov %i0,%o5
	mov %o1,%o4
	! Candidate quotient q = (n1:n0) / d.high; may be one too large.
	! Inlined udiv_qrnnd: (%o4:%o5) / %i2 -> quotient %o5, remainder %o4
	mov 32,%g1
	subcc %o4,%i2,%g0
1:	bcs 5f
	addxcc %o5,%o5,%o5	! shift n1n0 and a q-bit in lsb
	sub %o4,%i2,%o4		! this kills msb of n
	addx %o4,%o4,%o4	! so this cannot give carry
	subcc %g1,1,%g1
2:	bne 1b
	subcc %o4,%i2,%g0
	bcs 3f
	addxcc %o5,%o5,%o5	! shift n1n0 and a q-bit in lsb
	b 3f
	sub %o4,%i2,%o4		! this kills msb of n
4:	sub %o4,%i2,%o4
5:	addxcc %o4,%o4,%o4
	bcc 2b
	subcc %g1,1,%g1
	! Got carry from n. Subtract next step to cancel this carry.
	bne 4b
	addcc %o5,%o5,%o5	! shift n1n0 and a 0-bit in lsb
	sub %o4,%i2,%o4
3:	xnor %o5,0,%o5
	! End of inline udiv_qrnnd
	mov %o4,%i0		! %i0 = remainder
	mov %o5,%o1		! %o1 = candidate quotient q
	! Form the 64-bit product q * d.low to validate the candidate.
	! Inlined umul_ppmm
	wr %g0,%o1,%y	! SPARC has 0-3 delay insn after a wr
	sra %o3,31,%g2	! Do not move this insn
	and %o1,%g2,%g2	! Do not move this insn
	andcc %g0,0,%g1	! Do not move this insn
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,%o3,%g1
	mulscc %g1,0,%g1
	add %g1,%g2,%o0		! %o0 = high word of q * d.low
	rd %y,%o2		! %o2 = low word of q * d.low
	! If q * d.low exceeds (remainder : leftover-low-bits), the
	! candidate was one too big.
	cmp %o0,%i0
	bgu,a .LL73
	add %o1,-1,%o1		! (annulled unless taken) q -= 1
	bne,a .LL45
	mov 0,%o2
	cmp %o2,%i3
	bleu .LL45
	mov 0,%o2
	add %o1,-1,%o1		! low words decide: q -= 1
.LL73:
	mov 0,%o2		! quotient high word = 0
.LL45:
	mov %o1,%l1
.LL78:
	mov %o2,%l0
	mov %l0,%i0		! return quotient in %i0:%i1
	mov %l1,%i1
	ret
	restore
diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S new file mode 100644 index 000000000000..a784720a8a22 --- /dev/null +++ b/arch/sparc/lib/umul.S | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | /* $Id: umul.S,v 1.4 1996/09/30 02:22:39 davem Exp $ | ||
| 2 | * umul.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | |||
| 7 | /* | ||
| 8 | * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the | ||
| 9 | * upper 32 bits of the 64-bit product). | ||
| 10 | * | ||
| 11 | * This code optimizes short (less than 13-bit) multiplies. Short | ||
| 12 | * multiplies require 25 instruction cycles, and long ones require | ||
| 13 | * 45 instruction cycles. | ||
| 14 | * | ||
| 15 | * On return, overflow has occurred (%o1 is not zero) if and only if | ||
| 16 | * the Z condition code is clear, allowing, e.g., the following: | ||
| 17 | * | ||
| 18 | * call .umul | ||
| 19 | * nop | ||
| 20 | * bnz overflow (or tnz) | ||
| 21 | */ | ||
| 22 | |||
	! Entry: multiplier in %o0, multiplicand in %o1 (SPARC 32-bit ABI).
	! Result: 64-bit product in %o1(high):%o0(low); Z condition code set
	! iff %o1 == 0 (see header comment above).
	! Clobbers: %o2, %o4, %o5, %y, condition codes.
	.globl .umul
.umul:
	or	%o0, %o1, %o4
	mov	%o0, %y		! multiplier -> Y

	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
	be	Lmul_shortway	! if zero, can do it the short way
	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V

	/*
	 * Long multiply.  32 steps, followed by a final shift step.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift


	/*
	 * Normally, with the shift-and-add approach, if both numbers are
	 * positive you get the correct result.  With 32-bit two's-complement
	 * numbers, -x is represented as
	 *
	 *		  x		    32
	 *	( 2  -  ------ ) mod 2  *  2
	 *		   32
	 *		  2
	 *
	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
	 * we can treat this as if the radix point were just to the left
	 * of the sign bit (multiply by 2^32), and get
	 *
	 *	-x  =  (2 - x) mod 2
	 *
	 * Then, ignoring the `mod 2's for convenience:
	 *
	 *   x *  y	= xy
	 *  -x *  y	= 2y - xy
	 *   x * -y	= 2x - xy
	 *  -x * -y	= 4 - 2x - 2y + xy
	 *
	 * For signed multiplies, we subtract (x << 32) from the partial
	 * product to fix this problem for negative multipliers (see mul.s).
	 * Because of the way the shift into the partial product is calculated
	 * (N xor V), this term is automatically removed for the multiplicand,
	 * so we don't have to adjust.
	 *
	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
	 * and the correction is wrong.  So for unsigned multiplies where the
	 * high order bit is one, we end up with xy - (y << 32).  To fix it
	 * we add y << 32.
	 */
#if 0
	tst	%o1
	bl,a	1f		! if %o1 < 0 (high order bit = 1),
	add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)

1:
	rd	%y, %o0		! get lower half of product
	retl
	addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
#else
	/* Faster code from tege@sics.se.  */
	sra	%o1, 31, %o2	! make mask from sign bit
	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
	rd	%y, %o0		! get lower half of product
	retl
	addcc	%o4, %o2, %o1	! add compensation and put upper half in place
#endif

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above),
	 * and overflow is impossible (the answer is at most 24 bits long).
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 * %o4 has 20 of the bits that should be in the result; %y has
	 * the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		  %y
	 * +----------------+----------------+
	 * | -12- |   -20-  | -12- |   -20-  |
	 * +------(---------+------)---------+
	 *	   -----result-----
	 *
	 * The 12 bits of %o4 left of the `result' area are all zero;
	 * in fact, all top 20 bits of %o4 are zero.
	 */

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20
	or	%o5, %o0, %o0
	retl
	addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
| 163 | |||
	! Replacement body for .umul on CPUs with a hardware integer
	! multiplier: one umul plus a %y read yields the same
	! %o1(high):%o0(low) product.  NOTE(review): the boot-time code
	! that copies these instructions over .umul lives elsewhere;
	! confirm it copies the trailing nop too.  Also note that unlike
	! .umul's addcc epilogue, this sequence does not set the Z
	! condition code from %o1 -- callers using the documented
	! "bnz after call" overflow idiom should be audited.
	.globl .umul_patch
.umul_patch:
	umul	%o0, %o1, %o0	! 64-bit product -> %y(high):%o0(low)
	retl
	rd	%y, %o1		! (delay) high half -> %o1
	nop
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S new file mode 100644 index 000000000000..ec7f0c502c56 --- /dev/null +++ b/arch/sparc/lib/urem.S | |||
| @@ -0,0 +1,355 @@ | |||
| 1 | /* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $ | ||
| 2 | * urem.S: This routine was taken from glibc-1.09 and is covered | ||
| 3 | * by the GNU Library General Public License Version 2. | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* This file is generated from divrem.m4; DO NOT EDIT! */ | ||
| 7 | /* | ||
| 8 | * Division and remainder, from Appendix E of the Sparc Version 8 | ||
| 9 | * Architecture Manual, with fixes from Gordon Irlam. | ||
| 10 | */ | ||
| 11 | |||
| 12 | /* | ||
| 13 | * Input: dividend and divisor in %o0 and %o1 respectively. | ||
| 14 | * | ||
| 15 | * m4 parameters: | ||
| 16 | * .urem name of function to generate | ||
| 17 | * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 | ||
| 18 | * false false=true => signed; false=false => unsigned | ||
| 19 | * | ||
| 20 | * Algorithm parameters: | ||
| 21 | * N how many bits per iteration we try to get (4) | ||
| 22 | * WORDSIZE total number of bits (32) | ||
| 23 | * | ||
| 24 | * Derived constants: | ||
| 25 | * TOPBITS number of bits in the top decade of a number | ||
| 26 | * | ||
| 27 | * Important variables: | ||
| 28 | * Q the partial quotient under development (initially 0) | ||
| 29 | * R the remainder so far, initially the dividend | ||
| 30 | * ITER number of main division loop iterations required; | ||
| 31 | * equal to ceil(log2(quotient) / N). Note that this | ||
| 32 | * is the log base (2^N) of the quotient. | ||
| 33 | * V the current comparand, initially divisor*2^(ITER*N-1) | ||
| 34 | * | ||
| 35 | * Cost: | ||
| 36 | * Current estimate for non-large dividend is | ||
| 37 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C | ||
| 38 | * A large dividend is one greater than 2^(31-TOPBITS) and takes a | ||
| 39 | * different path, as the upper bits of the quotient must be developed | ||
| 40 | * one bit at a time. | ||
| 41 | */ | ||
| 42 | |||
	!---------------------------------------------------------------
	! .urem -- 32-bit unsigned remainder:  %o0 = %o0 % %o1.
	! In:   %o0 = dividend, %o1 = divisor
	! Out:  %o0 = remainder; 0 after a division by zero (matches the
	!       SunOS behaviour documented below)
	! Traps with ST_DIV0 when %o1 == 0.
	! Clobbers: %o2 (Q), %o3 (R), %o4 (ITER), %o5 (V), %g1, %g7,
	! condition codes.  NOTE(review): %g7 is used as a plain scratch
	! counter here; on ABIs that reserve %g7 as the thread pointer
	! this needs auditing.
	!---------------------------------------------------------------
	.globl .urem
.urem:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! %o5 = V = divisor; test for zero
	bne	1f
	mov	%o0, %o3		! (delay) %o3 = R = dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	clr	%o2			! (delay) Q = 0; R already holds answer

	sethi	%hi(1 << (32 - 4 - 1)), %g1	! "large dividend" threshold

	cmp	%o3, %g1
	blu	Lnot_really_big
	clr	%o4			! (delay) ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	mov	1, %g7			! (delay) single-bit step count = 1

	sll	%o5, 4, %o5		! V <<= N

	b	1b
	add	%o4, 1, %o4		! (delay) ITER++

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5		! V <<= 1, watching for overflow
	bcc	Lnot_too_big
	add	%g7, 1, %g7		! (delay) one more single-bit step

	! We get here if %o5 (the shifted divisor) overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	sub	%g7, 1, %g7

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b
	nop

	be	Ldo_single_div
	nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	nop

	sub	%o3, %o5, %o3		! first step is unconditional
	mov	1, %o2

	b	Lend_single_divloop
	nop
Lsingle_divloop:
	sll	%o2, 1, %o2		! one quotient bit per iteration
	bl	1f
	srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3		! non-restoring: add back instead
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	tst	%o3			! (delay) sign of R for next iteration

	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! scale V up by 2^N per quotient "digit"

	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4		! (delay) ITER++

	be	Lgot_result
	sub	%o4, 1, %o4		! (delay) overshot by one scaling step

	tst	%o3	! set up for initial iteration
Ldivloop:
	! Develop 4 quotient bits per pass through the unrolled tree below;
	! each L.depth.x label encodes the bits accumulated so far.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	tst	%o3			! (delay) sign of R for next pass

	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	add	%o3, %o1, %o3		! (annulled unless taken) R += divisor

Lgot_result:

	retl
	mov	%o3, %o0		! (delay) return the remainder
| 345 | |||
	! Replacement body for .urem on CPUs with hardware udiv/umul:
	! computes r = n - (n / d) * d.  Clearing %y first makes udiv a
	! plain 32/32 divide (udiv divides the 64-bit value %y:rs1); the
	! three nops satisfy the SPARC V8 rule that up to three delay
	! instructions are needed between a wr and an instruction that
	! reads the written state.  NOTE(review): patched over .urem at
	! boot by code elsewhere -- confirm all eight instructions are
	! copied.
	.globl .urem_patch
.urem_patch:
	wr	%g0, 0x0, %y	! dividend high word = 0
	nop
	nop
	nop
	udiv	%o0, %o1, %o2	! %o2 = n / d
	umul	%o2, %o1, %o2	! %o2 = (n / d) * d
	retl
	sub	%o0, %o2, %o0	! (delay) remainder = n - q*d
