diff options
Diffstat (limited to 'arch/sparc/lib')
86 files changed, 3685 insertions, 2107 deletions
diff --git a/arch/sparc/lib/COPYING.LIB b/arch/sparc/lib/COPYING.LIB deleted file mode 100644 index eb685a5ec981..000000000000 --- a/arch/sparc/lib/COPYING.LIB +++ /dev/null @@ -1,481 +0,0 @@ - GNU LIBRARY GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the library GPL. It is - numbered 2 because it goes with version 2 of the ordinary GPL.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Library General Public License, applies to some -specially designated Free Software Foundation software, and to any -other libraries whose authors decide to use it. You can use it for -your libraries, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if -you distribute copies of the library, or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. 
You must make sure that they, too, receive or can get the source -code. If you link a program with the library, you must provide -complete object files to the recipients so that they can relink them -with the library, after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - Our method of protecting your rights has two steps: (1) copyright -the library, and (2) offer you this license which gives you legal -permission to copy, distribute and/or modify the library. - - Also, for each distributor's protection, we want to make certain -that everyone understands that there is no warranty for this free -library. If the library is modified by someone else and passed on, we -want its recipients to know that what they have is not the original -version, so that any problems introduced by others will not reflect on -the original authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that companies distributing free -software will individually obtain patent licenses, thus in effect -transforming the program into proprietary software. To prevent this, -we have made it clear that any patent must be licensed for everyone's -free use or not licensed at all. - - Most GNU software, including some libraries, is covered by the ordinary -GNU General Public License, which was designed for utility programs. This -license, the GNU Library General Public License, applies to certain -designated libraries. This license is quite different from the ordinary -one; be sure to read it in full, and don't assume that anything in it is -the same as in the ordinary license. - - The reason we have a separate public license for some libraries is that -they blur the distinction we usually make between modifying or adding to a -program and simply using it. 
Linking a program with a library, without -changing the library, is in some sense simply using the library, and is -analogous to running a utility program or application program. However, in -a textual and legal sense, the linked executable is a combined work, a -derivative of the original library, and the ordinary General Public License -treats it as such. - - Because of this blurred distinction, using the ordinary General -Public License for libraries did not effectively promote software -sharing, because most developers did not use the libraries. We -concluded that weaker conditions might promote sharing better. - - However, unrestricted linking of non-free programs would deprive the -users of those programs of all benefit from the free status of the -libraries themselves. This Library General Public License is intended to -permit developers of non-free programs to use free libraries, while -preserving your freedom as a user of such programs to change the free -libraries that are incorporated in them. (We have not seen how to achieve -this as regards changes in header files, but we have achieved it as regards -changes in the actual functions of the Library.) The hope is that this -will lead to faster development of free libraries. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, while the latter only -works together with the library. - - Note that it is possible for a library to be covered by the ordinary -General Public License rather than by this special one. - - GNU LIBRARY GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. 
This License Agreement applies to any software library which -contains a notice placed by the copyright holder or other authorized -party saying it may be distributed under the terms of this Library -General Public License (also called "this License"). Each licensee is -addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. 
You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) 
- -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. 
You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. 
- - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also compile or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. 
(It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - c) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - d) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the source code distributed need not include anything that is normally -distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. 
You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. 
Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. 
Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Library General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. 
Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. 
It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - <one line to give the library's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - <signature of Ty Coon>, 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! diff --git a/arch/sparc/lib/GENbzero.S b/arch/sparc/lib/GENbzero.S index 8e7a843ddd88..63d618857d49 100644 --- a/arch/sparc/lib/GENbzero.S +++ b/arch/sparc/lib/GENbzero.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENbzero.S: Generic sparc64 memset/clear_user. * * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) @@ -8,7 +9,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_o1; \ + .word 98b, __retl_o1_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6b1406..6891a5678ea3 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -1,13 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENcopy_from_user.S: Generic sparc64 copy from userspace. * * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; @@ -23,7 +24,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index 780550e1afc7..df75b532a934 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -1,13 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENcopy_to_user.S: Generic sparc64 copy to userspace. * * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; @@ -27,7 +28,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S index 89358ee94851..114340a0d36e 100644 --- a/arch/sparc/lib/GENmemcpy.S +++ b/arch/sparc/lib/GENmemcpy.S @@ -1,24 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENmemcpy.S: Generic sparc64 memcpy. * * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) */ #ifdef __KERNEL__ +#include <linux/linkage.h> #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +43,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +94,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +103,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, %g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +121,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +132,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff 
--git a/arch/sparc/lib/GENpage.S b/arch/sparc/lib/GENpage.S index 2ef9d05f21bc..c143c4d1de3f 100644 --- a/arch/sparc/lib/GENpage.S +++ b/arch/sparc/lib/GENpage.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENpage.S: Generic clear and copy page. * * Copyright (C) 2007 (davem@davemloft.net) diff --git a/arch/sparc/lib/GENpatch.S b/arch/sparc/lib/GENpatch.S index fab9e89f16bd..1ec1f02c8b7b 100644 --- a/arch/sparc/lib/GENpatch.S +++ b/arch/sparc/lib/GENpatch.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* GENpatch.S: Patch Ultra-I routines with generic variant. * * Copyright (C) 2007 David S. Miller <davem@davemloft.net> @@ -26,8 +27,8 @@ .type generic_patch_copyops,#function generic_patch_copyops: GEN_DO_PATCH(memcpy, GENmemcpy) - GEN_DO_PATCH(___copy_from_user, GENcopy_from_user) - GEN_DO_PATCH(___copy_to_user, GENcopy_to_user) + GEN_DO_PATCH(raw_copy_from_user, GENcopy_from_user) + GEN_DO_PATCH(raw_copy_to_user, GENcopy_to_user) retl nop .size generic_patch_copyops,.-generic_patch_copyops diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000000000000..66464b3e3649 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,40 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ */ + + +#define EX_LD(x, y) \ +98: x; \ + .section __ex_table,"a"; \ + .align 4; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_LD_FP(x, y) \ +98: x; \ + .section __ex_table,"a"; \ + .align 4; \ + .word 98b, y##_fp; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000000000000..a60ac467f808 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x, y) \ +98: x; \ + .section __ex_table,"a"; \ + .align 4; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_ST_FP(x, y) \ +98: x; \ + .section __ex_table,"a"; \ + .align 4; \ + .word 98b, y##_fp; \ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define STORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. 
+ */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S new file mode 100644 index 000000000000..99357bfa8e82 --- /dev/null +++ b/arch/sparc/lib/M7memcpy.S @@ -0,0 +1,923 @@ +/* + * M7memcpy: Optimized SPARC M7 memcpy + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + .file "M7memcpy.S" + +/* + * memcpy(s1, s2, len) + * + * Copy s2 to s1, always copy n bytes. + * Note: this C code does not work for overlapped copies. + * + * Fast assembler language version of the following C-program for memcpy + * which represents the `standard' for the C-library. + * + * void * + * memcpy(void *s, const void *s0, size_t n) + * { + * if (n != 0) { + * char *s1 = s; + * const char *s2 = s0; + * do { + * *s1++ = *s2++; + * } while (--n != 0); + * } + * return (s); + * } + * + * + * SPARC T7/M7 Flow : + * + * if (count < SMALL_MAX) { + * if count < SHORTCOPY (SHORTCOPY=3) + * copy bytes; exit with dst addr + * if src & dst aligned on word boundary but not long word boundary, + * copy with ldw/stw; branch to finish_up + * if src & dst aligned on long word boundary + * copy with ldx/stx; branch to finish_up + * if src & dst not aligned and length <= SHORTCHECK (SHORTCHECK=14) + * copy bytes; exit with dst addr + * move enough bytes to get src to word boundary + * if dst now on word boundary + * move_words: + * copy words; branch to finish_up + * if dst now on half word boundary + * load words, shift half words, store words; branch to finish_up + * if dst on byte 1 + * load words, shift 3 bytes, store words; branch to finish_up + * if dst on byte 3 + * load words, shift 1 byte, store words; branch to finish_up + * finish_up: + * copy bytes; exit with dst addr + * } else { More than SMALL_MAX bytes + * move bytes until dst is on long word boundary + * if( src is on long word boundary ) { + * if (count < 
MED_MAX) { + * finish_long: src/dst aligned on 8 bytes + * copy with ldx/stx in 8-way unrolled loop; + * copy final 0-63 bytes; exit with dst addr + * } else { src/dst aligned; count > MED_MAX + * align dst on 64 byte boundary; for main data movement: + * prefetch src data to L2 cache; let HW prefetch move data to L1 cache + * Use BIS (block initializing store) to avoid copying store cache + * lines from memory. But pre-store first element of each cache line + * ST_CHUNK lines in advance of the rest of that cache line. That + * gives time for replacement cache lines to be written back without + * excess STQ and Miss Buffer filling. Repeat until near the end, + * then finish up storing before going to finish_long. + * } + * } else { src/dst not aligned on 8 bytes + * if src is word aligned and count < MED_WMAX + * move words in 8-way unrolled loop + * move final 0-31 bytes; exit with dst addr + * if count < MED_UMAX + * use alignaddr/faligndata combined with ldd/std in 8-way + * unrolled loop to move data. + * go to unalign_done + * else + * setup alignaddr for faligndata instructions + * align dst on 64 byte boundary; prefetch src data to L1 cache + * loadx8, falign, block-store, prefetch loop + * (only use block-init-store when src/dst on 8 byte boundaries.) + * unalign_done: + * move remaining bytes for unaligned cases. exit with dst addr. 
+ * } + * + */ + +#include <asm/visasm.h> +#include <asm/asi.h> + +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + +#ifndef EX_LD +#define EX_LD(x,y) x +#endif +#ifndef EX_LD_FP +#define EX_LD_FP(x,y) x +#endif + +#ifndef EX_ST +#define EX_ST(x,y) x +#endif +#ifndef EX_ST_FP +#define EX_ST_FP(x,y) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +/* + * ASI_BLK_INIT_QUAD_LDD_P/ASI_BLK_INIT_QUAD_LDD_S marks the cache + * line as "least recently used" which means if many threads are + * active, it has a high probability of being pushed out of the cache + * between the first initializing store and the final stores. + * Thus, we use ASI_ST_BLKINIT_MRU_P/ASI_ST_BLKINIT_MRU_S which + * marks the cache line as "most recently used" for all + * but the last cache line + */ +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef STORE_MRU_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_MRU_ASI ASI_ST_BLKINIT_MRU_P +#else +#define STORE_MRU_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI +#endif + +#ifndef STORE_INIT_MRU +#define STORE_INIT_MRU(src,addr) stxa src, [addr] STORE_MRU_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME M7memcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#define BLOCK_SIZE 64 +#define SHORTCOPY 3 +#define SHORTCHECK 14 +#define SHORT_LONG 64 /* max copy for short longword-aligned case */ + /* must be at least 64 */ +#define SMALL_MAX 128 +#define MED_UMAX 1024 /* max copy for medium un-aligned case */ +#define MED_WMAX 1024 /* max copy for medium word-aligned case */ +#define MED_MAX 1024 /* max copy for medium longword-aligned case */ +#define ST_CHUNK 24 /* 
ST_CHUNK - block of values for BIS Store */ +#define ALIGN_PRE 24 /* distance for aligned prefetch loop */ + + .register %g2,#scratch + + .section ".text" + .global FUNC_NAME + .type FUNC_NAME, #function + .align 16 +FUNC_NAME: + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, %g1 ! save %o0 + brz,pn %o2, .Lsmallx + cmp %o2, 3 + ble,pn %icc, .Ltiny_cp + cmp %o2, 19 + ble,pn %icc, .Lsmall_cp + or %o0, %o1, %g2 + cmp %o2, SMALL_MAX + bl,pn %icc, .Lmedium_cp + nop + +.Lmedium: + neg %o0, %o5 + andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned + brz,pt %o5, .Ldst_aligned_on_8 + + ! %o5 has the bytes to be written in partial store. + sub %o2, %o5, %o2 + sub %o1, %o0, %o1 ! %o1 gets the difference +7: ! dst aligning loop + add %o1, %o0, %o4 + EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte + subcc %o5, 1, %o5 + EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1) + bgu,pt %xcc, 7b + add %o0, 1, %o0 ! advance dst + add %o1, %o0, %o1 ! restore %o1 +.Ldst_aligned_on_8: + andcc %o1, 7, %o5 + brnz,pt %o5, .Lsrc_dst_unaligned_on_8 + nop + +.Lsrc_dst_aligned_on_8: + ! check if we are copying MED_MAX or more bytes + set MED_MAX, %o3 + cmp %o2, %o3 ! limit to store buffer size + bgu,pn %xcc, .Llarge_align8_copy + nop + +/* + * Special case for handling when src and dest are both long word aligned + * and total data to move is less than MED_MAX bytes + */ +.Lmedlong: + subcc %o2, 63, %o2 ! adjust length to allow cc test + ble,pn %xcc, .Lmedl63 ! skip big loop if less than 64 bytes + nop +.Lmedl64: + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load + subcc %o2, 64, %o2 ! decrement length count + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! 
a block of 64 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56) + EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_63_48) + EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_63_48) + EX_LD(LOAD(ldx, %o1+24, %o3), memcpy_retl_o2_plus_63_40) + EX_ST(STORE(stx, %o3, %o0+24), memcpy_retl_o2_plus_63_40) + EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_63_32)! load and store + EX_ST(STORE(stx, %o4, %o0+32), memcpy_retl_o2_plus_63_32) + EX_LD(LOAD(ldx, %o1+40, %o3), memcpy_retl_o2_plus_63_24)! a block of 64 + add %o1, 64, %o1 ! increase src ptr by 64 + EX_ST(STORE(stx, %o3, %o0+40), memcpy_retl_o2_plus_63_24) + EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16) + add %o0, 64, %o0 ! increase dst ptr by 64 + EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16) + EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8) + bgu,pt %xcc, .Lmedl64 ! repeat if at least 64 bytes left + EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8) +.Lmedl63: + addcc %o2, 32, %o2 ! adjust remaining count + ble,pt %xcc, .Lmedl31 ! to skip if 31 or fewer bytes left + nop + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_31) ! load + sub %o2, 32, %o2 ! decrement length count + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_31_32) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_31_24) ! a block of 32 + add %o1, 32, %o1 ! increase src ptr by 32 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_31_24) + EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16) + add %o0, 32, %o0 ! increase dst ptr by 32 + EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16) + EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8) + EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8) +.Lmedl31: + addcc %o2, 16, %o2 ! adjust remaining count + ble,pt %xcc, .Lmedl15 ! skip if 15 or fewer bytes left + nop ! + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_15) + add %o1, 16, %o1 ! 
increase src ptr by 16 + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_15) + sub %o2, 16, %o2 ! decrease count by 16 + EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8) + add %o0, 16, %o0 ! increase dst ptr by 16 + EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8) +.Lmedl15: + addcc %o2, 15, %o2 ! restore count + bz,pt %xcc, .Lsmallx ! exit if finished + cmp %o2, 8 + blt,pt %xcc, .Lmedw7 ! skip if 7 or fewer bytes left + tst %o2 + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2) ! load 8 bytes + add %o1, 8, %o1 ! increase src ptr by 8 + add %o0, 8, %o0 ! increase dst ptr by 8 + subcc %o2, 8, %o2 ! decrease count by 8 + bnz,pn %xcc, .Lmedw7 + EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) ! and store 8 + retl + mov EX_RETVAL(%g1), %o0 ! restore %o0 + + .align 16 +.Lsrc_dst_unaligned_on_8: + ! DST is 8-byte aligned, src is not +2: + andcc %o1, 0x3, %o5 ! test word alignment + bnz,pt %xcc, .Lunalignsetup ! branch to skip if not word aligned + nop + +/* + * Handle all cases where src and dest are aligned on word + * boundaries. Use unrolled loops for better performance. + * This option wins over standard large data move when + * source and destination are in cache for medium + * to short data moves. + */ + set MED_WMAX, %o3 + cmp %o2, %o3 ! limit to store buffer size + bge,pt %xcc, .Lunalignrejoin ! otherwise rejoin main loop + nop + + subcc %o2, 31, %o2 ! adjust length to allow cc test + ! for end of loop + ble,pt %xcc, .Lmedw31 ! skip big loop if less than 32 +.Lmedw32: + EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_31)! move a block of 32 + sllx %o4, 32, %o5 + EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_31) + or %o4, %o5, %o5 + EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_31) + subcc %o2, 32, %o2 ! 
decrement length count + EX_LD(LOAD(ld, %o1+8, %o4), memcpy_retl_o2_plus_31_24) + sllx %o4, 32, %o5 + EX_LD(LOAD(ld, %o1+12, %o4), memcpy_retl_o2_plus_31_24) + or %o4, %o5, %o5 + EX_ST(STORE(stx, %o5, %o0+8), memcpy_retl_o2_plus_31_24) + add %o1, 32, %o1 ! increase src ptr by 32 + EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16) + sllx %o4, 32, %o5 + EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16) + or %o4, %o5, %o5 + EX_ST(STORE(stx, %o5, %o0+16), memcpy_retl_o2_plus_31_16) + add %o0, 32, %o0 ! increase dst ptr by 32 + EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8) + sllx %o4, 32, %o5 + EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8) + or %o4, %o5, %o5 + bgu,pt %xcc, .Lmedw32 ! repeat if at least 32 bytes left + EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8) +.Lmedw31: + addcc %o2, 31, %o2 ! restore count + + bz,pt %xcc, .Lsmallx ! exit if finished + nop + cmp %o2, 16 + blt,pt %xcc, .Lmedw15 + nop + EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)! move a block of 16 bytes + sllx %o4, 32, %o5 + subcc %o2, 16, %o2 ! decrement length count + EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_16) + or %o4, %o5, %o5 + EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_16) + add %o1, 16, %o1 ! increase src ptr by 16 + EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8) + add %o0, 16, %o0 ! increase dst ptr by 16 + sllx %o4, 32, %o5 + EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8) + or %o4, %o5, %o5 + EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8) +.Lmedw15: + bz,pt %xcc, .Lsmallx ! exit if finished + cmp %o2, 8 + blt,pn %xcc, .Lmedw7 ! skip if 7 or fewer bytes left + tst %o2 + EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2) ! load 4 bytes + subcc %o2, 8, %o2 ! decrease count by 8 + EX_ST(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_8)! and store 4 bytes + add %o1, 8, %o1 ! increase src ptr by 8 + EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4) ! load 4 bytes + add %o0, 8, %o0 ! 
increase dst ptr by 8 + EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes + bz,pt %xcc, .Lsmallx ! exit if finished +.Lmedw7: ! count is ge 1, less than 8 + cmp %o2, 4 ! check for 4 bytes left + blt,pn %xcc, .Lsmallleft3 ! skip if 3 or fewer bytes left + nop ! + EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2) ! load 4 bytes + add %o1, 4, %o1 ! increase src ptr by 4 + add %o0, 4, %o0 ! increase dst ptr by 4 + subcc %o2, 4, %o2 ! decrease count by 4 + bnz .Lsmallleft3 + EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes + retl + mov EX_RETVAL(%g1), %o0 + + .align 16 +.Llarge_align8_copy: ! Src and dst share 8 byte alignment + ! align dst to 64 byte boundary + andcc %o0, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned + brz,pn %o3, .Laligned_to_64 + andcc %o0, 8, %o3 ! odd long words to move? + brz,pt %o3, .Laligned_to_16 + nop + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2) + sub %o2, 8, %o2 + add %o1, 8, %o1 ! increment src ptr + add %o0, 8, %o0 ! increment dst ptr + EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) +.Laligned_to_16: + andcc %o0, 16, %o3 ! pair of long words to move? + brz,pt %o3, .Laligned_to_32 + nop + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2) + sub %o2, 16, %o2 + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_16) + add %o1, 16, %o1 ! increment src ptr + EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8) + add %o0, 16, %o0 ! increment dst ptr + EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) +.Laligned_to_32: + andcc %o0, 32, %o3 ! four long words to move? + brz,pt %o3, .Laligned_to_64 + nop + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2) + sub %o2, 32, %o2 + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_32) + EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_24) + EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_24) + EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_16) + EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_16) + add %o1, 32, %o1 ! 
increment src ptr + EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8) + add %o0, 32, %o0 ! increment dst ptr + EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) +.Laligned_to_64: +! +! Using block init store (BIS) instructions to avoid fetching cache +! lines from memory. Use ST_CHUNK stores to first element of each cache +! line (similar to prefetching) to avoid overfilling STQ or miss buffers. +! Gives existing cache lines time to be moved out of L1/L2/L3 cache. +! Initial stores using MRU version of BIS to keep cache line in +! cache until we are ready to store final element of cache line. +! Then store last element using the LRU version of BIS. +! + andn %o2, 0x3f, %o5 ! %o5 is multiple of block size + and %o2, 0x3f, %o2 ! residue bytes in %o2 +! +! We use STORE_MRU_ASI for the first seven stores to each cache line +! followed by STORE_ASI (mark as LRU) for the last store. That +! mixed approach reduces the probability that the cache line is removed +! before we finish setting it, while minimizing the effects on +! other cached values during a large memcpy +! +! ST_CHUNK batches up initial BIS operations for several cache lines +! to allow multiple requests to not be blocked by overflowing the +! store miss buffer. Then the matching stores for all those +! BIS operations are executed. +! + + sub %o0, 8, %o0 ! adjust %o0 for ASI alignment +.Lalign_loop: + cmp %o5, ST_CHUNK*64 + blu,pt %xcc, .Lalign_loop_fin + mov ST_CHUNK,%o3 +.Lalign_loop_start: + prefetch [%o1 + (ALIGN_PRE * BLOCK_SIZE)], 21 + subcc %o3, 1, %o3 + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5) + add %o1, 64, %o1 + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + bgu %xcc,.Lalign_loop_start + add %o0, 56, %o0 + + mov ST_CHUNK,%o3 + sllx %o3, 6, %o4 ! ST_CHUNK*64 + sub %o1, %o4, %o1 ! reset %o1 + sub %o0, %o4, %o0 ! 
reset %o0 + +.Lalign_loop_rest: + EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5) + add %o0, 16, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5) + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + subcc %o3, 1, %o3 + EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5) + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5) + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5) + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5) + add %o1, 64, %o1 + add %o0, 8, %o0 + EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5) + add %o0, 8, %o0 + EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5) + sub %o5, 64, %o5 + bgu %xcc,.Lalign_loop_rest + ! mark cache line as LRU + EX_ST(STORE_INIT(%o4, %o0), memcpy_retl_o2_plus_o5_plus_64) + + cmp %o5, ST_CHUNK*64 + bgu,pt %xcc, .Lalign_loop_start + mov ST_CHUNK,%o3 + + cmp %o5, 0 + beq .Lalign_done + nop +.Lalign_loop_fin: + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5) + EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5) + EX_ST(STORE(stx, %o4, %o0+8+8), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5) + EX_ST(STORE(stx, %o4, %o0+8+16), memcpy_retl_o2_plus_o5) + subcc %o5, 64, %o5 + EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5_64) + EX_ST(STORE(stx, %o4, %o0+8+24), memcpy_retl_o2_plus_o5_64) + EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5_64) + EX_ST(STORE(stx, %o4, %o0+8+32), memcpy_retl_o2_plus_o5_64) + EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5_64) + EX_ST(STORE(stx, %o4, %o0+8+40), memcpy_retl_o2_plus_o5_64) + EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5_64) + add 
%o1, 64, %o1 + EX_ST(STORE(stx, %o4, %o0+8+48), memcpy_retl_o2_plus_o5_64) + add %o0, 64, %o0 + EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64) + bgu %xcc,.Lalign_loop_fin + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_o5_64) + +.Lalign_done: + add %o0, 8, %o0 ! restore %o0 from ASI alignment + membar #StoreStore + sub %o2, 63, %o2 ! adjust length to allow cc test + ba .Lmedl63 ! in .Lmedl63 + nop + + .align 16 + ! Dst is on 8 byte boundary; src is not; remaining count > SMALL_MAX +.Lunalignsetup: +.Lunalignrejoin: + mov %g1, %o3 ! save %g1 as VISEntryHalf clobbers it +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail_cp) +#else + VISEntryHalf +#endif + mov %o3, %g1 ! restore %g1 + + set MED_UMAX, %o3 + cmp %o2, %o3 ! check for medium unaligned limit + bge,pt %xcc,.Lunalign_large + prefetch [%o1 + (4 * BLOCK_SIZE)], 20 + andn %o2, 0x3f, %o5 ! %o5 is multiple of block size + and %o2, 0x3f, %o2 ! residue bytes in %o2 + cmp %o2, 8 ! Ensure we do not load beyond + bgt .Lunalign_adjust ! end of source buffer + andn %o1, 0x7, %o4 ! %o4 has long word aligned src address + add %o2, 64, %o2 ! adjust to leave loop + sub %o5, 64, %o5 ! early if necessary +.Lunalign_adjust: + alignaddr %o1, %g0, %g0 ! generate %gsr + add %o1, %o5, %o1 ! 
advance %o1 to after blocks + EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5) +.Lunalign_loop: + EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5) + faligndata %f0, %f2, %f16 + EX_LD_FP(LOAD(ldd, %o4+16, %f4), memcpy_retl_o2_plus_o5) + subcc %o5, BLOCK_SIZE, %o5 + EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_64) + faligndata %f2, %f4, %f18 + EX_LD_FP(LOAD(ldd, %o4+24, %f6), memcpy_retl_o2_plus_o5_plus_56) + EX_ST_FP(STORE(std, %f18, %o0+8), memcpy_retl_o2_plus_o5_plus_56) + faligndata %f4, %f6, %f20 + EX_LD_FP(LOAD(ldd, %o4+32, %f8), memcpy_retl_o2_plus_o5_plus_48) + EX_ST_FP(STORE(std, %f20, %o0+16), memcpy_retl_o2_plus_o5_plus_48) + faligndata %f6, %f8, %f22 + EX_LD_FP(LOAD(ldd, %o4+40, %f10), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f22, %o0+24), memcpy_retl_o2_plus_o5_plus_40) + faligndata %f8, %f10, %f24 + EX_LD_FP(LOAD(ldd, %o4+48, %f12), memcpy_retl_o2_plus_o5_plus_32) + EX_ST_FP(STORE(std, %f24, %o0+32), memcpy_retl_o2_plus_o5_plus_32) + faligndata %f10, %f12, %f26 + EX_LD_FP(LOAD(ldd, %o4+56, %f14), memcpy_retl_o2_plus_o5_plus_24) + add %o4, BLOCK_SIZE, %o4 + EX_ST_FP(STORE(std, %f26, %o0+40), memcpy_retl_o2_plus_o5_plus_24) + faligndata %f12, %f14, %f28 + EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5_plus_16) + EX_ST_FP(STORE(std, %f28, %o0+48), memcpy_retl_o2_plus_o5_plus_16) + faligndata %f14, %f0, %f30 + EX_ST_FP(STORE(std, %f30, %o0+56), memcpy_retl_o2_plus_o5_plus_8) + add %o0, BLOCK_SIZE, %o0 + bgu,pt %xcc, .Lunalign_loop + prefetch [%o4 + (5 * BLOCK_SIZE)], 20 + ba .Lunalign_done + nop + +.Lunalign_large: + andcc %o0, 0x3f, %o3 ! is dst 64-byte block aligned? + bz %xcc, .Lunalignsrc + sub %o3, 64, %o3 ! %o3 will be multiple of 8 + neg %o3 ! bytes until dest is 64 byte aligned + sub %o2, %o3, %o2 ! update cnt with bytes to be moved + ! Move bytes according to source alignment + andcc %o1, 0x1, %o5 + bnz %xcc, .Lunalignbyte ! check for byte alignment + nop + andcc %o1, 2, %o5 ! 
check for half word alignment + bnz %xcc, .Lunalignhalf + nop + ! Src is word aligned +.Lunalignword: + EX_LD_FP(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_o3) ! load 4 bytes + add %o1, 8, %o1 ! increase src ptr by 8 + EX_ST_FP(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_o3) ! and store 4 + subcc %o3, 8, %o3 ! decrease count by 8 + EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4 + add %o0, 8, %o0 ! increase dst ptr by 8 + bnz %xcc, .Lunalignword + EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4) + ba .Lunalignsrc + nop + + ! Src is half-word aligned +.Lunalignhalf: + EX_LD_FP(LOAD(lduh, %o1, %o4), memcpy_retl_o2_plus_o3) ! load 2 bytes + sllx %o4, 32, %o5 ! shift left + EX_LD_FP(LOAD(lduw, %o1+2, %o4), memcpy_retl_o2_plus_o3) + or %o4, %o5, %o5 + sllx %o5, 16, %o5 + EX_LD_FP(LOAD(lduh, %o1+6, %o4), memcpy_retl_o2_plus_o3) + or %o4, %o5, %o5 + EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3) + add %o1, 8, %o1 + subcc %o3, 8, %o3 + bnz %xcc, .Lunalignhalf + add %o0, 8, %o0 + ba .Lunalignsrc + nop + + ! Src is Byte aligned +.Lunalignbyte: + sub %o0, %o1, %o0 ! share pointer advance +.Lunalignbyte_loop: + EX_LD_FP(LOAD(ldub, %o1, %o4), memcpy_retl_o2_plus_o3) + sllx %o4, 56, %o5 + EX_LD_FP(LOAD(lduh, %o1+1, %o4), memcpy_retl_o2_plus_o3) + sllx %o4, 40, %o4 + or %o4, %o5, %o5 + EX_LD_FP(LOAD(lduh, %o1+3, %o4), memcpy_retl_o2_plus_o3) + sllx %o4, 24, %o4 + or %o4, %o5, %o5 + EX_LD_FP(LOAD(lduh, %o1+5, %o4), memcpy_retl_o2_plus_o3) + sllx %o4, 8, %o4 + or %o4, %o5, %o5 + EX_LD_FP(LOAD(ldub, %o1+7, %o4), memcpy_retl_o2_plus_o3) + or %o4, %o5, %o5 + add %o0, %o1, %o0 + EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3) + sub %o0, %o1, %o0 + subcc %o3, 8, %o3 + bnz %xcc, .Lunalignbyte_loop + add %o1, 8, %o1 + add %o0,%o1, %o0 ! restore pointer + + ! Destination is now block (64 byte aligned) +.Lunalignsrc: + andn %o2, 0x3f, %o5 ! %o5 is multiple of block size + and %o2, 0x3f, %o2 ! residue bytes in %o2 + add %o2, 64, %o2 ! 
Insure we do not load beyond + sub %o5, 64, %o5 ! end of source buffer + + andn %o1, 0x7, %o4 ! %o4 has long word aligned src address + alignaddr %o1, %g0, %g0 ! generate %gsr + add %o1, %o5, %o1 ! advance %o1 to after blocks + + EX_LD_FP(LOAD(ldd, %o4, %f14), memcpy_retl_o2_plus_o5) + add %o4, 8, %o4 +.Lunalign_sloop: + EX_LD_FP(LOAD(ldd, %o4, %f16), memcpy_retl_o2_plus_o5) + faligndata %f14, %f16, %f0 + EX_LD_FP(LOAD(ldd, %o4+8, %f18), memcpy_retl_o2_plus_o5) + faligndata %f16, %f18, %f2 + EX_LD_FP(LOAD(ldd, %o4+16, %f20), memcpy_retl_o2_plus_o5) + faligndata %f18, %f20, %f4 + EX_ST_FP(STORE(std, %f0, %o0), memcpy_retl_o2_plus_o5) + subcc %o5, 64, %o5 + EX_LD_FP(LOAD(ldd, %o4+24, %f22), memcpy_retl_o2_plus_o5_plus_56) + faligndata %f20, %f22, %f6 + EX_ST_FP(STORE(std, %f2, %o0+8), memcpy_retl_o2_plus_o5_plus_56) + EX_LD_FP(LOAD(ldd, %o4+32, %f24), memcpy_retl_o2_plus_o5_plus_48) + faligndata %f22, %f24, %f8 + EX_ST_FP(STORE(std, %f4, %o0+16), memcpy_retl_o2_plus_o5_plus_48) + EX_LD_FP(LOAD(ldd, %o4+40, %f26), memcpy_retl_o2_plus_o5_plus_40) + faligndata %f24, %f26, %f10 + EX_ST_FP(STORE(std, %f6, %o0+24), memcpy_retl_o2_plus_o5_plus_40) + EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_32) + faligndata %f26, %f28, %f12 + EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_32) + add %o4, 64, %o4 + EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_24) + faligndata %f28, %f30, %f14 + EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_24) + EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_16) + add %o0, 64, %o0 + EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_8) + fsrc2 %f30, %f14 + bgu,pt %xcc, .Lunalign_sloop + prefetch [%o4 + (8 * BLOCK_SIZE)], 20 + +.Lunalign_done: + ! Handle trailing bytes, 64 to 127 + ! Dest long word aligned, Src not long word aligned + cmp %o2, 15 + bleu %xcc, .Lunalign_short + + andn %o2, 0x7, %o5 ! %o5 is multiple of 8 + and %o2, 0x7, %o2 ! 
residue bytes in %o2 + add %o2, 8, %o2 + sub %o5, 8, %o5 ! insure we do not load past end of src + andn %o1, 0x7, %o4 ! %o4 has long word aligned src address + add %o1, %o5, %o1 ! advance %o1 to after multiple of 8 + EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5)! fetch partialword +.Lunalign_by8: + EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5) + add %o4, 8, %o4 + faligndata %f0, %f2, %f16 + subcc %o5, 8, %o5 + EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_8) + fsrc2 %f2, %f0 + bgu,pt %xcc, .Lunalign_by8 + add %o0, 8, %o0 + +.Lunalign_short: +#ifdef NON_USER_COPY + VISExitHalfFast +#else + VISExitHalf +#endif + ba .Lsmallrest + nop + +/* + * This is a special case of nested memcpy. This can happen when kernel + * calls unaligned memcpy back to back without saving FP registers. We need + * traps(context switch) to save/restore FP registers. If the kernel calls + * memcpy without this trap sequence we will hit FP corruption. Let's use + * the normal integer load/store method in this case. 
+ */ + +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail_cp: + or %o0, %o1, %g2 +#endif +.Lmedium_cp: + LOAD(prefetch, %o1 + 0x40, #n_reads_strong) + andcc %g2, 0x7, %g0 + bne,pn %xcc, .Lmedium_unaligned_cp + nop + +.Lmedium_noprefetch_cp: + andncc %o2, 0x20 - 1, %o5 + be,pn %xcc, 2f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, %g7), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5) + add %o1, 0x20, %o1 + subcc %o5, 0x20, %o5 + EX_ST(STORE(stx, %o3, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) + EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) + bne,pt %xcc, 1b + add %o0, 0x20, %o0 +2: andcc %o2, 0x18, %o5 + be,pt %xcc, 3f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5) + add %o1, 0x08, %o1 + add %o0, 0x08, %o0 + subcc %o5, 0x08, %o5 + bne,pt %xcc, 1b + EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8) +3: brz,pt %o2, .Lexit_cp + cmp %o2, 0x04 + bl,pn %xcc, .Ltiny_cp + nop + EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2) + add %o1, 0x04, %o1 + add %o0, 0x04, %o0 + subcc %o2, 0x04, %o2 + bne,pn %xcc, .Ltiny_cp + EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4) + ba,a,pt %xcc, .Lexit_cp + +.Lmedium_unaligned_cp: + /* First get dest 8 byte aligned. 
*/ + sub %g0, %o0, %o3 + and %o3, 0x7, %o3 + brz,pt %o3, 2f + sub %o2, %o3, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_o3) + add %o1, 1, %o1 + subcc %o3, 1, %o3 + add %o0, 1, %o0 + bne,pt %xcc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_o3_plus_1) +2: + and %o1, 0x7, %o3 + brz,pn %o3, .Lmedium_noprefetch_cp + sll %o3, 3, %o3 + mov 64, %g2 + sub %g2, %o3, %g2 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2) + sllx %o4, %o3, %o4 + andn %o2, 0x08 - 1, %o5 + sub %o2, %o5, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5) + add %o1, 0x08, %o1 + subcc %o5, 0x08, %o5 + srlx %g3, %g2, %g7 + or %g7, %o4, %g7 + EX_ST(STORE(stx, %g7, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8) + add %o0, 0x08, %o0 + bne,pt %xcc, 1b + sllx %g3, %o3, %o4 + srl %o3, 3, %o3 + add %o1, %o3, %o1 + brz,pn %o2, .Lexit_cp + nop + ba,pt %xcc, .Lsmall_unaligned_cp + +.Ltiny_cp: + EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2) + subcc %o2, 1, %o2 + be,pn %xcc, .Lexit_cp + EX_ST(STORE(stb, %o3, %o0 + 0x00), memcpy_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %o3), memcpy_retl_o2) + subcc %o2, 1, %o2 + be,pn %xcc, .Lexit_cp + EX_ST(STORE(stb, %o3, %o0 + 0x01), memcpy_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %o3), memcpy_retl_o2) + ba,pt %xcc, .Lexit_cp + EX_ST(STORE(stb, %o3, %o0 + 0x02), memcpy_retl_o2) + +.Lsmall_cp: + andcc %g2, 0x3, %g0 + bne,pn %xcc, .Lsmall_unaligned_cp + andn %o2, 0x4 - 1, %o5 + sub %o2, %o5, %o2 +1: + EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5) + add %o1, 0x04, %o1 + subcc %o5, 0x04, %o5 + add %o0, 0x04, %o0 + bne,pt %xcc, 1b + EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4) + brz,pt %o2, .Lexit_cp + nop + ba,a,pt %xcc, .Ltiny_cp + +.Lsmall_unaligned_cp: +1: EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2) + add %o1, 1, %o1 + add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1) + ba,a,pt %xcc, 
.Lexit_cp + +.Lsmallrest: + tst %o2 + bz,pt %xcc, .Lsmallx + cmp %o2, 4 + blt,pn %xcc, .Lsmallleft3 + nop + sub %o2, 3, %o2 +.Lsmallnotalign4: + EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_3)! read byte + subcc %o2, 4, %o2 ! reduce count by 4 + EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_7)! write byte & repeat + EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2_plus_6)! for total of 4 + add %o1, 4, %o1 ! advance SRC by 4 + EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_6) + EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5) + add %o0, 4, %o0 ! advance DST by 4 + EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5) + EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4) + bgu,pt %xcc, .Lsmallnotalign4 ! loop til 3 or fewer bytes remain + EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4) + addcc %o2, 3, %o2 ! restore count + bz,pt %xcc, .Lsmallx +.Lsmallleft3: ! 1, 2, or 3 bytes remain + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_1) ! load one byte + bz,pt %xcc, .Lsmallx + EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_1) ! store one byte + EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2) ! load second byte + subcc %o2, 1, %o2 + bz,pt %xcc, .Lsmallx + EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_1)! store second byte + EX_LD(LOAD(ldub, %o1+2, %o3), memcpy_retl_o2) ! load third byte + EX_ST(STORE(stb, %o3, %o0+2), memcpy_retl_o2) ! store third byte +.Lsmallx: + retl + mov EX_RETVAL(%g1), %o0 +.Lsmallfin: + tst %o2 + bnz,pn %xcc, .Lsmallleft3 + nop + retl + mov EX_RETVAL(%g1), %o0 ! restore %o0 +.Lexit_cp: + retl + mov EX_RETVAL(%g1), %o0 + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/M7memset.S b/arch/sparc/lib/M7memset.S new file mode 100644 index 000000000000..62ea91b3a6b8 --- /dev/null +++ b/arch/sparc/lib/M7memset.S @@ -0,0 +1,352 @@ +/* + * M7memset.S: SPARC M7 optimized memset. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * M7memset.S: M7 optimized memset. 
+ * + * char *memset(sp, c, n) + * + * Set an array of n chars starting at sp to the character c. + * Return sp. + * + * Fast assembler language version of the following C-program for memset + * which represents the `standard' for the C-library. + * + * void * + * memset(void *sp1, int c, size_t n) + * { + * if (n != 0) { + * char *sp = sp1; + * do { + * *sp++ = (char)c; + * } while (--n != 0); + * } + * return (sp1); + * } + * + * The algorithm is as follows: + * + * For small stores of 7 or fewer bytes, individual bytes are stored. + * + * For stores of fewer than 32 bytes, align the address on a 4-byte boundary. + * Then store as many 4-byte chunks as possible, followed by trailing bytes. + * + * For sizes of 32 bytes or more, align the address on an 8-byte boundary. + * if (count >= 64) { + * store 8-byte chunks to align the address on a 64-byte boundary + * if (value to be set is zero && count >= MIN_ZERO) { + * Using BIS stores, set the first long word of each + * 64-byte cache line to zero which will also clear the + * other seven long words of the cache line. + * } + * else if (count >= MIN_LOOP) { + * Using BIS stores, set the first long word of each of + * ST_CHUNK cache lines (64 bytes each) before the main + * loop is entered. + * In the main loop, continue pre-setting the first long + * word of each cache line ST_CHUNK lines in advance while + * setting the other seven long words (56 bytes) of each + * cache line until fewer than ST_CHUNK*64 bytes remain. + * Then set the remaining seven long words of each cache + * line that has already had its first long word set. + * } + * store remaining data in 64-byte chunks until fewer than + * 64 bytes remain. + * } + * Store as many 8-byte chunks as possible, followed by trailing bytes. + * + * BIS = Block Init Store + * Doing the advance store of the first element of the cache line + * initiates the displacement of a cache line while only using a single + * instruction in the pipeline.
That avoids various pipeline delays, + * such as filling the miss buffer. The performance effect is + * similar to prefetching for normal stores. + * The special case for zero fills runs faster and uses fewer instruction + * cycles than the normal memset loop. + * + * We only use BIS when at least MIN_LOOP bytes of whole cache lines (MIN_ZERO + * for zero fills) are left to set, because a sequence of BIS stores must be + * followed by a membar #StoreStore. The benefit of the BIS stores must be balanced against the cost of the membar operation. + */ + +/* + * ASI_STBI_P marks the cache line as "least recently used" + * which means if many threads are active, it has a high chance + * of being pushed out of the cache between the first initializing + * store and the final stores. + * Thus, we use ASI_STBIMRU_P which marks the cache line as + * "most recently used" for all but the last store to the cache line. + */ + +#include <asm/asi.h> +#include <asm/page.h> + +#define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P +#define ASI_STBIMRU_P ASI_ST_BLKINIT_MRU_P + + +#define ST_CHUNK 24 /* multiple of 4 due to loop unrolling */ +#define MIN_LOOP 16320 +#define MIN_ZERO 512 + + .section ".text" + .align 32 + +/* + * Define clear_page(dest) as memset(dest, 0, PAGE_SIZE) + * (can create a more optimized version later.) + */ + .globl M7clear_page + .globl M7clear_user_page +M7clear_page: /* clear_page(dest) */ +M7clear_user_page: + set PAGE_SIZE, %o1 + /* fall through into bzero code */ + + .size M7clear_page,.-M7clear_page + .size M7clear_user_page,.-M7clear_user_page + +/* + * Define bzero(dest, n) as memset(dest, 0, n) + * (can create a more optimized version later.) + */ + .globl M7bzero +M7bzero: /* bzero(dest, size) */ + mov %o1, %o2 + mov 0, %o1 + /* fall through into memset code */ + + .size M7bzero,.-M7bzero + + .global M7memset + .type M7memset, #function + .register %g3, #scratch +M7memset: /* memset(sp1, c, n): %o0 = sp1, %o1 = c, %o2 = n */ + mov %o0, %o5 ! copy sp1 before using it + cmp %o2, 7 ! if small counts, just write bytes + bleu,pn %xcc, .wrchar + and %o1, 0xff, %o1 !
o1 is (char)c + + sll %o1, 8, %o3 + or %o1, %o3, %o1 ! now o1 has 2 bytes of c + sll %o1, 16, %o3 + cmp %o2, 32 + blu,pn %xcc, .wdalign + or %o1, %o3, %o1 ! now o1 has 4 bytes of c + + sllx %o1, 32, %o3 + or %o1, %o3, %o1 ! now o1 has 8 bytes of c + +.dbalign: + andcc %o5, 7, %o3 ! is sp1 aligned on a 8 byte bound? + bz,pt %xcc, .blkalign ! already long word aligned + sub %o3, 8, %o3 ! -(bytes till long word aligned) + + add %o2, %o3, %o2 ! update o2 with new count + ! Set -(%o3) bytes till sp1 long word aligned +1: stb %o1, [%o5] ! there is at least 1 byte to set + inccc %o3 ! byte clearing loop + bl,pt %xcc, 1b + inc %o5 + + ! Now sp1 is long word aligned (sp1 is found in %o5) +.blkalign: + cmp %o2, 64 ! check if there are 64 bytes to set + blu,pn %xcc, .wrshort + mov %o2, %o3 + + andcc %o5, 63, %o3 ! is sp1 block aligned? + bz,pt %xcc, .blkwr ! now block aligned + sub %o3, 64, %o3 ! o3 is -(bytes till block aligned) + add %o2, %o3, %o2 ! o2 is the remainder + + ! Store -(%o3) bytes till dst is block (64 byte) aligned. + ! Use long word stores. + ! Recall that dst is already long word aligned +1: + addcc %o3, 8, %o3 + stx %o1, [%o5] + bl,pt %xcc, 1b + add %o5, 8, %o5 + + ! Now sp1 is block aligned +.blkwr: + andn %o2, 63, %o4 ! calculate size of blocks in bytes + brz,pn %o1, .wrzero ! special case if c == 0 + and %o2, 63, %o3 ! %o3 = bytes left after blk stores. + + set MIN_LOOP, %g1 + cmp %o4, %g1 ! check there are enough bytes to set + blu,pn %xcc, .short_set ! to justify cost of membar + ! must be > pre-cleared lines + nop + + ! initial cache-clearing stores + ! get store pipeline moving + rd %asi, %g3 ! save %asi to be restored later + wr %g0, ASI_STBIMRU_P, %asi + + ! Primary memset loop for large memsets +.wr_loop: + sub %o5, 8, %o5 ! 
adjust %o5 for ASI store alignment + mov ST_CHUNK, %g1 +.wr_loop_start: + stxa %o1, [%o5+8]%asi + subcc %g1, 4, %g1 + stxa %o1, [%o5+8+64]%asi + add %o5, 256, %o5 + stxa %o1, [%o5+8-128]%asi + bgu %xcc, .wr_loop_start + stxa %o1, [%o5+8-64]%asi + + sub %o5, ST_CHUNK*64, %o5 ! reset %o5 + mov ST_CHUNK, %g1 + +.wr_loop_rest: + stxa %o1, [%o5+8+8]%asi + sub %o4, 64, %o4 + stxa %o1, [%o5+16+8]%asi + subcc %g1, 1, %g1 + stxa %o1, [%o5+24+8]%asi + stxa %o1, [%o5+32+8]%asi + stxa %o1, [%o5+40+8]%asi + add %o5, 64, %o5 + stxa %o1, [%o5-8]%asi + bgu %xcc, .wr_loop_rest + stxa %o1, [%o5]ASI_STBI_P + + ! If more than ST_CHUNK*64 bytes remain to set, continue + ! setting the first long word of each cache line in advance + ! to keep the store pipeline moving. + + cmp %o4, ST_CHUNK*64 + bge,pt %xcc, .wr_loop_start + mov ST_CHUNK, %g1 + + brz,a,pn %o4, .asi_done + add %o5, 8, %o5 ! restore %o5 offset + +.wr_loop_small: + stxa %o1, [%o5+8]%asi + stxa %o1, [%o5+8+8]%asi + stxa %o1, [%o5+16+8]%asi + stxa %o1, [%o5+24+8]%asi + stxa %o1, [%o5+32+8]%asi + subcc %o4, 64, %o4 + stxa %o1, [%o5+40+8]%asi + add %o5, 64, %o5 + stxa %o1, [%o5-8]%asi + bgu,pt %xcc, .wr_loop_small + stxa %o1, [%o5]ASI_STBI_P + + ba .asi_done + add %o5, 8, %o5 ! restore %o5 offset + + ! Special case loop for zero fill memsets + ! For each 64 byte cache line, single STBI to first element + ! clears line +.wrzero: + cmp %o4, MIN_ZERO ! check if enough bytes to set + ! 
to pay %asi + membar cost + blu %xcc, .short_set + nop + sub %o4, 256, %o4 + +.wrzero_loop: + mov 64, %g3 + stxa %o1, [%o5]ASI_STBI_P + subcc %o4, 256, %o4 + stxa %o1, [%o5+%g3]ASI_STBI_P + add %o5, 256, %o5 + sub %g3, 192, %g3 + stxa %o1, [%o5+%g3]ASI_STBI_P + add %g3, 64, %g3 + bge,pt %xcc, .wrzero_loop + stxa %o1, [%o5+%g3]ASI_STBI_P + add %o4, 256, %o4 + + brz,pn %o4, .bsi_done + nop + +.wrzero_small: + stxa %o1, [%o5]ASI_STBI_P + subcc %o4, 64, %o4 + bgu,pt %xcc, .wrzero_small + add %o5, 64, %o5 + ba,a .bsi_done + +.asi_done: + wr %g3, 0x0, %asi ! restored saved %asi +.bsi_done: + membar #StoreStore ! required by use of Block Store Init + +.short_set: + cmp %o4, 64 ! check if 64 bytes to set + blu %xcc, 5f + nop +4: ! set final blocks of 64 bytes + stx %o1, [%o5] + stx %o1, [%o5+8] + stx %o1, [%o5+16] + stx %o1, [%o5+24] + subcc %o4, 64, %o4 + stx %o1, [%o5+32] + stx %o1, [%o5+40] + add %o5, 64, %o5 + stx %o1, [%o5-16] + bgu,pt %xcc, 4b + stx %o1, [%o5-8] + +5: + ! Set the remaining long words +.wrshort: + subcc %o3, 8, %o3 ! Can we store any long words? + blu,pn %xcc, .wrchars + and %o2, 7, %o2 ! calc bytes left after long words +6: + subcc %o3, 8, %o3 + stx %o1, [%o5] ! store the long words + bgeu,pt %xcc, 6b + add %o5, 8, %o5 + +.wrchars: ! check for extra chars + brnz %o2, .wrfin + nop + retl + nop + +.wdalign: + andcc %o5, 3, %o3 ! is sp1 aligned on a word boundary + bz,pn %xcc, .wrword + andn %o2, 3, %o3 ! create word sized count in %o3 + + dec %o2 ! decrement count + stb %o1, [%o5] ! clear a byte + b .wdalign + inc %o5 ! next byte + +.wrword: + subcc %o3, 4, %o3 + st %o1, [%o5] ! 4-byte writing loop + bnz,pt %xcc, .wrword + add %o5, 4, %o5 + + and %o2, 3, %o2 ! leftover count, if any + +.wrchar: + ! Set the remaining bytes, if any + brz %o2, .exit + nop +.wrfin: + deccc %o2 + stb %o1, [%o5] + bgu,pt %xcc, .wrfin + inc %o5 +.exit: + retl ! 
%o0 was preserved + nop + + .size M7memset,.-M7memset diff --git a/arch/sparc/lib/M7patch.S b/arch/sparc/lib/M7patch.S new file mode 100644 index 000000000000..9000b7bc5f2b --- /dev/null +++ b/arch/sparc/lib/M7patch.S @@ -0,0 +1,51 @@ +/* + * M7patch.S: Patch generic routines with M7 variant. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + +#include <linux/linkage.h> + +#define BRANCH_ALWAYS 0x10680000 /* branch word stored at OLD; NG_DO_PATCH ORs the (NEW - OLD) word displacement into its low 19 bits */ +#define NOP 0x01000000 /* stored at OLD + 4, right after the patched-in branch; OLD is then flushed */ +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + +ENTRY(m7_patch_copyops) /* point memcpy and raw_copy_{from,to}_user at the M7 routines */ + NG_DO_PATCH(memcpy, M7memcpy) + NG_DO_PATCH(raw_copy_from_user, M7copy_from_user) + NG_DO_PATCH(raw_copy_to_user, M7copy_to_user) + retl + nop +ENDPROC(m7_patch_copyops) + +ENTRY(m7_patch_bzero) /* point memset and __bzero at the M7 routines; __clear_user and tsb_init at the NG ones */ + NG_DO_PATCH(memset, M7memset) + NG_DO_PATCH(__bzero, M7bzero) + NG_DO_PATCH(__clear_user, NGclear_user) + NG_DO_PATCH(tsb_init, NGtsb_init) + retl + nop +ENDPROC(m7_patch_bzero) + +ENTRY(m7_patch_pageops) /* point the page-clear entry points at the M7 routines; copy_user_page at NG4copy_user_page */ + NG_DO_PATCH(copy_user_page, NG4copy_user_page) + NG_DO_PATCH(_clear_page, M7clear_page) + NG_DO_PATCH(clear_user_page, M7clear_user_page) + retl + nop +ENDPROC(m7_patch_pageops) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index dbe119b63b48..783bdec0d7be 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -1,8 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for Sparc library files..
# -asflags-y := -ansi -DST_DIV0=0x02 -ccflags-y := -Werror +asflags-y := -DST_DIV0=0x02 lib-$(CONFIG_SPARC32) += ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o @@ -14,7 +14,11 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o -lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o +lib-$(CONFIG_SPARC32) += muldi3.o bitext.o +lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o +lib-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o @@ -35,13 +39,17 @@ lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o +lib-$(CONFIG_SPARC64) += Memcpy_utils.o + +lib-$(CONFIG_SPARC64) += M7memcpy.o M7copy_from_user.o M7copy_to_user.o +lib-$(CONFIG_SPARC64) += M7patch.o M7memset.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o -obj-y += iomap.o -obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o -obj-y += ksyms.o +obj-$(CONFIG_SPARC64) += iomap.o +obj-$(CONFIG_SPARC32) += atomic32.o obj-$(CONFIG_SPARC64) += PeeCeeI.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S new file mode 100644 index 000000000000..207343367bb2 --- /dev/null +++ b/arch/sparc/lib/Memcpy_utils.S @@ -0,0 +1,354 @@ +#ifndef __ASM_MEMCPY_UTILS +#define __ASM_MEMCPY_UTILS + +#include <linux/linkage.h> +#include <asm/asi.h> +#include <asm/visasm.h> + +ENTRY(__restore_asi_fp) + VISExitHalf + retl + wr %g0, ASI_AIUS, 
%asi +ENDPROC(__restore_asi_fp) + +ENTRY(__restore_asi) + retl + wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi) + +ENTRY(memcpy_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(memcpy_retl_o2) +ENTRY(memcpy_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(memcpy_retl_o2_plus_1) +ENTRY(memcpy_retl_o2_plus_3) + ba,pt %xcc, __restore_asi + add %o2, 3, %o0 +ENDPROC(memcpy_retl_o2_plus_3) +ENTRY(memcpy_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(memcpy_retl_o2_plus_4) +ENTRY(memcpy_retl_o2_plus_5) + ba,pt %xcc, __restore_asi + add %o2, 5, %o0 +ENDPROC(memcpy_retl_o2_plus_5) +ENTRY(memcpy_retl_o2_plus_6) + ba,pt %xcc, __restore_asi + add %o2, 6, %o0 +ENDPROC(memcpy_retl_o2_plus_6) +ENTRY(memcpy_retl_o2_plus_7) + ba,pt %xcc, __restore_asi + add %o2, 7, %o0 +ENDPROC(memcpy_retl_o2_plus_7) +ENTRY(memcpy_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(memcpy_retl_o2_plus_8) +ENTRY(memcpy_retl_o2_plus_15) + ba,pt %xcc, __restore_asi + add %o2, 15, %o0 +ENDPROC(memcpy_retl_o2_plus_15) +ENTRY(memcpy_retl_o2_plus_15_8) + add %o2, 15, %o2 + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(memcpy_retl_o2_plus_15_8) +ENTRY(memcpy_retl_o2_plus_16) + ba,pt %xcc, __restore_asi + add %o2, 16, %o0 +ENDPROC(memcpy_retl_o2_plus_16) +ENTRY(memcpy_retl_o2_plus_24) + ba,pt %xcc, __restore_asi + add %o2, 24, %o0 +ENDPROC(memcpy_retl_o2_plus_24) +ENTRY(memcpy_retl_o2_plus_31) + ba,pt %xcc, __restore_asi + add %o2, 31, %o0 +ENDPROC(memcpy_retl_o2_plus_31) +ENTRY(memcpy_retl_o2_plus_32) + ba,pt %xcc, __restore_asi + add %o2, 32, %o0 +ENDPROC(memcpy_retl_o2_plus_32) +ENTRY(memcpy_retl_o2_plus_31_32) + add %o2, 31, %o2 + ba,pt %xcc, __restore_asi + add %o2, 32, %o0 +ENDPROC(memcpy_retl_o2_plus_31_32) +ENTRY(memcpy_retl_o2_plus_31_24) + add %o2, 31, %o2 + ba,pt %xcc, __restore_asi + add %o2, 24, %o0 +ENDPROC(memcpy_retl_o2_plus_31_24) +ENTRY(memcpy_retl_o2_plus_31_16) + add %o2, 31, %o2 + ba,pt %xcc, 
__restore_asi + add %o2, 16, %o0 +ENDPROC(memcpy_retl_o2_plus_31_16) +ENTRY(memcpy_retl_o2_plus_31_8) + add %o2, 31, %o2 + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(memcpy_retl_o2_plus_31_8) +ENTRY(memcpy_retl_o2_plus_63) + ba,pt %xcc, __restore_asi + add %o2, 63, %o0 +ENDPROC(memcpy_retl_o2_plus_63) +ENTRY(memcpy_retl_o2_plus_63_64) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 64, %o0 +ENDPROC(memcpy_retl_o2_plus_63_64) +ENTRY(memcpy_retl_o2_plus_63_56) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 56, %o0 +ENDPROC(memcpy_retl_o2_plus_63_56) +ENTRY(memcpy_retl_o2_plus_63_48) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 48, %o0 +ENDPROC(memcpy_retl_o2_plus_63_48) +ENTRY(memcpy_retl_o2_plus_63_40) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 40, %o0 +ENDPROC(memcpy_retl_o2_plus_63_40) +ENTRY(memcpy_retl_o2_plus_63_32) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 32, %o0 +ENDPROC(memcpy_retl_o2_plus_63_32) +ENTRY(memcpy_retl_o2_plus_63_24) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 24, %o0 +ENDPROC(memcpy_retl_o2_plus_63_24) +ENTRY(memcpy_retl_o2_plus_63_16) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 16, %o0 +ENDPROC(memcpy_retl_o2_plus_63_16) +ENTRY(memcpy_retl_o2_plus_63_8) + add %o2, 63, %o2 + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(memcpy_retl_o2_plus_63_8) +ENTRY(memcpy_retl_o2_plus_o3) + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3) +ENTRY(memcpy_retl_o2_plus_o3_plus_1) + add %o3, 1, %o3 + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_plus_1) +ENTRY(memcpy_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5) +ENTRY(memcpy_retl_o2_plus_o5_plus_1) + add %o5, 1, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_1) +ENTRY(memcpy_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add 
%o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_4) +ENTRY(memcpy_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_8) +ENTRY(memcpy_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_16) +ENTRY(memcpy_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_24) +ENTRY(memcpy_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_32) +ENTRY(memcpy_retl_o2_plus_o5_64) /* leaves %o2 + %o5 + 32 in %o0 and branches to __restore_asi */ + add %o5, 32, %o5 /* NOTE(review): adds 32 although the name says 64, making this identical to memcpy_retl_o2_plus_o5_plus_32; confirm the intended fixup value against the callers */ + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_64) +ENTRY(memcpy_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(memcpy_retl_o2_plus_g1) +ENTRY(memcpy_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(memcpy_retl_o2_plus_g1_plus_1) +ENTRY(memcpy_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(memcpy_retl_o2_plus_g1_plus_8) +ENTRY(memcpy_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4) +ENTRY(memcpy_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_8) +ENTRY(memcpy_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_16) +ENTRY(memcpy_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_24) +ENTRY(memcpy_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_32) +ENTRY(memcpy_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_40) +ENTRY(memcpy_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_48) +ENTRY(memcpy_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_56) +ENTRY(memcpy_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_64) +ENTRY(memcpy_retl_o2_plus_o5_plus_64) + add %o5, 64, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_64) +ENTRY(memcpy_retl_o2_plus_o3_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_fp) +ENTRY(memcpy_retl_o2_plus_o3_plus_1_fp) + add %o3, 1, %o3 + ba,pt %xcc, __restore_asi_fp + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_plus_1_fp) +ENTRY(memcpy_retl_o2_plus_o3_plus_4_fp) + add %o3, 4, %o3 + ba,pt %xcc, __restore_asi_fp + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_plus_4_fp) +ENTRY(memcpy_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_8_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_16_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_24_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_32_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_40_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_48_fp) + 
add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_48_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_56_fp) +ENTRY(memcpy_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(memcpy_retl_o2_plus_o4_plus_64_fp) +ENTRY(memcpy_retl_o2_plus_o5_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_64_fp) + add %o5, 64, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_64_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_56_fp) + add %o5, 56, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_56_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_48_fp) + add %o5, 48, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_48_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_40_fp) + add %o5, 40, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_40_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_32_fp) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_32_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_24_fp) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_24_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_16_fp) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_16_fp) +ENTRY(memcpy_retl_o2_plus_o5_plus_8_fp) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi_fp + add %o2, %o5, %o0 +ENDPROC(memcpy_retl_o2_plus_o5_plus_8_fp) + +#endif diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index 119ccb9a54f4..e57bc514f538 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ 
b/arch/sparc/lib/NG2copy_from_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG2copy_from_user.S: Niagara-2 optimized copy from userspace. * * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_LD_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -28,7 +37,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 7fe1ccefd9d0..367c0bf01518 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG2copy_to_user.S: Niagara-2 optimized copy to userspace. * * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_ST_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -37,7 +46,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 2c20ad63ddbf..bcb21b3a82f1 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG2memcpy.S: Niagara-2 optimized memcpy. * * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -32,15 +34,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x +#endif +#ifndef EX_LD_FP +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#ifndef EX_ST_FP +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -134,45 +138,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), 
NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) 
+ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_1) +ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -224,8 +293,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! 
bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -236,6 +305,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ VISEntryHalf + membar #Sync alignaddr %o1, %g0, %g0 add %o1, (64 - 1), %o4 @@ -257,11 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ blu 170f nop ba,a,pt %xcc, 180f + nop 4: /* 32 <= low bits < 48 */ blu 150f nop ba,a,pt %xcc, 160f + nop 5: /* 0 < low bits < 32 */ blu,a 6f cmp %g2, 8 @@ -269,16 +341,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ blu 130f nop ba,a,pt %xcc, 140f + nop 6: /* 0 < low bits < 16 */ bgeu 120f nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD(LOAD_BLK(%g2, %f0)) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -289,10 +362,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -303,10 +376,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, 
f6, f8, f10) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -317,10 +390,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -331,10 +404,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -345,10 +418,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -359,10 +432,10 @@ FUNC_NAME: /* 
%o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -373,10 +446,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) - EX_LD(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -386,10 +459,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD(LOAD_BLK(%o4, %f0)) - EX_ST(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -406,6 +479,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %o2, 85f sub %o0, %o1, GLOBAL_SPARE ba,a,pt %XCC, 90f + nop .align 64 75: /* 16 < len <= 64 */ @@ -416,28 +490,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), 
NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -453,8 +527,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -470,16 +544,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -499,8 +573,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -510,8 +584,8 
@@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/NG2patch.S b/arch/sparc/lib/NG2patch.S index 28c36f06a6d1..72431b24491a 100644 --- a/arch/sparc/lib/NG2patch.S +++ b/arch/sparc/lib/NG2patch.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG2patch.S: Patch Ultra-I routines with Niagara-2 variant. * * Copyright (C) 2007 David S. Miller <davem@davemloft.net> @@ -26,8 +27,8 @@ .type niagara2_patch_copyops,#function niagara2_patch_copyops: NG_DO_PATCH(memcpy, NG2memcpy) - NG_DO_PATCH(___copy_from_user, NG2copy_from_user) - NG_DO_PATCH(___copy_to_user, NG2copy_to_user) + NG_DO_PATCH(raw_copy_from_user, NG2copy_from_user) + NG_DO_PATCH(raw_copy_to_user, NG2copy_to_user) retl nop .size niagara2_patch_copyops,.-niagara2_patch_copyops diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S index e16c88204a42..d91d6b5f2444 100644 --- a/arch/sparc/lib/NG4clear_page.S +++ b/arch/sparc/lib/NG4clear_page.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4copy_page.S: Niagara-4 optimized clear page. * * Copyright (C) 2012 (davem@davemloft.net) @@ -26,4 +27,4 @@ NG4clear_user_page: /* %o0=dest, %o1=vaddr */ retl nop .size NG4clear_page,.-NG4clear_page - .size NG4clear_user_page,.-NG4clear_user_page
\ No newline at end of file + .size NG4clear_user_page,.-NG4clear_user_page diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index fd9f903ffa32..0cac15a6db3c 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. * * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_LD_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -23,7 +32,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S index 28504e88c535..581062f8ba5f 100644 --- a/arch/sparc/lib/NG4copy_page.S +++ b/arch/sparc/lib/NG4copy_page.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4copy_page.S: Niagara-4 optimized copy page. * * Copyright (C) 2012 (davem@davemloft.net) diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index 9744c4540a8d..c5c9abb3cb77 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. * * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_ST_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -32,7 +41,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000000000000..2d0991e5b034 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,30 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + +#include <linux/linkage.h> + +#define LZCNT_O0_G2 \ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl + sub %g3, %g2, %o0 +ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl + nop +ENDPROC(__NG4fls) diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 9cf2ee01cee3..df0ec1bd1948 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4memcpy.S: Niagara-4 optimized memcpy. * * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -41,18 +43,25 @@ #endif #endif +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x +#endif +#ifndef EX_LD_FP +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#ifndef EX_ST_FP +#define EX_ST_FP(x,y) x #endif + #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest #endif @@ -84,6 +93,9 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif .align 64 .globl FUNC_NAME @@ -114,12 +126,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -144,43 +157,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 8, %o0 bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, 
GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -197,22 +210,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o3), %o0 .Llarge_src_unaligned: +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail) +#else + VISEntryHalf +#endif andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 - VISEntryHalf alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), 
memcpy_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -221,25 +238,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST(STORE(std, %f16, %o0 + 0x00)) - EX_ST(STORE(std, %f18, %o0 + 0x08)) - EX_ST(STORE(std, %f20, %o0 + 0x10)) - EX_ST(STORE(std, %f22, %o0 + 0x18)) - EX_ST(STORE(std, %f24, %o0 + 0x20)) - EX_ST(STORE(std, %f26, %o0 + 0x28)) - EX_ST(STORE(std, %f28, %o0 + 0x30)) - EX_ST(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8) 
add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned nop ba,a,pt %icc, .Lmedium_unaligned +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail: + or %o0, %o1, %g2 +#endif .Lmedium: LOAD(prefetch, %o1 + 0x40, #n_reads_strong) andcc %g2, 0x7, %g0 @@ -249,37 +273,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) + EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn 
%icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. */ @@ -288,12 +313,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -301,16 +326,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -321,17 +346,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), 
memcpy_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -339,22 +364,23 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) add %o1, 0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1) ba,a,pt %icc, .Lexit + nop .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S index 41da4bdd95cb..f81ee5419e2c 100644 --- a/arch/sparc/lib/NG4memset.S +++ b/arch/sparc/lib/NG4memset.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4memset.S: Niagara-4 optimized memset/bzero. * * Copyright (C) 2012 David S. Miller (davem@davemloft.net) @@ -102,4 +103,5 @@ NG4bzero: bne,pt %icc, 1b add %o0, 0x30, %o0 ba,a,pt %icc, .Lpostloop + nop .size NG4bzero,.-NG4bzero diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index a114cbcf2a48..37866175c921 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -1,8 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. * * Copyright (C) 2012 David S. 
Miller <davem@davemloft.net> */ +#include <linux/linkage.h> + #define BRANCH_ALWAYS 0x10680000 #define NOP 0x01000000 #define NG_DO_PATCH(OLD, NEW) \ @@ -26,8 +29,8 @@ .type niagara4_patch_copyops,#function niagara4_patch_copyops: NG_DO_PATCH(memcpy, NG4memcpy) - NG_DO_PATCH(___copy_from_user, NG4copy_from_user) - NG_DO_PATCH(___copy_to_user, NG4copy_to_user) + NG_DO_PATCH(raw_copy_from_user, NG4copy_from_user) + NG_DO_PATCH(raw_copy_to_user, NG4copy_to_user) retl nop .size niagara4_patch_copyops,.-niagara4_patch_copyops @@ -52,3 +55,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl + nop +ENDPROC(niagara4_patch_fls) diff --git a/arch/sparc/lib/NGbzero.S b/arch/sparc/lib/NGbzero.S index beab29bf419b..19327614d57d 100644 --- a/arch/sparc/lib/NGbzero.S +++ b/arch/sparc/lib/NGbzero.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NGbzero.S: Niagara optimized memset/clear_user. * * Copyright (C) 2006 David S. Miller (davem@davemloft.net) @@ -8,7 +9,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_o1; \ + .word 98b, __retl_o1_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1ac21e..9abc49fcdbbe 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -1,13 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NGcopy_from_user.S: Niagara optimized copy from userspace. * * Copyright (C) 2006, 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; @@ -25,7 +26,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index ff630dcb273c..9cbe2f18e5cc 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -1,13 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NGcopy_to_user.S: Niagara optimized copy to userspace. * * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; @@ -28,7 +29,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop #endif diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index 96a14caf6966..bbd3ea0a6482 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -1,15 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NGmemcpy.S: Niagara optimized memcpy. * * Copyright (C) 2006, 2007 David S. 
Miller (davem@davemloft.net) */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/asi.h> #include <asm/thread_info.h> #define GLOBAL_SPARE %g7 #define RESTORE_ASI(TMP) \ - ldub [%g6 + TI_CURRENT_DS], TMP; \ - wr TMP, 0x0, %asi; + wr %g0, ASI_AIUS, %asi #else #define GLOBAL_SPARE %g5 #define RESTORE_ASI(TMP) \ @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,99 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + wr %g0, ASI_AIUS, %asi + ret + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + ba,pt %xcc, __restore_asi + add %i2, %i5, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4_plus_16) + add %i4, 16, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 
+ENDPROC(NG_ret_i2_plus_i4_plus_16) +ENTRY(NG_ret_i2_plus_i4_plus_8) + add %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, %i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +ENTRY(NG_ret_i2_and_7_plus_i4_plus_8) + and %i2, 7, %i2 + add %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +216,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! 
bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +250,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +268,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 
+ 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +301,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +339,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. 
*/ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +372,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! 
initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -311,6 +401,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ brz,pt %i2, 85f sub %o0, %i1, %i3 ba,a,pt %XCC, 90f + nop .align 64 70: /* 16 < len <= 64 */ @@ -321,28 +412,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4_plus_16) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4_plus_16) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4_plus_16) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_plus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), 
NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +449,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +466,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +495,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +506,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index 423d46e2258b..88fec7818065 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S @@ -1,3 +1,4 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ /* NGpage.S: Niagara optimize clear and copy page. * * Copyright (C) 2006 (davem@davemloft.net) diff --git a/arch/sparc/lib/NGpatch.S b/arch/sparc/lib/NGpatch.S index 3b0674fc3366..e9f843f1063e 100644 --- a/arch/sparc/lib/NGpatch.S +++ b/arch/sparc/lib/NGpatch.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* NGpatch.S: Patch Ultra-I routines with Niagara variant. * * Copyright (C) 2006 David S. Miller <davem@davemloft.net> @@ -26,8 +27,8 @@ .type niagara_patch_copyops,#function niagara_patch_copyops: NG_DO_PATCH(memcpy, NGmemcpy) - NG_DO_PATCH(___copy_from_user, NGcopy_from_user) - NG_DO_PATCH(___copy_to_user, NGcopy_to_user) + NG_DO_PATCH(raw_copy_from_user, NGcopy_from_user) + NG_DO_PATCH(raw_copy_to_user, NGcopy_to_user) retl nop .size niagara_patch_copyops,.-niagara_patch_copyops diff --git a/arch/sparc/lib/PeeCeeI.c b/arch/sparc/lib/PeeCeeI.c index 6529f8657597..cde4c9a51b2e 100644 --- a/arch/sparc/lib/PeeCeeI.c +++ b/arch/sparc/lib/PeeCeeI.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * PeeCeeI.c: The emerging standard... 
* @@ -15,7 +16,7 @@ void outsb(unsigned long __addr, const void *src, unsigned long count) const u8 *p = src; while (count--) - outb(*p++, addr); + __raw_writeb(*p++, addr); } EXPORT_SYMBOL(outsb); @@ -93,21 +94,21 @@ void insb(unsigned long __addr, void *dst, unsigned long count) u8 *pb = dst; while ((((unsigned long)pb) & 0x3) && count--) - *pb++ = inb(addr); + *pb++ = __raw_readb(addr); pi = (u32 *)pb; while (count >= 4) { u32 w; - w = (inb(addr) << 24); - w |= (inb(addr) << 16); - w |= (inb(addr) << 8); - w |= (inb(addr) << 0); + w = (__raw_readb(addr) << 24); + w |= (__raw_readb(addr) << 16); + w |= (__raw_readb(addr) << 8); + w |= (__raw_readb(addr) << 0); *pi++ = w; count -= 4; } pb = (u8 *)pi; while (count--) - *pb++ = inb(addr); + *pb++ = __raw_readb(addr); } } EXPORT_SYMBOL(insb); @@ -121,21 +122,21 @@ void insw(unsigned long __addr, void *dst, unsigned long count) u32 *pi; if (((unsigned long)ps) & 0x2) { - *ps++ = le16_to_cpu(inw(addr)); + *ps++ = __raw_readw(addr); count--; } pi = (u32 *)ps; while (count >= 2) { u32 w; - w = (le16_to_cpu(inw(addr)) << 16); - w |= (le16_to_cpu(inw(addr)) << 0); + w = __raw_readw(addr) << 16; + w |= __raw_readw(addr) << 0; *pi++ = w; count -= 2; } ps = (u16 *)pi; if (count) - *ps = le16_to_cpu(inw(addr)); + *ps = __raw_readw(addr); } } EXPORT_SYMBOL(insw); @@ -148,7 +149,7 @@ void insl(unsigned long __addr, void *dst, unsigned long count) if ((((unsigned long)dst) & 0x3) == 0) { u32 *pi = dst; while (count--) - *pi++ = le32_to_cpu(inl(addr)); + *pi++ = __raw_readl(addr); } else { u32 l = 0, l2, *pi; u16 *ps; @@ -158,11 +159,11 @@ void insl(unsigned long __addr, void *dst, unsigned long count) case 0x2: ps = dst; count -= 1; - l = le32_to_cpu(inl(addr)); + l = __raw_readl(addr); *ps++ = l; pi = (u32 *)ps; while (count--) { - l2 = le32_to_cpu(inl(addr)); + l2 = __raw_readl(addr); *pi++ = (l << 16) | (l2 >> 16); l = l2; } @@ -173,13 +174,13 @@ void insl(unsigned long __addr, void *dst, unsigned long count) case 0x1: pb = 
dst; count -= 1; - l = le32_to_cpu(inl(addr)); + l = __raw_readl(addr); *pb++ = l >> 24; ps = (u16 *)pb; *ps++ = ((l >> 8) & 0xffff); pi = (u32 *)ps; while (count--) { - l2 = le32_to_cpu(inl(addr)); + l2 = __raw_readl(addr); *pi++ = (l << 24) | (l2 >> 8); l = l2; } @@ -190,11 +191,11 @@ void insl(unsigned long __addr, void *dst, unsigned long count) case 0x3: pb = (u8 *)dst; count -= 1; - l = le32_to_cpu(inl(addr)); + l = __raw_readl(addr); *pb++ = l >> 24; pi = (u32 *)pb; while (count--) { - l2 = le32_to_cpu(inl(addr)); + l2 = __raw_readl(addr); *pi++ = (l << 8) | (l2 >> 24); l = l2; } diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index a6ae2ea04bf5..bf08d1c78836 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -1,17 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U1copy_from_user.S: UltraSparc-I/II/IIi/IIe optimized copy from userspace. * * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define FUNC_NAME ___copy_from_user +#define EX_LD_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define FUNC_NAME raw_copy_from_user #define LOAD(type,addr,dest) type##a [addr] %asi, dest #define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest #define EX_RETVAL(x) 0 @@ -23,7 +32,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop; \ #include "U1memcpy.S" diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index f4b970eeb485..15169851e7ab 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -1,17 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U1copy_to_user.S: UltraSparc-I/II/IIi/IIe optimized copy to userspace. 
* * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define FUNC_NAME ___copy_to_user +#define EX_ST_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define FUNC_NAME raw_copy_to_user #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS #define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS #define EX_RETVAL(x) 0 @@ -23,7 +32,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop; \ #include "U1memcpy.S" diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index b67142b7768e..154fbd35400c 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy. * * Copyright (C) 1997, 2004 David S. 
Miller (davem@redhat.com) @@ -5,6 +6,8 @@ */ #ifdef __KERNEL__ +#include <linux/export.h> +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE g7 @@ -23,15 +26,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x +#endif +#ifndef EX_LD_FP +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#ifndef EX_ST_FP +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -72,53 +77,170 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD(LOAD_BLK(%src, %fdest)); \ - EX_ST(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST(STORE_BLK(%fsrc, %dest)); \ + 
EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_g3_16_fp) + VISExitHalf + add %g3, 16, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_16_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 
+ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -160,8 +282,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -172,20 +294,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD(LOAD(ldd, %o1, %f4)) -1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), 
U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -208,13 +330,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -234,11 +356,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -255,11 +377,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -276,11 +398,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 
3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -297,11 +419,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -318,11 +440,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -339,11 +461,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -360,11 +482,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) 
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -381,11 +503,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -401,53 +523,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) - -93: EX_LD(LOAD(ldd, %o1, %f2)) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, 
f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov %g1, %o1 -1: EX_LD(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -463,27 +585,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + 
EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -497,9 +619,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -515,16 +637,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -542,9 +664,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -552,12 +674,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl mov EX_RETVAL(%o4), %o0 .size FUNC_NAME, .-FUNC_NAME +EXPORT_SYMBOL(FUNC_NAME) diff --git a/arch/sparc/lib/U3copy_from_user.S 
b/arch/sparc/lib/U3copy_from_user.S index b1acd1331c33..9c891e9edc7b 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U3copy_from_user.S: UltraSparc-III optimized copy from userspace. * * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_LD_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index ef1e493afdfa..da424608272c 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -1,13 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U3copy_to_user.S: UltraSparc-III optimized copy to userspace. * * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ + .text; \ + .align 4; + +#define EX_ST_FP(x,y) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -23,7 +32,7 @@ #define PREAMBLE \ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ - bne,pn %icc, ___copy_in_user; \ + bne,pn %icc, raw_copy_in_user; \ nop; \ #include "U3memcpy.S" diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 7cae9cc6a204..bace3a18f836 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U3memcpy.S: UltraSparc-III optimized memcpy. * * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -22,15 +24,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x +#endif +#ifndef EX_LD_FP +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#ifndef EX_ST_FP +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -71,6 +75,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 
+ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -84,18 +169,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ srlx %o2, 31, %g2 cmp %g2, 0 + + /* software trap 5 "Range Check" if dst >= 0x80000000 */ tne %xcc, 5 PREAMBLE mov %o0, %o4 + + /* if len == 0 */ cmp %o2, 0 - be,pn %XCC, 85f + be,pn %XCC, end_return or %o0, %o1, %o3 + + /* if len < 16 */ cmp %o2, 16 - blu,a,pn %XCC, 80f + blu,a,pn %XCC, less_than_16 or %o3, %o2, %o3 + /* if len < 192 */ cmp %o2, (3 * 64) - blu,pt %XCC, 70f + blu,pt %XCC, less_than_192 andcc %o3, 0x7, %g0 /* Clobbers o5/g1/g2/g3/g7/icc/xcc. 
We must preserve @@ -120,8 +212,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -132,20 +224,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD(LOAD(ldd, %o1, %f4)) -1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -155,26 +247,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD(LOAD(ldd, %o1 + 0x028, 
%f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) + and %o2, 0x3f, %o2 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 bgu,pt %XCC, 1f @@ -184,26 +277,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST(STORE_BLK(%f16, %o0)) - EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -211,29 +304,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 
64-byte block. */ 2: - EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST(STORE_BLK(%f16, %o0)) - EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -244,7 +337,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * Also notice how this code is careful not to perform a * load past the end of the src buffer. 
*/ - and %o2, 0x3f, %o2 andcc %o2, 0x38, %g2 be,pn %XCC, 2f subcc %g2, 0x8, %g2 @@ -253,20 +345,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, %o0 - EX_LD(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -278,7 +370,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %o2, 0 add %o1, %g1, %o1 VISExitHalf - be,pn %XCC, 85f + be,pn %XCC, end_return sub %o0, %o1, %o3 andcc %g1, 0x7, %g0 @@ -286,33 +378,37 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 - be,pt %icc, 85f + be,pt %icc, end_return nop - EX_LD(LOAD(ldub, %o1, %o5)) - ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) + ba,pt %xcc, 
end_return + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 -70: /* 16 < len <= 64 */ + /* 16 <= len < 192 */ +less_than_192: bne,pn %XCC, 75f sub %o0, %o1, %o3 @@ -320,29 +416,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 - be,pt %XCC, 85f + be,pt %XCC, end_return nop ba,pt %xcc, 90f nop @@ -355,8 +451,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -372,48 +468,50 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), 
U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 srl %g1, 3, %g1 andcc %o2, 0x7, %o2 - be,pn %icc, 85f + be,pn %icc, end_return add %o1, %g1, %o1 ba,pt %xcc, 90f sub %o0, %o1, %o3 .align 64 -80: /* 0 < len <= 16 */ + /* 0 < len < 16 */ +less_than_16: andcc %o3, 0x3, %g0 bne,pn %XCC, 90f sub %o0, %o1, %o3 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 -85: retl +end_return: + retl mov EX_RETVAL(%o4), %o0 .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/U3patch.S b/arch/sparc/lib/U3patch.S index ecc302619a6e..9a888088f3c9 100644 --- a/arch/sparc/lib/U3patch.S +++ b/arch/sparc/lib/U3patch.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* U3patch.S: Patch Ultra-I routines with Ultra-III variant. * * Copyright (C) 2004 David S. 
Miller <davem@redhat.com> @@ -26,8 +27,8 @@ .type cheetah_patch_copyops,#function cheetah_patch_copyops: ULTRA3_DO_PATCH(memcpy, U3memcpy) - ULTRA3_DO_PATCH(___copy_from_user, U3copy_from_user) - ULTRA3_DO_PATCH(___copy_to_user, U3copy_to_user) + ULTRA3_DO_PATCH(raw_copy_from_user, U3copy_from_user) + ULTRA3_DO_PATCH(raw_copy_to_user, U3copy_to_user) retl nop .size cheetah_patch_copyops,.-cheetah_patch_copyops diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e..31a0c336c185 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * VISsave.S: Code for saving FPU register state for * VIS routines. One should not call this directly, @@ -6,24 +7,24 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> +#include <linux/linkage.h> + #include <asm/asi.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/visasm.h> #include <asm/thread_info.h> - .text - .globl VISenter, VISenterhalf - /* On entry: %o5=current FPRS value, %g7 is callers address */ /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */ /* Nothing special need be done here to handle pre-emption, this * FPU save/restore mechanism is already preemption safe. 
*/ - + .text .align 32 -VISenter: +ENTRY(VISenter) ldub [%g6 + TI_FPDEPTH], %g1 brnz,a,pn %g1, 1f cmp %g1, 1 @@ -44,9 +45,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +80,5 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb %o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs +ENDPROC(VISenter) +EXPORT_SYMBOL(VISenter) diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S index 86f60de07b0a..2a9e7c4fb260 100644 --- a/arch/sparc/lib/ashldi3.S +++ b/arch/sparc/lib/ashldi3.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ashldi3.S: GCC emits these for certain drivers playing * with long 
longs. @@ -5,6 +6,7 @@ * Copyright (C) 1999 David S. Miller (davem@redhat.com) */ +#include <linux/export.h> #include <linux/linkage.h> .text @@ -33,3 +35,4 @@ ENTRY(__ashldi3) retl nop ENDPROC(__ashldi3) +EXPORT_SYMBOL(__ashldi3) diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S index 6eb8ba2dd50e..8fd0b311722f 100644 --- a/arch/sparc/lib/ashrdi3.S +++ b/arch/sparc/lib/ashrdi3.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ashrdi3.S: The filesystem code creates all kinds of references to * this little routine on the sparc with gcc. @@ -5,6 +6,7 @@ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/export.h> #include <linux/linkage.h> .text @@ -35,3 +37,4 @@ ENTRY(__ashrdi3) jmpl %o7 + 8, %g0 nop ENDPROC(__ashrdi3) +EXPORT_SYMBOL(__ashrdi3) diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 1d32b54089aa..8ae880ebf07a 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * atomic32.c: 32-bit atomic_t implementation * @@ -27,20 +28,59 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -int __atomic_add_return(int i, atomic_t *v) +#define ATOMIC_FETCH_OP(op, c_op) \ +int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int ret; \ + unsigned long flags; \ + spin_lock_irqsave(ATOMIC_HASH(v), flags); \ + \ + ret = v->counter; \ + v->counter c_op i; \ + \ + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(arch_atomic_fetch_##op); + +#define ATOMIC_OP_RETURN(op, c_op) \ +int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int ret; \ + unsigned long flags; \ + spin_lock_irqsave(ATOMIC_HASH(v), flags); \ + \ + ret = (v->counter c_op i); \ + \ + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(arch_atomic_##op##_return); + +ATOMIC_OP_RETURN(add, +=) + +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) 
+ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN + +int arch_atomic_xchg(atomic_t *v, int new) { int ret; unsigned long flags; - spin_lock_irqsave(ATOMIC_HASH(v), flags); - - ret = (v->counter += i); + spin_lock_irqsave(ATOMIC_HASH(v), flags); + ret = v->counter; + v->counter = new; spin_unlock_irqrestore(ATOMIC_HASH(v), flags); return ret; } -EXPORT_SYMBOL(__atomic_add_return); +EXPORT_SYMBOL(arch_atomic_xchg); -int atomic_cmpxchg(atomic_t *v, int old, int new) +int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; unsigned long flags; @@ -53,9 +93,9 @@ int atomic_cmpxchg(atomic_t *v, int old, int new) spin_unlock_irqrestore(ATOMIC_HASH(v), flags); return ret; } -EXPORT_SYMBOL(atomic_cmpxchg); +EXPORT_SYMBOL(arch_atomic_cmpxchg); -int __atomic_add_unless(atomic_t *v, int a, int u) +int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int ret; unsigned long flags; @@ -67,10 +107,10 @@ int __atomic_add_unless(atomic_t *v, int a, int u) spin_unlock_irqrestore(ATOMIC_HASH(v), flags); return ret; } -EXPORT_SYMBOL(__atomic_add_unless); +EXPORT_SYMBOL(arch_atomic_fetch_add_unless); /* Atomic operations are already serializing */ -void atomic_set(atomic_t *v, int i) +void arch_atomic_set(atomic_t *v, int i) { unsigned long flags; @@ -78,9 +118,9 @@ void atomic_set(atomic_t *v, int i) v->counter = i; spin_unlock_irqrestore(ATOMIC_HASH(v), flags); } -EXPORT_SYMBOL(atomic_set); +EXPORT_SYMBOL(arch_atomic_set); -unsigned long ___set_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___set_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -91,9 +131,9 @@ unsigned long ___set_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___set_bit); +EXPORT_SYMBOL(sp32___set_bit); -unsigned long ___clear_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___clear_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -104,9 +144,9 @@ 
unsigned long ___clear_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___clear_bit); +EXPORT_SYMBOL(sp32___clear_bit); -unsigned long ___change_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___change_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -117,18 +157,41 @@ unsigned long ___change_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___change_bit); +EXPORT_SYMBOL(sp32___change_bit); + +#define CMPXCHG(T) \ + T __cmpxchg_##T(volatile T *ptr, T old, T new) \ + { \ + unsigned long flags; \ + T prev; \ + \ + spin_lock_irqsave(ATOMIC_HASH(ptr), flags); \ + if ((prev = *ptr) == old) \ + *ptr = new; \ + spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);\ + \ + return prev; \ + } + +CMPXCHG(u8) +CMPXCHG(u16) +CMPXCHG(u32) +CMPXCHG(u64) +EXPORT_SYMBOL(__cmpxchg_u8); +EXPORT_SYMBOL(__cmpxchg_u16); +EXPORT_SYMBOL(__cmpxchg_u32); +EXPORT_SYMBOL(__cmpxchg_u64); -unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) +unsigned long __xchg_u32(volatile u32 *ptr, u32 new) { unsigned long flags; u32 prev; spin_lock_irqsave(ATOMIC_HASH(ptr), flags); - if ((prev = *ptr) == old) - *ptr = new; + prev = *ptr; + *ptr = new; spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags); return (unsigned long)prev; } -EXPORT_SYMBOL(__cmpxchg_u32); +EXPORT_SYMBOL(__xchg_u32); diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 85c233d0a340..4f8cab2fb9cd 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -1,124 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* atomic.S: These things are too big to do inline. * * Copyright (C) 1999, 2007 2012 David S. 
Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ -ENTRY(atomic_add) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - add %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_add) -ENTRY(atomic_sub) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - sub %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_sub) +#define ATOMIC_OP(op) \ +ENTRY(arch_atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + nop; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic_##op); \ +EXPORT_SYMBOL(arch_atomic_##op); -ENTRY(atomic_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - add %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - add %g1, %o0, %g1 - retl - sra %g1, 0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_add_ret) +#define ATOMIC_OP_RETURN(op) \ +ENTRY(arch_atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */\ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + op %g1, %o0, %g1; \ + retl; \ + sra %g1, 0, %o0; \ 
+2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic_##op##_return); \ +EXPORT_SYMBOL(arch_atomic_##op##_return); -ENTRY(atomic_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - sub %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - sub %g1, %o0, %g1 - retl - sra %g1, 0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_sub_ret) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(arch_atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic_fetch_##op); \ +EXPORT_SYMBOL(arch_atomic_fetch_##op); -ENTRY(atomic64_add) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - add %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_add) +ATOMIC_OP(add) +ATOMIC_OP_RETURN(add) +ATOMIC_FETCH_OP(add) -ENTRY(atomic64_sub) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - sub %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_sub) +ATOMIC_OP(sub) +ATOMIC_OP_RETURN(sub) +ATOMIC_FETCH_OP(sub) -ENTRY(atomic64_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - add %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - add %g1, %o0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_add_ret) +ATOMIC_OP(and) +ATOMIC_FETCH_OP(and) -ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - sub %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, 
BACKOFF_LABEL(2f, 1b) - nop - retl - sub %g1, %o0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_sub_ret) +ATOMIC_OP(or) +ATOMIC_FETCH_OP(or) + +ATOMIC_OP(xor) +ATOMIC_FETCH_OP(xor) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define ATOMIC64_OP(op) \ +ENTRY(arch_atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + nop; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic64_##op); \ +EXPORT_SYMBOL(arch_atomic64_##op); + +#define ATOMIC64_OP_RETURN(op) \ +ENTRY(arch_atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + op %g1, %o0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic64_##op##_return); \ +EXPORT_SYMBOL(arch_atomic64_##op##_return); + +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(arch_atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(arch_atomic64_fetch_##op); \ +EXPORT_SYMBOL(arch_atomic64_fetch_##op); + +ATOMIC64_OP(add) +ATOMIC64_OP_RETURN(add) +ATOMIC64_FETCH_OP(add) + +ATOMIC64_OP(sub) +ATOMIC64_OP_RETURN(sub) +ATOMIC64_FETCH_OP(sub) + +ATOMIC64_OP(and) +ATOMIC64_FETCH_OP(and) + +ATOMIC64_OP(or) +ATOMIC64_FETCH_OP(or) + +ATOMIC64_OP(xor) +ATOMIC64_FETCH_OP(xor) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP -ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */ +ENTRY(arch_atomic64_dec_if_positive) /* %o0 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o0], %g1 brlez,pn %g1, 3f @@ 
-130,4 +162,5 @@ ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */ 3: retl sub %g1, 1, %o0 2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_dec_if_positive) +ENDPROC(arch_atomic64_dec_if_positive) +EXPORT_SYMBOL(arch_atomic64_dec_if_positive) diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 8ec4e9c0251a..32a5c1d9459c 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bitext.c: kernel little helper (of bit shuffling variety). * diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S index 36f72cc0e67e..9c91cbb310e7 100644 --- a/arch/sparc/lib/bitops.S +++ b/arch/sparc/lib/bitops.S @@ -1,8 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* bitops.S: Sparc64 atomic bit operations. * * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> @@ -29,6 +31,7 @@ ENTRY(test_and_set_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(test_and_set_bit) +EXPORT_SYMBOL(test_and_set_bit) ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) @@ -50,6 +53,7 @@ ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(test_and_clear_bit) +EXPORT_SYMBOL(test_and_clear_bit) ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) @@ -71,6 +75,7 @@ ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(test_and_change_bit) +EXPORT_SYMBOL(test_and_change_bit) ENTRY(set_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) @@ -90,6 +95,7 @@ ENTRY(set_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(set_bit) +EXPORT_SYMBOL(set_bit) ENTRY(clear_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) @@ -109,6 +115,7 @@ ENTRY(clear_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(clear_bit) +EXPORT_SYMBOL(clear_bit) ENTRY(change_bit) /* 
%o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) @@ -128,3 +135,4 @@ ENTRY(change_bit) /* %o0=nr, %o1=addr */ nop 2: BACKOFF_SPIN(%o3, %o4, 1b) ENDPROC(change_bit) +EXPORT_SYMBOL(change_bit) diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S index 3c771011ff4b..5b92959a4d48 100644 --- a/arch/sparc/lib/blockops.S +++ b/arch/sparc/lib/blockops.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * blockops.S: Common block zero optimized routines. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/page.h> @@ -64,6 +66,7 @@ ENTRY(bzero_1page) retl nop ENDPROC(bzero_1page) +EXPORT_SYMBOL(bzero_1page) ENTRY(__copy_1page) /* NOTE: If you change the number of insns of this routine, please check @@ -87,3 +90,4 @@ ENTRY(__copy_1page) retl nop ENDPROC(__copy_1page) +EXPORT_SYMBOL(__copy_1page) diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S index 8c058114b649..2bfa44a6b25e 100644 --- a/arch/sparc/lib/bzero.S +++ b/arch/sparc/lib/bzero.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* bzero.S: Simple prefetching memset, bzero, and clear_user * implementations. * * Copyright (C) 2005 David S. Miller <davem@davemloft.net> */ +#include <linux/export.h> #include <linux/linkage.h> .text @@ -78,6 +80,8 @@ __bzero_done: mov %o3, %o0 ENDPROC(__bzero) ENDPROC(memset) +EXPORT_SYMBOL(__bzero) +EXPORT_SYMBOL(memset) #define EX_ST(x,y) \ 98: x,y; \ @@ -143,3 +147,4 @@ __clear_user_done: retl clr %o0 ENDPROC(__clear_user) +EXPORT_SYMBOL(__clear_user) diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 0084c3361e15..66eda40fce36 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* checksum.S: Sparc optimized checksum code. 
* * Copyright(C) 1995 Linus Torvalds @@ -13,6 +14,7 @@ * BSD4.4 portable checksum routine */ +#include <linux/export.h> #include <asm/errno.h> #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) \ @@ -104,6 +106,7 @@ csum_partial_fix_alignment: * buffer of size 0x20. Follow the code path for that case. */ .globl csum_partial + EXPORT_SYMBOL(csum_partial) csum_partial: /* %o0=buf, %o1=len, %o2=sum */ andcc %o0, 0x7, %g0 ! alignment problems? bne csum_partial_fix_alignment ! yep, handle it @@ -141,44 +144,14 @@ cpte: bne csum_partial_end_cruft ! yep, handle it cpout: retl ! get outta here mov %o2, %o0 ! return computed csum - .globl __csum_partial_copy_start, __csum_partial_copy_end -__csum_partial_copy_start: - /* Work around cpp -rob */ #define ALLOC #alloc #define EXECINSTR #execinstr -#define EX(x,y,a,b) \ -98: x,y; \ - .section .fixup,ALLOC,EXECINSTR; \ - .align 4; \ -99: ba 30f; \ - a, b, %o3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 - -#define EX2(x,y) \ -98: x,y; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 30f; \ - .text; \ - .align 4 - -#define EX3(x,y) \ +#define EX(x,y) \ 98: x,y; \ .section __ex_table,ALLOC; \ .align 4; \ - .word 98b, 96f; \ - .text; \ - .align 4 - -#define EXT(start,end,handler) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, handler; \ + .word 98b, cc_fault; \ .text; \ .align 4 @@ -189,20 +162,20 @@ __csum_partial_copy_start: * please check the fixup code below as well. 
*/ #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ addxcc t0, sum, sum; \ - ldd [src + off + 0x10], t4; \ + EX(ldd [src + off + 0x10], t4); \ addxcc t1, sum, sum; \ - ldd [src + off + 0x18], t6; \ + EX(ldd [src + off + 0x18], t6); \ addxcc t2, sum, sum; \ - std t0, [dst + off + 0x00]; \ + EX(std t0, [dst + off + 0x00]); \ addxcc t3, sum, sum; \ - std t2, [dst + off + 0x08]; \ + EX(std t2, [dst + off + 0x08]); \ addxcc t4, sum, sum; \ - std t4, [dst + off + 0x10]; \ + EX(std t4, [dst + off + 0x10]); \ addxcc t5, sum, sum; \ - std t6, [dst + off + 0x18]; \ + EX(std t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ addxcc t7, sum, sum; @@ -211,59 +184,59 @@ __csum_partial_copy_start: * Viking MXCC into streaming mode. Ho hum... */ #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ - ldd [src + off + 0x10], t4; \ - ldd [src + off + 0x18], t6; \ - st t0, [dst + off + 0x00]; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ + EX(ldd [src + off + 0x10], t4); \ + EX(ldd [src + off + 0x18], t6); \ + EX(st t0, [dst + off + 0x00]); \ addxcc t0, sum, sum; \ - st t1, [dst + off + 0x04]; \ + EX(st t1, [dst + off + 0x04]); \ addxcc t1, sum, sum; \ - st t2, [dst + off + 0x08]; \ + EX(st t2, [dst + off + 0x08]); \ addxcc t2, sum, sum; \ - st t3, [dst + off + 0x0c]; \ + EX(st t3, [dst + off + 0x0c]); \ addxcc t3, sum, sum; \ - st t4, [dst + off + 0x10]; \ + EX(st t4, [dst + off + 0x10]); \ addxcc t4, sum, sum; \ - st t5, [dst + off + 0x14]; \ + EX(st t5, [dst + off + 0x14]); \ addxcc t5, sum, sum; \ - st t6, [dst + off + 0x18]; \ + EX(st t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ - st t7, [dst + off + 0x1c]; \ + EX(st t7, [dst + off + 0x1c]); \ addxcc t7, sum, sum; /* Yuck, 6 superscalar 
cycles... */ #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \ - ldd [src - off - 0x08], t0; \ - ldd [src - off - 0x00], t2; \ + EX(ldd [src - off - 0x08], t0); \ + EX(ldd [src - off - 0x00], t2); \ addxcc t0, sum, sum; \ - st t0, [dst - off - 0x08]; \ + EX(st t0, [dst - off - 0x08]); \ addxcc t1, sum, sum; \ - st t1, [dst - off - 0x04]; \ + EX(st t1, [dst - off - 0x04]); \ addxcc t2, sum, sum; \ - st t2, [dst - off - 0x00]; \ + EX(st t2, [dst - off - 0x00]); \ addxcc t3, sum, sum; \ - st t3, [dst - off + 0x04]; + EX(st t3, [dst - off + 0x04]); /* Handle the end cruft code out of band for better cache patterns. */ cc_end_cruft: be 1f andcc %o3, 4, %g0 - EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf) + EX(ldd [%o0 + 0x00], %g2) add %o1, 8, %o1 addcc %g2, %g7, %g7 add %o0, 8, %o0 addxcc %g3, %g7, %g7 - EX2(st %g2, [%o1 - 0x08]) + EX(st %g2, [%o1 - 0x08]) addx %g0, %g7, %g7 andcc %o3, 4, %g0 - EX2(st %g3, [%o1 - 0x04]) + EX(st %g3, [%o1 - 0x04]) 1: be 1f andcc %o3, 3, %o3 - EX(ld [%o0 + 0x00], %g2, add %o3, 4) + EX(ld [%o0 + 0x00], %g2) add %o1, 4, %o1 addcc %g2, %g7, %g7 - EX2(st %g2, [%o1 - 0x04]) + EX(st %g2, [%o1 - 0x04]) addx %g0, %g7, %g7 andcc %o3, 3, %g0 add %o0, 4, %o0 @@ -273,14 +246,14 @@ cc_end_cruft: subcc %o3, 2, %o3 b 4f or %g0, %g0, %o4 -2: EX(lduh [%o0 + 0x00], %o4, add %o3, 2) +2: EX(lduh [%o0 + 0x00], %o4) add %o0, 2, %o0 - EX2(sth %o4, [%o1 + 0x00]) + EX(sth %o4, [%o1 + 0x00]) be 6f add %o1, 2, %o1 sll %o4, 16, %o4 -4: EX(ldub [%o0 + 0x00], %o5, add %g0, 1) - EX2(stb %o5, [%o1 + 0x00]) +4: EX(ldub [%o0 + 0x00], %o5) + EX(stb %o5, [%o1 + 0x00]) sll %o5, 8, %o5 or %o5, %o4, %o4 6: addcc %o4, %g7, %g7 @@ -303,9 +276,9 @@ cc_dword_align: andcc %o0, 0x2, %g0 be 1f andcc %o0, 0x4, %g0 - EX(lduh [%o0 + 0x00], %g4, add %g1, 0) + EX(lduh [%o0 + 0x00], %g4) sub %g1, 2, %g1 - EX2(sth %g4, [%o1 + 0x00]) + EX(sth %g4, [%o1 + 0x00]) add %o0, 2, %o0 sll %g4, 16, %g4 addcc %g4, %g7, %g7 @@ -319,9 +292,9 @@ cc_dword_align: or %g3, %g7, %g7 1: be 3f andcc 
%g1, 0xffffff80, %g0 - EX(ld [%o0 + 0x00], %g4, add %g1, 0) + EX(ld [%o0 + 0x00], %g4) sub %g1, 4, %g1 - EX2(st %g4, [%o1 + 0x00]) + EX(st %g4, [%o1 + 0x00]) add %o0, 4, %o0 addcc %g4, %g7, %g7 add %o1, 4, %o1 @@ -335,6 +308,7 @@ cc_dword_align: */ .align 8 .globl __csum_partial_copy_sparc_generic + EXPORT_SYMBOL(__csum_partial_copy_sparc_generic) __csum_partial_copy_sparc_generic: /* %o0=src, %o1=dest, %g1=len, %g7=sum */ xor %o0, %o1, %o4 ! get changing bits @@ -350,7 +324,6 @@ __csum_partial_copy_sparc_generic: CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -10: EXT(5b, 10b, 20f) ! note for exception handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? @@ -375,8 +348,7 @@ cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5) -12: EXT(cctbl, 12b, 22f) ! note for exception table handling - addx %g0, %g7, %g7 +12: addx %g0, %g7, %g7 andcc %o3, 0xf, %g0 ! check for low bits set ccte: bne cc_end_cruft ! something left, handle it out of band andcc %o3, 8, %g0 ! begin checks for that code @@ -386,7 +358,6 @@ ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -11: EXT(ccdbl, 11b, 21f) ! note for exception table handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? 
@@ -403,9 +374,9 @@ ccslow: cmp %g1, 0 be,a 1f srl %g1, 1, %g4 sub %g1, 1, %g1 - EX(ldub [%o0], %g5, add %g1, 1) + EX(ldub [%o0], %g5) add %o0, 1, %o0 - EX2(stb %g5, [%o1]) + EX(stb %g5, [%o1]) srl %g1, 1, %g4 add %o1, 1, %o1 1: cmp %g4, 0 @@ -414,34 +385,34 @@ ccslow: cmp %g1, 0 andcc %o0, 2, %g0 be,a 1f srl %g4, 1, %g4 - EX(lduh [%o0], %o4, add %g1, 0) + EX(lduh [%o0], %o4) sub %g1, 2, %g1 srl %o4, 8, %g2 sub %g4, 1, %g4 - EX2(stb %g2, [%o1]) + EX(stb %g2, [%o1]) add %o4, %g5, %g5 - EX2(stb %o4, [%o1 + 1]) + EX(stb %o4, [%o1 + 1]) add %o0, 2, %o0 srl %g4, 1, %g4 add %o1, 2, %o1 1: cmp %g4, 0 be,a 2f andcc %g1, 2, %g0 - EX3(ld [%o0], %o4) + EX(ld [%o0], %o4) 5: srl %o4, 24, %g2 srl %o4, 16, %g3 - EX2(stb %g2, [%o1]) + EX(stb %g2, [%o1]) srl %o4, 8, %g2 - EX2(stb %g3, [%o1 + 1]) + EX(stb %g3, [%o1 + 1]) add %o0, 4, %o0 - EX2(stb %g2, [%o1 + 2]) + EX(stb %g2, [%o1 + 2]) addcc %o4, %g5, %g5 - EX2(stb %o4, [%o1 + 3]) + EX(stb %o4, [%o1 + 3]) addx %g5, %g0, %g5 ! I am now to lazy to optimize this (question it add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl subcc %g4, 1, %g4 ! tricks bne,a 5b - EX3(ld [%o0], %o4) + EX(ld [%o0], %o4) sll %g5, 16, %g2 srl %g5, 16, %g5 srl %g2, 16, %g2 @@ -449,19 +420,19 @@ ccslow: cmp %g1, 0 add %g2, %g5, %g5 2: be,a 3f andcc %g1, 1, %g0 - EX(lduh [%o0], %o4, and %g1, 3) + EX(lduh [%o0], %o4) andcc %g1, 1, %g0 srl %o4, 8, %g2 add %o0, 2, %o0 - EX2(stb %g2, [%o1]) + EX(stb %g2, [%o1]) add %g5, %o4, %g5 - EX2(stb %o4, [%o1 + 1]) + EX(stb %o4, [%o1 + 1]) add %o1, 2, %o1 3: be,a 1f sll %g5, 16, %o4 - EX(ldub [%o0], %g2, add %g0, 1) + EX(ldub [%o0], %g2) sll %g2, 8, %o4 - EX2(stb %g2, [%o1]) + EX(stb %g2, [%o1]) add %g5, %o4, %g5 sll %g5, 16, %o4 1: addcc %o4, %g5, %g5 @@ -477,113 +448,10 @@ ccslow: cmp %g1, 0 4: addcc %g7, %g5, %g7 retl addx %g0, %g7, %o0 -__csum_partial_copy_end: /* We do these strange calculations for the csum_*_from_user case only, ie. * we only bother with faults on loads... 
*/ -/* o2 = ((g2%20)&3)*8 - * o3 = g1 - (g2/20)*32 - o2 */ -20: - cmp %g2, 20 - blu,a 1f - and %g2, 3, %o2 - sub %g1, 32, %g1 - b 20b - sub %g2, 20, %g2 -1: - sll %o2, 3, %o2 - b 31f - sub %g1, %o2, %o3 - -/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8) - * o3 = g1 - (g2/16)*32 - o2 */ -21: - andcc %g2, 15, %o3 - srl %g2, 4, %g2 - be,a 1f - clr %o2 - add %o3, 1, %o3 - and %o3, 14, %o3 - sll %o3, 3, %o2 -1: - sll %g2, 5, %g2 - sub %g1, %g2, %o3 - b 31f - sub %o3, %o2, %o3 - -/* o0 += (g2/10)*16 - 0x70 - * 01 += (g2/10)*16 - 0x70 - * o2 = (g2 % 10) ? 8 : 0 - * o3 += 0x70 - (g2/10)*16 - o2 */ -22: - cmp %g2, 10 - blu,a 1f - sub %o0, 0x70, %o0 - add %o0, 16, %o0 - add %o1, 16, %o1 - sub %o3, 16, %o3 - b 22b - sub %g2, 10, %g2 -1: - sub %o1, 0x70, %o1 - add %o3, 0x70, %o3 - clr %o2 - tst %g2 - bne,a 1f - mov 8, %o2 -1: - b 31f - sub %o3, %o2, %o3 -96: - and %g1, 3, %g1 - sll %g4, 2, %g4 - add %g1, %g4, %o3 -30: -/* %o1 is dst - * %o3 is # bytes to zero out - * %o4 is faulting address - * %o5 is %pc where fault occurred */ - clr %o2 -31: -/* %o0 is src - * %o1 is dst - * %o2 is # of bytes to copy from src to dst - * %o3 is # bytes to zero out - * %o4 is faulting address - * %o5 is %pc where fault occurred */ - save %sp, -104, %sp - mov %i5, %o0 - mov %i7, %o1 - mov %i4, %o2 - call lookup_fault - mov %g7, %i4 - cmp %o0, 2 - bne 1f - add %g0, -EFAULT, %i5 - tst %i2 - be 2f - mov %i0, %o1 - mov %i1, %o0 -5: - call memcpy - mov %i2, %o2 - tst %o0 - bne,a 2f - add %i3, %i2, %i3 - add %i1, %i2, %i1 -2: - mov %i1, %o0 -6: - call __bzero - mov %i3, %o1 -1: - ld [%sp + 168], %o2 ! 
struct_ptr of parent - st %i5, [%o2] - ret - restore - - .section __ex_table,#alloc - .align 4 - .word 5b,2 - .word 6b,2 +cc_fault: + retl + clr %o0 diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S index 1d230f693dc4..32b626f3fe4d 100644 --- a/arch/sparc/lib/checksum_64.S +++ b/arch/sparc/lib/checksum_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* checksum.S: Sparc V9 optimized checksum code. * * Copyright(C) 1995 Linus Torvalds @@ -13,6 +14,7 @@ * BSD4.4 portable checksum routine */ +#include <linux/export.h> .text csum_partial_fix_alignment: @@ -37,6 +39,8 @@ csum_partial_fix_alignment: .align 32 .globl csum_partial + .type csum_partial,#function + EXPORT_SYMBOL(csum_partial) csum_partial: /* %o0=buff, %o1=len, %o2=sum */ prefetch [%o0 + 0x000], #n_reads clr %o4 diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f6c2a7..e63458194f5a 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -1,13 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* clear_page.S: UltraSparc optimized clear page. * * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) */ +#include <linux/export.h> +#include <linux/pgtable.h> #include <asm/visasm.h> #include <asm/thread_info.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/spitfire.h> #include <asm/head.h> @@ -26,6 +28,7 @@ .text .globl _clear_page + EXPORT_SYMBOL(_clear_page) _clear_page: /* %o0=dest */ ba,pt %xcc, clear_page_common clr %o4 @@ -35,12 +38,13 @@ _clear_page: /* %o0=dest */ */ .align 32 .globl clear_user_page + EXPORT_SYMBOL(clear_user_page) clear_user_page: /* %o0=dest, %o1=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o2 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o4 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/cmpdi2.c b/arch/sparc/lib/cmpdi2.c deleted file mode 100644 index 8c1306437ed1..000000000000 --- a/arch/sparc/lib/cmpdi2.c +++ /dev/null @@ -1,27 +0,0 @@ -#include <linux/module.h> - -#include "libgcc.h" - -word_type __cmpdi2(long long a, long long b) -{ - const DWunion au = { - .ll = a - }; - const DWunion bu = { - .ll = b - }; - - if (au.s.high < bu.s.high) - return 0; - else if (au.s.high > bu.s.high) - return 2; - - if ((unsigned int) au.s.low < (unsigned int) bu.s.low) - return 0; - else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) - return 2; - - return 1; -} - -EXPORT_SYMBOL(__cmpdi2); diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 302c0e60dc2c..e23e6a69ff92 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -1,25 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* copy_in_user.S: Copy from userspace to userspace. * * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text +__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -28,7 +45,7 @@ * to copy register windows around during thread cloning. */ -ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ +ENTRY(raw_copy_in_user) /* %o0=dst, %o1=src, %o2=len */ cmp %o2, 0 be,pn %XCC, 85f or %o0, %o1, %o3 @@ -44,8 +61,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -53,8 +70,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -70,8 +87,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -82,11 +99,12 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, 
%o1 bgu,pt %XCC, 90b add %o0, 1, %o0 retl clr %o0 -ENDPROC(___copy_in_user) +ENDPROC(raw_copy_in_user) +EXPORT_SYMBOL(raw_copy_in_user) diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index 4d2df328e514..7a041f3ebc58 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -1,13 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* clear_page.S: UltraSparc optimized copy page. * * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) */ +#include <linux/export.h> #include <asm/visasm.h> #include <asm/thread_info.h> #include <asm/page.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/spitfire.h> #include <asm/head.h> @@ -44,12 +46,13 @@ .align 32 .globl copy_user_page .type copy_user_page,#function + EXPORT_SYMBOL(copy_user_page) copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o4 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o3 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S index ef095b6c43b1..7bb2ef68881d 100644 --- a/arch/sparc/lib/copy_user.S +++ b/arch/sparc/lib/copy_user.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* copy_user.S: Sparc optimized copy_from_user and copy_to_user code. 
* * Copyright(C) 1995 Linus Torvalds @@ -11,6 +12,7 @@ * Returns 0 if successful, otherwise count of bytes not copied yet */ +#include <linux/export.h> #include <asm/ptrace.h> #include <asm/asmmacro.h> #include <asm/page.h> @@ -19,98 +21,134 @@ /* Work around cpp -rob */ #define ALLOC #alloc #define EXECINSTR #execinstr + +#define EX_ENTRY(l1, l2) \ + .section __ex_table,ALLOC; \ + .align 4; \ + .word l1, l2; \ + .text; + #define EX(x,y,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 +99: retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EX2(x,y,c,d,e,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ 99: c, d, e; \ - ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 + retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EXO2(x,y) \ 98: x, y; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 97f; \ - .text; \ - .align 4 + EX_ENTRY(98b, 97f) -#define EXT(start,end,handler) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4 +#define LD(insn, src, offset, reg, label) \ +98: insn [%src + (offset)], %reg; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) -/* Please do not change following macros unless you change logic used - * in .fixup at the end of this file as well - */ +#define ST(insn, dst, offset, reg, label) \ +98: insn %reg, [%dst + (offset)]; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) /* Both these macros have to start with exactly the same insn */ +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd 
[%src + (offset) + 0x18], %t6; \ - st %t0, [%dst + (offset) + 0x00]; \ - st %t1, [%dst + (offset) + 0x04]; \ - st %t2, [%dst + (offset) + 0x08]; \ - st %t3, [%dst + (offset) + 0x0c]; \ - st %t4, [%dst + (offset) + 0x10]; \ - st %t5, [%dst + (offset) + 0x14]; \ - st %t6, [%dst + (offset) + 0x18]; \ - st %t7, [%dst + (offset) + 0x1c]; - + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \ + ST(st, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(st, dst, offset + 0x04, t1, bigchunk_fault) \ + ST(st, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(st, dst, offset + 0x0c, t3, bigchunk_fault) \ + ST(st, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(st, dst, offset + 0x14, t5, bigchunk_fault) \ + ST(st, dst, offset + 0x18, t6, bigchunk_fault) \ + ST(st, dst, offset + 0x1c, t7, bigchunk_fault) + +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd [%src + (offset) + 0x18], %t6; \ - std %t0, [%dst + (offset) + 0x00]; \ - std %t2, [%dst + (offset) + 0x08]; \ - std %t4, [%dst + (offset) + 0x10]; \ - std %t6, [%dst + (offset) + 0x18]; + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \ + ST(std, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(std, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(std, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(std, dst, offset + 0x18, t6, bigchunk_fault) + + .section .fixup,#alloc,#execinstr +bigchunk_fault: + sub %g7, %g5, %o0 + and %g1, 127, %g1 + retl + add %o0, %g1, %o0 +/* left: offset + 16 + (g1 % 16) */ #define MOVE_LASTCHUNK(src, dst, offset, t0, 
t1, t2, t3) \ - ldd [%src - (offset) - 0x10], %t0; \ - ldd [%src - (offset) - 0x08], %t2; \ - st %t0, [%dst - (offset) - 0x10]; \ - st %t1, [%dst - (offset) - 0x0c]; \ - st %t2, [%dst - (offset) - 0x08]; \ - st %t3, [%dst - (offset) - 0x04]; + LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault) \ + LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x10), t0, lastchunk_fault) \ + ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault) \ + ST(st, dst, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x04), t3, lastchunk_fault) -#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduh [%src + (offset) + 0x00], %t0; \ - lduh [%src + (offset) + 0x02], %t1; \ - lduh [%src + (offset) + 0x04], %t2; \ - lduh [%src + (offset) + 0x06], %t3; \ - sth %t0, [%dst + (offset) + 0x00]; \ - sth %t1, [%dst + (offset) + 0x02]; \ - sth %t2, [%dst + (offset) + 0x04]; \ - sth %t3, [%dst + (offset) + 0x06]; + .section .fixup,#alloc,#execinstr +lastchunk_fault: + and %g1, 15, %g1 + retl + sub %g1, %g5, %o0 +/* left: o3 + (o2 % 16) - offset */ +#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ + LD(lduh, src, offset + 0x00, t0, halfchunk_fault) \ + LD(lduh, src, offset + 0x02, t1, halfchunk_fault) \ + LD(lduh, src, offset + 0x04, t2, halfchunk_fault) \ + LD(lduh, src, offset + 0x06, t3, halfchunk_fault) \ + ST(sth, dst, offset + 0x00, t0, halfchunk_fault) \ + ST(sth, dst, offset + 0x02, t1, halfchunk_fault) \ + ST(sth, dst, offset + 0x04, t2, halfchunk_fault) \ + ST(sth, dst, offset + 0x06, t3, halfchunk_fault) + +/* left: o3 + (o2 % 16) + offset + 2 */ #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src - (offset) - 0x02], %t0; \ - ldub [%src - (offset) - 0x01], %t1; \ - stb %t0, [%dst - (offset) - 0x02]; \ - stb %t1, [%dst - (offset) - 0x01]; + LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault) \ + ST(stb, 
dst, -(offset + 0x01), t1, halfchunk_fault) + + .section .fixup,#alloc,#execinstr +halfchunk_fault: + and %o2, 15, %o2 + sub %o3, %g5, %o3 + retl + add %o2, %o3, %o0 + +/* left: offset + 2 + (o2 % 2) */ +#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \ + LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault) + + .section .fixup,#alloc,#execinstr +last_shortchunk_fault: + and %o2, 1, %o2 + retl + sub %o2, %g5, %o0 .text .align 4 @@ -119,6 +157,7 @@ __copy_user_begin: .globl __copy_user + EXPORT_SYMBOL(__copy_user) dword_align: andcc %o1, 1, %g0 be 4f @@ -179,8 +218,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -80: - EXT(5b, 80b, 50f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne 5b @@ -198,7 +235,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ jmpl %o5 + %lo(copy_user_table_end), %g0 add %o0, %g7, %o0 -copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) @@ -207,7 +243,6 @@ copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) copy_user_table_end: - EXT(copy_user_table, copy_user_table_end, 51f) be copy_user_last7 andcc %g1, 4, %g0 @@ -247,8 +282,6 @@ ldd_std: MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -81: - EXT(ldd_std, 81b, 52f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne ldd_std @@ -287,8 +320,6 @@ cannot_optimize: 10: MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5) MOVE_HALFCHUNK(o1, o0, 0x08, g2, 
g3, g4, g5) -82: - EXT(10b, 82b, 53f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne 10b @@ -305,8 +336,6 @@ byte_chunk: MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3) -83: - EXT(byte_chunk, 83b, 54f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne byte_chunk @@ -322,16 +351,14 @@ short_end: add %o1, %o3, %o1 jmpl %o5 + %lo(short_table_end), %g0 andcc %o2, 1, %g0 -84: - MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3) short_table_end: - EXT(84b, short_table_end, 55f) be 1f nop EX(ldub [%o1], %g2, add %g0, 1) @@ -360,137 +387,8 @@ short_aligned_end: .section .fixup,#alloc,#execinstr .align 4 97: - mov %o2, %g3 -fixupretl: - sethi %hi(PAGE_OFFSET), %g1 - cmp %o0, %g1 - blu 1f - cmp %o1, %g1 - bgeu 1f - ld [%g6 + TI_PREEMPT], %g1 - cmp %g1, 0 - bne 1f - nop - save %sp, -64, %sp - mov %i0, %o0 - call __bzero - mov %g3, %o1 - restore -1: retl - mov %g3, %o0 - -/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */ -50: -/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK - * happens. This is derived from the amount ldd reads, st stores, etc. - * x = g2 % 12; - * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 
0 : (x - 4) * 4); - * o0 += (g2 / 12) * 32; - */ - cmp %g2, 12 - add %o0, %g7, %o0 - bcs 1f - cmp %g2, 24 - bcs 2f - cmp %g2, 36 - bcs 3f - nop - sub %g2, 12, %g2 - sub %g7, 32, %g7 -3: sub %g2, 12, %g2 - sub %g7, 32, %g7 -2: sub %g2, 12, %g2 - sub %g7, 32, %g7 -1: cmp %g2, 4 - bcs,a 60f - clr %g2 - sub %g2, 4, %g2 - sll %g2, 2, %g2 -60: and %g1, 0x7f, %g3 - sub %o0, %g7, %o0 - add %g3, %g7, %g3 - ba fixupretl - sub %g3, %g2, %g3 -51: -/* i = 41 - g2; j = i % 6; - * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16; - * o0 -= (i / 6) * 16 + 16; - */ - neg %g2 - and %g1, 0xf, %g1 - add %g2, 41, %g2 - add %o0, %g1, %o0 -1: cmp %g2, 6 - bcs,a 2f - cmp %g2, 4 - add %g1, 16, %g1 - b 1b - sub %g2, 6, %g2 -2: bcc,a 2f - mov 16, %g2 - inc %g2 - sll %g2, 2, %g2 -2: add %g1, %g2, %g3 - ba fixupretl - sub %o0, %g3, %o0 -52: -/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0; - o0 += (g2 / 8) * 32 */ - andn %g2, 7, %g4 - add %o0, %g7, %o0 - andcc %g2, 4, %g0 - and %g2, 3, %g2 - sll %g4, 2, %g4 - sll %g2, 3, %g2 - bne 60b - sub %g7, %g4, %g7 - ba 60b - clr %g2 -53: -/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0; - o0 += (g2 & 8) */ - and %g2, 3, %g4 - andcc %g2, 4, %g0 - and %g2, 8, %g2 - sll %g4, 1, %g4 - be 1f - add %o0, %g2, %o0 - add %g2, %g4, %g2 -1: and %o2, 0xf, %g3 - add %g3, %o3, %g3 - ba fixupretl - sub %g3, %g2, %g3 -54: -/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? 
(g2 & 1) : 0; - o0 += (g2 / 4) * 2 */ - srl %g2, 2, %o4 - and %g2, 1, %o5 - srl %g2, 1, %g2 - add %o4, %o4, %o4 - and %o5, %g2, %o5 - and %o2, 0xf, %o2 - add %o0, %o4, %o0 - sub %o3, %o5, %o3 - sub %o2, %o4, %o2 - ba fixupretl - add %o2, %o3, %g3 -55: -/* i = 27 - g2; - g3 = (o2 & 1) + i / 4 * 2 + !(i & 3); - o0 -= i / 4 * 2 + 1 */ - neg %g2 - and %o2, 1, %o2 - add %g2, 27, %g2 - srl %g2, 2, %o5 - andcc %g2, 3, %g0 - mov 1, %g2 - add %o5, %o5, %o5 - be,a 1f - clr %g2 -1: add %g2, %o5, %g3 - sub %o0, %g3, %o0 - ba fixupretl - add %g3, %o2, %g3 + retl + mov %o2, %o0 .globl __copy_user_end __copy_user_end: diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index e566c770a0f6..f968e83bc93b 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -1,8 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* csum_copy.S: Checksum+copy code for sparc64 * * Copyright (C) 2005 David S. Miller <davem@davemloft.net> */ +#include <linux/export.h> + #ifdef __KERNEL__ #define GLOBAL_SPARE %g7 #else @@ -63,9 +66,12 @@ add %o5, %o4, %o4 .globl FUNC_NAME -FUNC_NAME: /* %o0=src, %o1=dst, %o2=len, %o3=sum */ + .type FUNC_NAME,#function + EXPORT_SYMBOL(FUNC_NAME) +FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */ LOAD(prefetch, %o0 + 0x000, #n_reads) xor %o0, %o1, %g1 + mov -1, %o3 clr %o4 andcc %g1, 0x3, %g0 bne,pn %icc, 95f diff --git a/arch/sparc/lib/csum_copy_from_user.S b/arch/sparc/lib/csum_copy_from_user.S index e0304e6a2242..b0ba8d4dd439 100644 --- a/arch/sparc/lib/csum_copy_from_user.S +++ b/arch/sparc/lib/csum_copy_from_user.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* csum_copy_from_user.S: Checksum+copy from userspace. * * Copyright (C) 2005 David S. 
Miller (davem@davemloft.net) @@ -8,14 +9,14 @@ .section .fixup, "ax"; \ .align 4; \ 99: retl; \ - mov -1, %o0; \ + mov 0, %o0; \ .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ .align 4; -#define FUNC_NAME __csum_partial_copy_from_user +#define FUNC_NAME csum_and_copy_from_user #define LOAD(type,addr,dest) type##a [addr] %asi, dest #include "csum_copy.S" diff --git a/arch/sparc/lib/csum_copy_to_user.S b/arch/sparc/lib/csum_copy_to_user.S index afd01acc587c..91ba36dbf7d2 100644 --- a/arch/sparc/lib/csum_copy_to_user.S +++ b/arch/sparc/lib/csum_copy_to_user.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* csum_copy_to_user.S: Checksum+copy to userspace. * * Copyright (C) 2005 David S. Miller (davem@davemloft.net) @@ -8,14 +9,14 @@ .section .fixup,"ax"; \ .align 4; \ 99: retl; \ - mov -1, %o0; \ + mov 0, %o0; \ .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ .align 4; -#define FUNC_NAME __csum_partial_copy_to_user +#define FUNC_NAME csum_and_copy_to_user #define STORE(type,src,addr) type##a src, [addr] %asi #include "csum_copy.S" diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index 9614b48b6ef8..4ba901acd572 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -1,22 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. This file is part of GNU CC. -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. 
If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ + */ +#include <linux/export.h> .text .align 4 .globl __divdi3 @@ -279,3 +268,4 @@ __divdi3: .LL81: ret restore +EXPORT_SYMBOL(__divdi3) diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S index b39389f69899..3a9ad8ffdfe8 100644 --- a/arch/sparc/lib/ffs.S +++ b/arch/sparc/lib/ffs.S @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> .register %g2,#scratch @@ -65,6 +67,8 @@ ENTRY(__ffs) add %o2, %g1, %o0 ENDPROC(ffs) ENDPROC(__ffs) +EXPORT_SYMBOL(__ffs) +EXPORT_SYMBOL(ffs) .section .popc_6insn_patch, "ax" .word ffs diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000000000000..ccf97fb7d8cd --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include <linux/export.h> +#include <linux/linkage.h> + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f + mov 0, %o1 + sethi %hi(0xffff0000), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f + mov 32, %o1 + sethi %hi(0xff000000), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f + sethi %hi(0xf0000000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf0000000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f + sethi %hi(0xc0000000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f + sll %o0, 2, %o0 +5: + xnor %g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 + sra %o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b + sra %o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff000000), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b + mov 16, %o1 + ba,pt %xcc, 1b + sll %o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000000000000..87005b67d378 --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include <linux/export.h> +#include <linux/linkage.h> + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx %g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f + mov 63, %g1 + sllx %o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx %g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f + mov -1, %g2 + sllx %o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx %g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f + mov -1, %g2 + sllx %o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx %g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f + mov -1, %g2 + sllx %o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx %g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f + mov -1, %g3 + sllx %o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx %g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 + sra %g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index 95414e0a6808..eebee59b0655 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -1,12 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> .text .align 32 ENTRY(__arch_hweight8) - ba,pt %xcc, __sw_hweight8 + sethi %hi(__sw_hweight8), %g1 + jmpl %g1 + %lo(__sw_hweight8), %g0 nop - nop ENDPROC(__arch_hweight8) +EXPORT_SYMBOL(__arch_hweight8) .section .popc_3insn_patch, "ax" .word __arch_hweight8 sllx %o0, 64-8, %g1 @@ -15,10 +18,11 @@ ENDPROC(__arch_hweight8) .previous ENTRY(__arch_hweight16) - ba,pt %xcc, __sw_hweight16 + sethi %hi(__sw_hweight16), %g1 + jmpl %g1 + %lo(__sw_hweight16), %g0 nop - nop ENDPROC(__arch_hweight16) +EXPORT_SYMBOL(__arch_hweight16) .section .popc_3insn_patch, "ax" .word __arch_hweight16 sllx %o0, 64-16, %g1 @@ -27,10 +31,11 @@ ENDPROC(__arch_hweight16) .previous ENTRY(__arch_hweight32) - ba,pt %xcc, __sw_hweight32 + sethi %hi(__sw_hweight32), %g1 + jmpl %g1 + %lo(__sw_hweight32), %g0 nop - nop 
ENDPROC(__arch_hweight32) +EXPORT_SYMBOL(__arch_hweight32) .section .popc_3insn_patch, "ax" .word __arch_hweight32 sllx %o0, 64-32, %g1 @@ -39,10 +44,11 @@ ENDPROC(__arch_hweight32) .previous ENTRY(__arch_hweight64) - ba,pt %xcc, __sw_hweight64 + sethi %hi(__sw_hweight64), %g1 + jmpl %g1 + %lo(__sw_hweight64), %g0 nop - nop ENDPROC(__arch_hweight64) +EXPORT_SYMBOL(__arch_hweight64) .section .popc_3insn_patch, "ax" .word __arch_hweight64 retl diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c4d42a50ebc0..f3a8cd491ce0 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement the sparc iomap interfaces */ @@ -18,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S index 4742d59029ee..7fa8fd4b795a 100644 --- a/arch/sparc/lib/ipcsum.S +++ b/arch/sparc/lib/ipcsum.S @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> .text @@ -31,3 +33,4 @@ ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */ retl and %o2, %o1, %o0 ENDPROC(ip_fast_csum) +EXPORT_SYMBOL(ip_fast_csum) diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c deleted file mode 100644 index 0c4e35e522fa..000000000000 --- a/arch/sparc/lib/ksyms.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Export of symbols defined in assembler - */ - -/* Tell string.h we don't want memcpy etc. 
as cpp defines */ -#define EXPORT_SYMTAB_STROPS - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/types.h> - -#include <asm/checksum.h> -#include <asm/uaccess.h> -#include <asm/ftrace.h> - -/* string functions */ -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strncmp); - -/* mem* functions */ -extern void *__memscan_zero(void *, size_t); -extern void *__memscan_generic(void *, int, size_t); -extern void *__bzero(void *, size_t); - -EXPORT_SYMBOL(memscan); -EXPORT_SYMBOL(__memscan_zero); -EXPORT_SYMBOL(__memscan_generic); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(__bzero); - -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial); - -#ifdef CONFIG_MCOUNT -EXPORT_SYMBOL(_mcount); -#endif - -/* - * sparc - */ -#ifdef CONFIG_SPARC32 -extern int __ashrdi3(int, int); -extern int __ashldi3(int, int); -extern int __lshrdi3(int, int); -extern int __muldi3(int, int); -extern int __divdi3(int, int); - -extern void (*__copy_1page)(void *, const void *); -extern void (*bzero_1page)(void *); - -extern void ___rw_read_enter(void); -extern void ___rw_read_try(void); -extern void ___rw_read_exit(void); -extern void ___rw_write_enter(void); - -/* Networking helper routines. */ -EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); - -/* Special internal versions of library functions. */ -EXPORT_SYMBOL(__copy_1page); -EXPORT_SYMBOL(__memmove); -EXPORT_SYMBOL(bzero_1page); - -/* Moving data to/from/in userspace. */ -EXPORT_SYMBOL(__copy_user); - -/* Used by asm/spinlock.h */ -#ifdef CONFIG_SMP -EXPORT_SYMBOL(___rw_read_enter); -EXPORT_SYMBOL(___rw_read_try); -EXPORT_SYMBOL(___rw_read_exit); -EXPORT_SYMBOL(___rw_write_enter); -#endif - -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -EXPORT_SYMBOL(__muldi3); -EXPORT_SYMBOL(__divdi3); -#endif - -/* - * sparc64 - */ -#ifdef CONFIG_SPARC64 -/* Networking helper routines. 
*/ -EXPORT_SYMBOL(csum_partial_copy_nocheck); -EXPORT_SYMBOL(__csum_partial_copy_from_user); -EXPORT_SYMBOL(__csum_partial_copy_to_user); -EXPORT_SYMBOL(ip_fast_csum); - -/* Moving data to/from/in userspace. */ -EXPORT_SYMBOL(___copy_to_user); -EXPORT_SYMBOL(___copy_from_user); -EXPORT_SYMBOL(___copy_in_user); -EXPORT_SYMBOL(__clear_user); - -/* RW semaphores */ -EXPORT_SYMBOL(__down_read); -EXPORT_SYMBOL(__down_read_trylock); -EXPORT_SYMBOL(__down_write); -EXPORT_SYMBOL(__down_write_trylock); -EXPORT_SYMBOL(__up_read); -EXPORT_SYMBOL(__up_write); -EXPORT_SYMBOL(__downgrade_write); - -/* Atomic counter implementation. */ -EXPORT_SYMBOL(atomic_add); -EXPORT_SYMBOL(atomic_add_ret); -EXPORT_SYMBOL(atomic_sub); -EXPORT_SYMBOL(atomic_sub_ret); -EXPORT_SYMBOL(atomic64_add); -EXPORT_SYMBOL(atomic64_add_ret); -EXPORT_SYMBOL(atomic64_sub); -EXPORT_SYMBOL(atomic64_sub_ret); -EXPORT_SYMBOL(atomic64_dec_if_positive); - -/* Atomic bit operations. */ -EXPORT_SYMBOL(test_and_set_bit); -EXPORT_SYMBOL(test_and_clear_bit); -EXPORT_SYMBOL(test_and_change_bit); -EXPORT_SYMBOL(set_bit); -EXPORT_SYMBOL(clear_bit); -EXPORT_SYMBOL(change_bit); - -/* Special internal versions of library functions. 
*/ -EXPORT_SYMBOL(_clear_page); -EXPORT_SYMBOL(clear_user_page); -EXPORT_SYMBOL(copy_user_page); - -/* RAID code needs this */ -void VISenter(void); -EXPORT_SYMBOL(VISenter); - -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - -extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); -EXPORT_SYMBOL(xor_vis_2); -EXPORT_SYMBOL(xor_vis_3); -EXPORT_SYMBOL(xor_vis_4); -EXPORT_SYMBOL(xor_vis_5); - -extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); - -EXPORT_SYMBOL(xor_niagara_2); -EXPORT_SYMBOL(xor_niagara_3); -EXPORT_SYMBOL(xor_niagara_4); -EXPORT_SYMBOL(xor_niagara_5); -#endif diff --git a/arch/sparc/lib/libgcc.h b/arch/sparc/lib/libgcc.h index b84fd797f3ea..79845c941b87 100644 --- a/arch/sparc/lib/libgcc.h +++ b/arch/sparc/lib/libgcc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_LIBGCC_H #define __ASM_LIBGCC_H diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S index 64f53f2b673d..47a39f4384a2 100644 --- a/arch/sparc/lib/locks.S +++ b/arch/sparc/lib/locks.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * locks.S: SMP low-level lock primitives on Sparc. 
* @@ -6,6 +7,7 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> #include <asm/ptrace.h> #include <asm/psr.h> #include <asm/smp.h> @@ -48,6 +50,7 @@ ___rw_write_enter_spin_on_wlock: ld [%g1], %g2 .globl ___rw_read_enter +EXPORT_SYMBOL(___rw_read_enter) ___rw_read_enter: orcc %g2, 0x0, %g0 bne,a ___rw_read_enter_spin_on_wlock @@ -59,6 +62,7 @@ ___rw_read_enter: mov %g4, %o7 .globl ___rw_read_exit +EXPORT_SYMBOL(___rw_read_exit) ___rw_read_exit: orcc %g2, 0x0, %g0 bne,a ___rw_read_exit_spin_on_wlock @@ -70,6 +74,7 @@ ___rw_read_exit: mov %g4, %o7 .globl ___rw_read_try +EXPORT_SYMBOL(___rw_read_try) ___rw_read_try: orcc %g2, 0x0, %g0 bne ___rw_read_try_spin_on_wlock @@ -81,6 +86,7 @@ ___rw_read_try: mov %g4, %o7 .globl ___rw_write_enter +EXPORT_SYMBOL(___rw_write_enter) ___rw_write_enter: orcc %g2, 0x0, %g0 bne ___rw_write_enter_spin_on_wlock diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S index 60ebc7cdbee0..09bf581a0ba5 100644 --- a/arch/sparc/lib/lshrdi3.S +++ b/arch/sparc/lib/lshrdi3.S @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> ENTRY(__lshrdi3) @@ -25,3 +27,4 @@ ENTRY(__lshrdi3) retl nop ENDPROC(__lshrdi3) +EXPORT_SYMBOL(__lshrdi3) diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S index 3ad6cbdc2163..f7f7910eb41e 100644 --- a/arch/sparc/lib/mcount.S +++ b/arch/sparc/lib/mcount.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com) * @@ -5,6 +6,7 @@ * This can also be tweaked for kernel stack overflow detection. */ +#include <linux/export.h> #include <linux/linkage.h> /* @@ -16,6 +18,7 @@ .align 32 .globl _mcount .type _mcount,#function + EXPORT_SYMBOL(_mcount) .globl mcount .type mcount,#function _mcount: @@ -24,10 +27,7 @@ mcount: #ifdef CONFIG_DYNAMIC_FTRACE /* Do nothing, the retl/nop below is all we need. 
*/ #else - sethi %hi(function_trace_stop), %g1 - lduw [%g1 + %lo(function_trace_stop)], %g2 - brnz,pn %g2, 2f - sethi %hi(ftrace_trace_function), %g1 + sethi %hi(ftrace_trace_function), %g1 sethi %hi(ftrace_stub), %g2 ldx [%g1 + %lo(ftrace_trace_function)], %g1 or %g2, %lo(ftrace_stub), %g2 @@ -80,11 +80,8 @@ ftrace_stub: .globl ftrace_caller .type ftrace_caller,#function ftrace_caller: - sethi %hi(function_trace_stop), %g1 mov %i7, %g2 - lduw [%g1 + %lo(function_trace_stop)], %g1 - brnz,pn %g1, ftrace_stub - mov %fp, %g3 + mov %fp, %g3 save %sp, -176, %sp mov %g2, %o1 mov %g2, %l0 diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S index efa106c41ed0..c87e8000feba 100644 --- a/arch/sparc/lib/memcmp.S +++ b/arch/sparc/lib/memcmp.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Sparc optimized memcmp code. * * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> @@ -25,3 +27,4 @@ ENTRY(memcmp) 2: retl mov 0, %o0 ENDPROC(memcmp) +EXPORT_SYMBOL(memcmp) diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S index 4d8c497517bd..57b1ae0f5924 100644 --- a/arch/sparc/lib/memcpy.S +++ b/arch/sparc/lib/memcpy.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* memcpy.S: Sparc optimized memcpy and memmove code * Hand optimized from GNU libc's memcpy and memmove * Copyright (C) 1991,1996 Free Software Foundation @@ -7,6 +8,8 @@ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/export.h> + #define FUNC(x) \ .globl x; \ .type x,@function; \ @@ -58,93 +61,11 @@ x: stb %t0, [%dst - (offset) - 0x02]; \ stb %t1, [%dst - (offset) - 0x01]; -/* Both these macros have to start with exactly the same insn */ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - (offset) - 0x20], %t0; \ - ldd [%src - (offset) - 0x18], %t2; \ - ldd [%src - 
(offset) - 0x10], %t4; \ - ldd [%src - (offset) - 0x08], %t6; \ - st %t0, [%dst - (offset) - 0x20]; \ - st %t1, [%dst - (offset) - 0x1c]; \ - st %t2, [%dst - (offset) - 0x18]; \ - st %t3, [%dst - (offset) - 0x14]; \ - st %t4, [%dst - (offset) - 0x10]; \ - st %t5, [%dst - (offset) - 0x0c]; \ - st %t6, [%dst - (offset) - 0x08]; \ - st %t7, [%dst - (offset) - 0x04]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - (offset) - 0x20], %t0; \ - ldd [%src - (offset) - 0x18], %t2; \ - ldd [%src - (offset) - 0x10], %t4; \ - ldd [%src - (offset) - 0x08], %t6; \ - std %t0, [%dst - (offset) - 0x20]; \ - std %t2, [%dst - (offset) - 0x18]; \ - std %t4, [%dst - (offset) - 0x10]; \ - std %t6, [%dst - (offset) - 0x08]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - st %t0, [%dst + (offset) + 0x00]; \ - st %t1, [%dst + (offset) + 0x04]; \ - st %t2, [%dst + (offset) + 0x08]; \ - st %t3, [%dst + (offset) + 0x0c]; - -#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src + (offset) + 0x00], %t0; \ - ldub [%src + (offset) + 0x01], %t1; \ - stb %t0, [%dst + (offset) + 0x00]; \ - stb %t1, [%dst + (offset) + 0x01]; - -#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - srl %t0, shir, %t5; \ - srl %t1, shir, %t6; \ - sll %t0, shil, %t0; \ - or %t5, %prev, %t5; \ - sll %t1, shil, %prev; \ - or %t6, %t0, %t0; \ - srl %t2, shir, %t1; \ - srl %t3, shir, %t6; \ - sll %t2, shil, %t2; \ - or %t1, %prev, %t1; \ - std %t4, [%dst + (offset) + (offset2) - 0x04]; \ - std %t0, [%dst + (offset) + (offset2) + 0x04]; \ - sll %t3, shil, %prev; \ - or %t6, %t2, %t4; - -#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], 
%t2; \ - srl %t0, shir, %t4; \ - srl %t1, shir, %t5; \ - sll %t0, shil, %t6; \ - or %t4, %prev, %t0; \ - sll %t1, shil, %prev; \ - or %t5, %t6, %t1; \ - srl %t2, shir, %t4; \ - srl %t3, shir, %t5; \ - sll %t2, shil, %t6; \ - or %t4, %prev, %t2; \ - sll %t3, shil, %prev; \ - or %t5, %t6, %t3; \ - std %t0, [%dst + (offset) + (offset2) + 0x00]; \ - std %t2, [%dst + (offset) + (offset2) + 0x08]; - .text .align 4 -0: - retl - nop ! Only bcopy returns here and it retuns void... - -#ifdef __KERNEL__ -FUNC(amemmove) -FUNC(__memmove) -#endif FUNC(memmove) +EXPORT_SYMBOL(memmove) cmp %o0, %o1 mov %o0, %g7 bleu 9f @@ -202,6 +123,7 @@ FUNC(memmove) add %o0, 2, %o0 FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ +EXPORT_SYMBOL(memcpy) sub %o0, %o1, %o4 mov %o0, %g7 diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index b7f6334e159f..543dda7b9dac 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -1,16 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* memmove.S: Simple memmove implementation. * * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/export.h> #include <linux/linkage.h> .text ENTRY(memmove) /* o0=dst o1=src o2=len */ - mov %o0, %g1 + brz,pn %o2, 99f + mov %o0, %g1 + cmp %o0, %o1 - bleu,pt %xcc, memcpy + bleu,pt %xcc, 2f add %o1, %o2, %g7 cmp %g7, %o0 bleu,pt %xcc, memcpy @@ -24,7 +28,35 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */ stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 - +99: retl mov %g1, %o0 + + /* We can't just call memcpy for these memmove cases. On some + * chips the memcpy uses cache initializing stores and when dst + * and src are close enough, those can clobber the source data + * before we've loaded it in. 
+ */ +2: or %o0, %o1, %g7 + or %o2, %g7, %g7 + andcc %g7, 0x7, %g0 + bne,pn %xcc, 4f + nop + +3: ldx [%o1], %g7 + add %o1, 8, %o1 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bne,pt %icc, 3b + stx %g7, [%o0 - 0x8] + ba,a,pt %xcc, 99b + +4: ldub [%o1], %g7 + add %o1, 1, %o1 + subcc %o2, 1, %o2 + add %o0, 1, %o0 + bne,pt %icc, 4b + stb %g7, [%o0 - 0x1] + ba,a,pt %xcc, 99b ENDPROC(memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/sparc/lib/memscan_32.S b/arch/sparc/lib/memscan_32.S index 4ff1657dfc24..5386a3a20019 100644 --- a/arch/sparc/lib/memscan_32.S +++ b/arch/sparc/lib/memscan_32.S @@ -1,9 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * memscan.S: Optimized memscan for the Sparc. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/export.h> + /* In essence, this is just a fancy strlen. */ #define LO_MAGIC 0x01010101 @@ -13,6 +16,8 @@ .align 4 .globl __memscan_zero, __memscan_generic .globl memscan +EXPORT_SYMBOL(__memscan_zero) +EXPORT_SYMBOL(__memscan_generic) __memscan_zero: /* %o0 = addr, %o1 = size */ cmp %o1, 0 diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S index 5686dfa5dc15..70a4f21057f2 100644 --- a/arch/sparc/lib/memscan_64.S +++ b/arch/sparc/lib/memscan_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * memscan.S: Optimized memscan for Sparc64. * @@ -5,6 +6,8 @@ * Copyright (C) 1998 David S. 
Miller (davem@redhat.com) */ +#include <linux/export.h> + #define HI_MAGIC 0x8080808080808080 #define LO_MAGIC 0x0101010101010101 #define ASI_PL 0x88 @@ -12,7 +15,11 @@ .text .align 32 .globl __memscan_zero, __memscan_generic + .type __memscan_zero,#function + .type __memscan_generic,#function .globl memscan + EXPORT_SYMBOL(__memscan_zero) + EXPORT_SYMBOL(__memscan_generic) __memscan_zero: /* %o0 = bufp, %o1 = size */ diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 99c017be8719..a33419dbb464 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -1,12 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code * Copyright (C) 1991,1996 Free Software Foundation * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * - * Returns 0, if ok, and number of bytes not yet set if exception - * occurs and we were called as clear_user. + * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and + * number of bytes not yet set if exception occurs and we were called as + * clear_user. 
*/ +#include <linux/export.h> #include <asm/ptrace.h> /* Work around cpp -rob */ @@ -16,7 +19,7 @@ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba 30f; \ +99: retl; \ a, b, %o0; \ .section __ex_table,ALLOC; \ .align 4; \ @@ -24,35 +27,44 @@ .text; \ .align 4 -#define EXT(start,end,handler) \ +#define STORE(source, base, offset, n) \ +98: std source, [base + offset + n]; \ + .section .fixup,ALLOC,EXECINSTR; \ + .align 4; \ +99: ba 30f; \ + sub %o3, n - offset, %o3; \ .section __ex_table,ALLOC; \ .align 4; \ - .word start, 0, end, handler; \ + .word 98b, 99b; \ .text; \ - .align 4 + .align 4; + +#define STORE_LAST(source, base, offset, n) \ + EX(std source, [base - offset - n], \ + add %o1, offset + n); /* Please don't change these macros, unless you change the logic * in the .fixup section below as well. * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ -#define ZERO_BIG_BLOCK(base, offset, source) \ - std source, [base + offset + 0x00]; \ - std source, [base + offset + 0x08]; \ - std source, [base + offset + 0x10]; \ - std source, [base + offset + 0x18]; \ - std source, [base + offset + 0x20]; \ - std source, [base + offset + 0x28]; \ - std source, [base + offset + 0x30]; \ - std source, [base + offset + 0x38]; +#define ZERO_BIG_BLOCK(base, offset, source) \ + STORE(source, base, offset, 0x00); \ + STORE(source, base, offset, 0x08); \ + STORE(source, base, offset, 0x10); \ + STORE(source, base, offset, 0x18); \ + STORE(source, base, offset, 0x20); \ + STORE(source, base, offset, 0x28); \ + STORE(source, base, offset, 0x30); \ + STORE(source, base, offset, 0x38); #define ZERO_LAST_BLOCKS(base, offset, source) \ - std source, [base - offset - 0x38]; \ - std source, [base - offset - 0x30]; \ - std source, [base - offset - 0x28]; \ - std source, [base - offset - 0x20]; \ - std source, [base - offset - 0x18]; \ - std source, [base - offset - 0x10]; \ - std source, [base - offset - 0x08]; \ - std source, [base - offset - 0x00]; + 
STORE_LAST(source, base, offset, 0x38); \ + STORE_LAST(source, base, offset, 0x30); \ + STORE_LAST(source, base, offset, 0x28); \ + STORE_LAST(source, base, offset, 0x20); \ + STORE_LAST(source, base, offset, 0x18); \ + STORE_LAST(source, base, offset, 0x10); \ + STORE_LAST(source, base, offset, 0x08); \ + STORE_LAST(source, base, offset, 0x00); .text .align 4 @@ -61,10 +73,13 @@ __bzero_begin: .globl __bzero + .type __bzero,#function .globl memset - .globl __memset_start, __memset_end -__memset_start: + EXPORT_SYMBOL(__bzero) + EXPORT_SYMBOL(memset) memset: + mov %o0, %g1 + mov 1, %g4 and %o1, 0xff, %g3 sll %g3, 8, %g2 or %g3, %g2, %g3 @@ -89,6 +104,7 @@ memset: sub %o0, %o2, %o0 __bzero: + clr %g4 mov %g0, %g3 1: cmp %o1, 7 @@ -113,8 +129,6 @@ __bzero: ZERO_BIG_BLOCK(%o0, 0x00, %g2) subcc %o3, 128, %o3 ZERO_BIG_BLOCK(%o0, 0x40, %g2) -11: - EXT(10b, 11b, 20f) bne 10b add %o0, 128, %o0 @@ -129,7 +143,6 @@ __bzero: jmp %o4 add %o0, %o2, %o0 -12: ZERO_LAST_BLOCKS(%o0, 0x48, %g2) ZERO_LAST_BLOCKS(%o0, 0x08, %g2) 13: @@ -151,8 +164,8 @@ __bzero: bne,a 8f EX(stb %g3, [%o0], and %o1, 1) 8: - retl - clr %o0 + b 0f + nop 7: be 13b orcc %o1, 0, %g0 @@ -164,39 +177,21 @@ __bzero: bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: + andcc %g4, 1, %g0 + be 5f + nop + retl + mov %g1, %o0 +5: retl clr %o0 -__memset_end: .section .fixup,#alloc,#execinstr .align 4 -20: - cmp %g2, 8 - bleu 1f - and %o1, 0x7f, %o1 - sub %g2, 9, %g2 - add %o3, 64, %o3 -1: - sll %g2, 3, %g2 - add %o3, %o1, %o0 - b 30f - sub %o0, %g2, %o0 -21: - mov 8, %o0 - and %o1, 7, %o1 - sub %o0, %g2, %o0 - sll %o0, 3, %o0 - b 30f - add %o0, %o1, %o0 30: -/* %o4 is faulting address, %o5 is %pc where fault occurred */ - save %sp, -104, %sp - mov %i5, %o0 - mov %i7, %o1 - call lookup_fault - mov %i4, %o2 - ret - restore + and %o1, 0x7f, %o1 + retl + add %o3, %o1, %o0 .globl __bzero_end __bzero_end: diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S index 9794939d1c12..7e1e8cd30a22 100644 --- 
a/arch/sparc/lib/muldi3.S +++ b/arch/sparc/lib/muldi3.S @@ -1,22 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. This file is part of GNU CC. -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ + */ +#include <linux/export.h> .text .align 4 .globl __muldi3 @@ -74,3 +63,4 @@ __muldi3: add %l2, %l0, %i0 ret restore %g0, %l3, %o1 +EXPORT_SYMBOL(__muldi3) diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S new file mode 100644 index 000000000000..5bb4c122a2cf --- /dev/null +++ b/arch/sparc/lib/multi3.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> +#include <linux/linkage.h> + + .text + .align 4 +ENTRY(__multi3) /* %o0 = u, %o1 = v */ + mov %o1, %g1 + srl %o3, 0, %o4 + mulx %o4, %g1, %o1 + srlx %g1, 0x20, %g3 + mulx %g3, %o4, %g7 + sllx %g7, 0x20, %o5 + srl %g1, 0, %o4 + sub %o1, %o5, %o5 + srlx %o5, 0x20, %o5 + addcc %g7, %o5, %g7 + srlx %o3, 0x20, %o5 + mulx %o4, %o5, %o4 + mulx %g3, %o5, %o5 + sethi %hi(0x80000000), %g3 + addcc %g7, %o4, %g7 + srlx %g7, 0x20, %g7 + add %g3, %g3, %g3 + movcc %xcc, %g0, %g3 + addcc %o5, %g7, %o5 + sllx %o4, 0x20, %o4 + add %o1, %o4, %o1 + add %o5, %g3, %g2 + mulx %g1, %o2, %g1 + add %g1, %g2, %g1 + mulx %o0, %o3, %o0 + retl + add %g1, %o0, %o0 +ENDPROC(__multi3) +EXPORT_SYMBOL(__multi3) 
diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S index 536f83507fbf..27478b3f1647 100644 --- a/arch/sparc/lib/strlen.S +++ b/arch/sparc/lib/strlen.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* strlen.S: Sparc optimized strlen code * Hand optimized from GNU libc's strlen * Copyright (C) 1991,1996 Free Software Foundation @@ -5,6 +6,7 @@ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> @@ -78,3 +80,4 @@ ENTRY(strlen) retl mov 2, %o0 ENDPROC(strlen) +EXPORT_SYMBOL(strlen) diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S index c0d1b568c1c5..387bbf621548 100644 --- a/arch/sparc/lib/strncmp_32.S +++ b/arch/sparc/lib/strncmp_32.S @@ -1,8 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * strncmp.S: Hand optimized Sparc assembly of GCC output from GNU libc * generic strncmp routine. */ +#include <linux/export.h> #include <linux/linkage.h> .text @@ -116,3 +118,4 @@ ENTRY(strncmp) retl sub %o3, %o0, %o0 ENDPROC(strncmp) +EXPORT_SYMBOL(strncmp) diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S index 0656627166f3..76c1207ecf5a 100644 --- a/arch/sparc/lib/strncmp_64.S +++ b/arch/sparc/lib/strncmp_64.S @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Sparc64 optimized strncmp code. 
* * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asi.h> @@ -28,3 +30,4 @@ ENTRY(strncmp) retl clr %o0 ENDPROC(strncmp) +EXPORT_SYMBOL(strncmp) diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c deleted file mode 100644 index 1e06ed500682..000000000000 --- a/arch/sparc/lib/ucmpdi2.c +++ /dev/null @@ -1,19 +0,0 @@ -#include <linux/module.h> -#include "libgcc.h" - -word_type __ucmpdi2(unsigned long long a, unsigned long long b) -{ - const DWunion au = {.ll = a}; - const DWunion bu = {.ll = b}; - - if ((unsigned int) au.s.high < (unsigned int) bu.s.high) - return 0; - else if ((unsigned int) au.s.high > (unsigned int) bu.s.high) - return 2; - if ((unsigned int) au.s.low < (unsigned int) bu.s.low) - return 0; - else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) - return 2; - return 1; -} -EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S index 24e0a355e2e8..7a1117ec7696 100644 --- a/arch/sparc/lib/udivdi3.S +++ b/arch/sparc/lib/udivdi3.S @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. This file is part of GNU CC. -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. 
*/ + */ .text .align 4 diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c deleted file mode 100644 index ac96ae236709..000000000000 --- a/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller <davem@redhat.com> - */ - -#include <linux/compiler.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/module.h> - -#include <asm/uaccess.h> - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. - */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < 
end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S index 2c05641c3263..35461e3b2a9b 100644 --- a/arch/sparc/lib/xor.S +++ b/arch/sparc/lib/xor.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * arch/sparc64/lib/xor.S * @@ -8,6 +9,7 @@ * Copyright (C) 2006 David S. Miller <davem@davemloft.net> */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> @@ -90,6 +92,7 @@ ENTRY(xor_vis_2) retl wr %g0, 0, %fprs ENDPROC(xor_vis_2) +EXPORT_SYMBOL(xor_vis_2) ENTRY(xor_vis_3) rd %fprs, %o5 @@ -156,6 +159,7 @@ ENTRY(xor_vis_3) retl wr %g0, 0, %fprs ENDPROC(xor_vis_3) +EXPORT_SYMBOL(xor_vis_3) ENTRY(xor_vis_4) rd %fprs, %o5 @@ -241,6 +245,7 @@ ENTRY(xor_vis_4) retl wr %g0, 0, %fprs ENDPROC(xor_vis_4) +EXPORT_SYMBOL(xor_vis_4) ENTRY(xor_vis_5) save %sp, -192, %sp @@ -347,6 +352,7 @@ ENTRY(xor_vis_5) ret restore ENDPROC(xor_vis_5) +EXPORT_SYMBOL(xor_vis_5) /* Niagara versions. */ ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */ @@ -393,6 +399,7 @@ ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */ ret restore ENDPROC(xor_niagara_2) +EXPORT_SYMBOL(xor_niagara_2) ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ save %sp, -192, %sp @@ -454,6 +461,7 @@ ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ ret restore ENDPROC(xor_niagara_3) +EXPORT_SYMBOL(xor_niagara_3) ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ save %sp, -192, %sp @@ -536,6 +544,7 @@ ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ ret restore ENDPROC(xor_niagara_4) +EXPORT_SYMBOL(xor_niagara_4) ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ save %sp, -192, %sp @@ -634,3 +643,4 @@ ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=s ret restore ENDPROC(xor_niagara_5) +EXPORT_SYMBOL(xor_niagara_5) |
