]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/cris/arch-v10/lib/string.c
CRIS: Import string.c (memcpy) from newlib: fixes compile error with gcc 4
[net-next-2.6.git] / arch / cris / arch-v10 / lib / string.c
CommitLineData
9fe3fd03
JN
1/* A memcpy for CRIS.
2 Copyright (C) 1994-2005 Axis Communications.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 2. Neither the name of Axis Communications nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
20 COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE. */
28
29/* FIXME: This file should really only be used for reference, as the
30 result is somewhat depending on gcc generating what we expect rather
31 than what we describe. An assembly file should be used instead. */
32
33#include <stddef.h>
34
35/* Break-even between movem and move16 is really at 38.7 * 2 bytes, but
36 the block loop works modulo 44, so up to the next multiple of 44 (i.e.
 below 44 * 2 = 88 bytes) we use ordinary code. */
37#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
38
39/* No name ambiguities in this file, so the inline assembly below writes
 register names without a prefix. */
40__asm__ (".syntax no_register_prefix");
41
42void *
43memcpy(void *pdst, const void *psrc, size_t pn)
1da177e4 44{
9fe3fd03 45 /* Now we want the parameters put in special registers.
1da177e4 46 Make sure the compiler is able to make something useful of this.
9fe3fd03 47 As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
1da177e4 48
9fe3fd03
JN
49 If gcc was allright, it really would need no temporaries, and no
50 stack space to save stuff on. */
1da177e4
LT
51
52 register void *return_dst __asm__ ("r10") = pdst;
9fe3fd03
JN
53 register unsigned char *dst __asm__ ("r13") = pdst;
54 register unsigned const char *src __asm__ ("r11") = psrc;
1da177e4 55 register int n __asm__ ("r12") = pn;
9fe3fd03 56
1da177e4
LT
57 /* When src is aligned but not dst, this makes a few extra needless
58 cycles. I believe it would take as many to check that the
59 re-alignment was unnecessary. */
60 if (((unsigned long) dst & 3) != 0
61 /* Don't align if we wouldn't copy more than a few bytes; so we
62 don't have to check further for overflows. */
63 && n >= 3)
64 {
65 if ((unsigned long) dst & 1)
9fe3fd03
JN
66 {
67 n--;
68 *dst = *src;
69 src++;
70 dst++;
71 }
1da177e4
LT
72
73 if ((unsigned long) dst & 2)
9fe3fd03
JN
74 {
75 n -= 2;
76 *(short *) dst = *(short *) src;
77 src += 2;
78 dst += 2;
79 }
1da177e4
LT
80 }
81
9fe3fd03
JN
82 /* Decide which copying method to use. */
83 if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
84 {
85 /* It is not optimal to tell the compiler about clobbering any
86 registers; that will move the saving/restoring of those registers
87 to the function prologue/epilogue, and make non-movem sizes
88 suboptimal. */
89 __asm__ volatile
90 ("\
91 ;; GCC does promise correct register allocations, but let's \n\
92 ;; make sure it keeps its promises. \n\
93 .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
94 .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
95 .endif \n\
96 \n\
97 ;; Save the registers we'll use in the movem process \n\
98 ;; on the stack. \n\
99 subq 11*4,sp \n\
100 movem r10,[sp] \n\
341ac6e4 101 \n\
9fe3fd03
JN
102 ;; Now we've got this: \n\
103 ;; r11 - src \n\
104 ;; r13 - dst \n\
105 ;; r12 - n \n\
341ac6e4 106 \n\
9fe3fd03
JN
107 ;; Update n for the first loop. \n\
108 subq 44,r12 \n\
341ac6e4 1090: \n\
9fe3fd03
JN
110"
111#ifdef __arch_common_v10_v32
112 /* Cater to branch offset difference between v32 and v10. We
113 assume the branch below has an 8-bit offset. */
114" setf\n"
115#endif
116" movem [r11+],r10 \n\
117 subq 44,r12 \n\
118 bge 0b \n\
119 movem r10,[r13+] \n\
341ac6e4 120 \n\
9fe3fd03
JN
121 ;; Compensate for last loop underflowing n. \n\
122 addq 44,r12 \n\
341ac6e4 123 \n\
9fe3fd03
JN
124 ;; Restore registers from stack. \n\
125 movem [sp+],r10"
1da177e4 126
9fe3fd03
JN
127 /* Outputs. */
128 : "=r" (dst), "=r" (src), "=r" (n)
1da177e4 129
9fe3fd03
JN
130 /* Inputs. */
131 : "0" (dst), "1" (src), "2" (n));
132 }
1da177e4 133
9fe3fd03
JN
134 while (n >= 16)
135 {
136 *(long *) dst = *(long *) src; dst += 4; src += 4;
137 *(long *) dst = *(long *) src; dst += 4; src += 4;
138 *(long *) dst = *(long *) src; dst += 4; src += 4;
139 *(long *) dst = *(long *) src; dst += 4; src += 4;
140
141 n -= 16;
142 }
1da177e4 143
1da177e4 144 switch (n)
9fe3fd03 145 {
1da177e4
LT
146 case 0:
147 break;
9fe3fd03 148
1da177e4 149 case 1:
9fe3fd03 150 *dst = *src;
1da177e4 151 break;
9fe3fd03 152
1da177e4 153 case 2:
9fe3fd03 154 *(short *) dst = *(short *) src;
1da177e4 155 break;
9fe3fd03 156
1da177e4 157 case 3:
9fe3fd03
JN
158 *(short *) dst = *(short *) src; dst += 2; src += 2;
159 *dst = *src;
1da177e4 160 break;
9fe3fd03 161
1da177e4 162 case 4:
9fe3fd03 163 *(long *) dst = *(long *) src;
1da177e4 164 break;
9fe3fd03 165
1da177e4 166 case 5:
9fe3fd03
JN
167 *(long *) dst = *(long *) src; dst += 4; src += 4;
168 *dst = *src;
1da177e4 169 break;
9fe3fd03 170
1da177e4 171 case 6:
9fe3fd03
JN
172 *(long *) dst = *(long *) src; dst += 4; src += 4;
173 *(short *) dst = *(short *) src;
1da177e4 174 break;
9fe3fd03 175
1da177e4 176 case 7:
9fe3fd03
JN
177 *(long *) dst = *(long *) src; dst += 4; src += 4;
178 *(short *) dst = *(short *) src; dst += 2; src += 2;
179 *dst = *src;
1da177e4 180 break;
9fe3fd03 181
1da177e4 182 case 8:
9fe3fd03
JN
183 *(long *) dst = *(long *) src; dst += 4; src += 4;
184 *(long *) dst = *(long *) src;
1da177e4 185 break;
9fe3fd03 186
1da177e4 187 case 9:
9fe3fd03
JN
188 *(long *) dst = *(long *) src; dst += 4; src += 4;
189 *(long *) dst = *(long *) src; dst += 4; src += 4;
190 *dst = *src;
1da177e4 191 break;
9fe3fd03 192
1da177e4 193 case 10:
9fe3fd03
JN
194 *(long *) dst = *(long *) src; dst += 4; src += 4;
195 *(long *) dst = *(long *) src; dst += 4; src += 4;
196 *(short *) dst = *(short *) src;
1da177e4 197 break;
9fe3fd03 198
1da177e4 199 case 11:
9fe3fd03
JN
200 *(long *) dst = *(long *) src; dst += 4; src += 4;
201 *(long *) dst = *(long *) src; dst += 4; src += 4;
202 *(short *) dst = *(short *) src; dst += 2; src += 2;
203 *dst = *src;
1da177e4 204 break;
9fe3fd03 205
1da177e4 206 case 12:
9fe3fd03
JN
207 *(long *) dst = *(long *) src; dst += 4; src += 4;
208 *(long *) dst = *(long *) src; dst += 4; src += 4;
209 *(long *) dst = *(long *) src;
1da177e4 210 break;
9fe3fd03 211
1da177e4 212 case 13:
9fe3fd03
JN
213 *(long *) dst = *(long *) src; dst += 4; src += 4;
214 *(long *) dst = *(long *) src; dst += 4; src += 4;
215 *(long *) dst = *(long *) src; dst += 4; src += 4;
216 *dst = *src;
1da177e4 217 break;
9fe3fd03 218
1da177e4 219 case 14:
9fe3fd03
JN
220 *(long *) dst = *(long *) src; dst += 4; src += 4;
221 *(long *) dst = *(long *) src; dst += 4; src += 4;
222 *(long *) dst = *(long *) src; dst += 4; src += 4;
223 *(short *) dst = *(short *) src;
1da177e4 224 break;
9fe3fd03 225
1da177e4 226 case 15:
9fe3fd03
JN
227 *(long *) dst = *(long *) src; dst += 4; src += 4;
228 *(long *) dst = *(long *) src; dst += 4; src += 4;
229 *(long *) dst = *(long *) src; dst += 4; src += 4;
230 *(short *) dst = *(short *) src; dst += 2; src += 2;
231 *dst = *src;
1da177e4 232 break;
9fe3fd03 233 }
1da177e4 234
9fe3fd03
JN
235 return return_dst;
236}