]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * fp_util.S | |
3 | * | |
4 | * Copyright Roman Zippel, 1997. All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, and the entire permission notice in its entirety, | |
11 | * including the disclaimer of warranties. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | |
15 | * 3. The name of the author may not be used to endorse or promote | |
16 | * products derived from this software without specific prior | |
17 | * written permission. | |
18 | * | |
19 | * ALTERNATIVELY, this product may be distributed under the terms of | |
20 | * the GNU General Public License, in which case the provisions of the GPL are | |
21 | * required INSTEAD OF the above restrictions. (This clause is | |
22 | * necessary due to a potential bad interaction between the GPL and | |
23 | * the restrictions contained in a BSD-style copyright.) | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | |
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
36 | */ | |
37 | ||
38 | #include <linux/config.h> | |
39 | #include "fp_emu.h" | |
40 | ||
41 | /* | |
42 | * Here are lots of conversion and normalization functions mainly | |
43 | * used by fp_scan.S | |
44 | * Note that these functions are optimized for "normal" numbers, | |
45 | * these are handled first and exit as fast as possible, this is | |
46 | * especially important for fp_normalize_ext/fp_conv_ext2ext, as | |
47 | * it's called very often. | |
48 | * The register usage is optimized for fp_scan.S and which register | |
49 | * is currently at that time unused, be careful if you want change | |
50 | * something here. %d0 and %d1 is always usable, sometimes %d2 (or | |
51 | * only the lower half) most function have to return the %a0 | |
52 | * unmodified, so that the caller can immediately reuse it. | |
53 | */ | |
54 | ||
55 | .globl fp_ill, fp_end | |
56 | ||
57 | | exits from fp_scan: | |
58 | | illegal instruction | |
59 | fp_ill: | |
60 | printf ,"fp_illegal\n" | |
61 | rts | |
62 | | completed instruction | |
63 | fp_end: | |
64 | tst.l (TASK_MM-8,%a2) | |
65 | jmi 1f | |
66 | tst.l (TASK_MM-4,%a2) | |
67 | jmi 1f | |
68 | tst.l (TASK_MM,%a2) | |
69 | jpl 2f | |
70 | 1: printf ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM) | |
71 | 2: clr.l %d0 | |
72 | rts | |
73 | ||
74 | .globl fp_conv_long2ext, fp_conv_single2ext | |
75 | .globl fp_conv_double2ext, fp_conv_ext2ext | |
76 | .globl fp_normalize_ext, fp_normalize_double | |
77 | .globl fp_normalize_single, fp_normalize_single_fast | |
78 | .globl fp_conv_ext2double, fp_conv_ext2single | |
79 | .globl fp_conv_ext2long, fp_conv_ext2short | |
80 | .globl fp_conv_ext2byte | |
81 | .globl fp_finalrounding_single, fp_finalrounding_single_fast | |
82 | .globl fp_finalrounding_double | |
83 | .globl fp_finalrounding, fp_finaltest, fp_final | |
84 | ||
85 | /* | |
86 | * First several conversion functions from a source operand | |
87 | * into the extended format. Note, that only fp_conv_ext2ext | |
88 | * normalizes the number and is always called after the other | |
89 | * conversion functions, which only move the information into | |
90 | * fp_ext structure. | |
91 | */ | |
92 | ||
93 | | fp_conv_long2ext: | |
94 | | | |
95 | | args: %d0 = source (32-bit long) | |
96 | | %a0 = destination (ptr to struct fp_ext) | |
97 | ||
98 | fp_conv_long2ext: | |
99 | printf PCONV,"l2e: %p -> %p(",2,%d0,%a0 | |
100 | clr.l %d1 | sign defaults to zero | |
101 | tst.l %d0 | |
102 | jeq fp_l2e_zero | is source zero? | |
103 | jpl 1f | positive? | |
104 | moveq #1,%d1 | |
105 | neg.l %d0 | |
106 | 1: swap %d1 | |
107 | move.w #0x3fff+31,%d1 | |
108 | move.l %d1,(%a0)+ | set sign / exp | |
109 | move.l %d0,(%a0)+ | set mantissa | |
110 | clr.l (%a0) | |
111 | subq.l #8,%a0 | restore %a0 | |
112 | printx PCONV,%a0@ | |
113 | printf PCONV,")\n" | |
114 | rts | |
115 | | source is zero | |
116 | fp_l2e_zero: | |
117 | clr.l (%a0)+ | |
118 | clr.l (%a0)+ | |
119 | clr.l (%a0) | |
120 | subq.l #8,%a0 | |
121 | printx PCONV,%a0@ | |
122 | printf PCONV,")\n" | |
123 | rts | |
124 | ||
125 | | fp_conv_single2ext | |
126 | | args: %d0 = source (single-precision fp value) | |
127 | | %a0 = dest (struct fp_ext *) | |
128 | ||
129 | fp_conv_single2ext: | |
130 | printf PCONV,"s2e: %p -> %p(",2,%d0,%a0 | |
131 | move.l %d0,%d1 | |
132 | lsl.l #8,%d0 | shift mantissa | |
133 | lsr.l #8,%d1 | exponent / sign | |
134 | lsr.l #7,%d1 | |
135 | lsr.w #8,%d1 | |
136 | jeq fp_s2e_small | zero / denormal? | |
137 | cmp.w #0xff,%d1 | NaN / Inf? | |
138 | jeq fp_s2e_large | |
139 | bset #31,%d0 | set explizit bit | |
140 | add.w #0x3fff-0x7f,%d1 | re-bias the exponent. | |
141 | 9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp | |
142 | move.l %d0,(%a0)+ | high lword of fp_ext.mant | |
143 | clr.l (%a0) | low lword = 0 | |
144 | subq.l #8,%a0 | |
145 | printx PCONV,%a0@ | |
146 | printf PCONV,")\n" | |
147 | rts | |
148 | | zeros and denormalized | |
149 | fp_s2e_small: | |
150 | | exponent is zero, so explizit bit is already zero too | |
151 | tst.l %d0 | |
152 | jeq 9b | |
153 | move.w #0x4000-0x7f,%d1 | |
154 | jra 9b | |
155 | | infinities and NAN | |
156 | fp_s2e_large: | |
157 | bclr #31,%d0 | clear explizit bit | |
158 | move.w #0x7fff,%d1 | |
159 | jra 9b | |
160 | ||
161 | fp_conv_double2ext: | |
162 | #ifdef FPU_EMU_DEBUG | |
163 | getuser.l %a1@(0),%d0,fp_err_ua2,%a1 | |
164 | getuser.l %a1@(4),%d1,fp_err_ua2,%a1 | |
165 | printf PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0 | |
166 | #endif | |
167 | getuser.l (%a1)+,%d0,fp_err_ua2,%a1 | |
168 | move.l %d0,%d1 | |
169 | lsl.l #8,%d0 | shift high mantissa | |
170 | lsl.l #3,%d0 | |
171 | lsr.l #8,%d1 | exponent / sign | |
172 | lsr.l #7,%d1 | |
173 | lsr.w #5,%d1 | |
174 | jeq fp_d2e_small | zero / denormal? | |
175 | cmp.w #0x7ff,%d1 | NaN / Inf? | |
176 | jeq fp_d2e_large | |
177 | bset #31,%d0 | set explizit bit | |
178 | add.w #0x3fff-0x3ff,%d1 | re-bias the exponent. | |
179 | 9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp | |
180 | move.l %d0,(%a0)+ | |
181 | getuser.l (%a1)+,%d0,fp_err_ua2,%a1 | |
182 | move.l %d0,%d1 | |
183 | lsl.l #8,%d0 | |
184 | lsl.l #3,%d0 | |
185 | move.l %d0,(%a0) | |
186 | moveq #21,%d0 | |
187 | lsr.l %d0,%d1 | |
188 | or.l %d1,-(%a0) | |
189 | subq.l #4,%a0 | |
190 | printx PCONV,%a0@ | |
191 | printf PCONV,")\n" | |
192 | rts | |
193 | | zeros and denormalized | |
194 | fp_d2e_small: | |
195 | | exponent is zero, so explizit bit is already zero too | |
196 | tst.l %d0 | |
197 | jeq 9b | |
198 | move.w #0x4000-0x3ff,%d1 | |
199 | jra 9b | |
200 | | infinities and NAN | |
201 | fp_d2e_large: | |
202 | bclr #31,%d0 | clear explizit bit | |
203 | move.w #0x7fff,%d1 | |
204 | jra 9b | |
205 | ||
206 | | fp_conv_ext2ext: | |
207 | | originally used to get longdouble from userspace, now it's | |
208 | | called before arithmetic operations to make sure the number | |
209 | | is normalized [maybe rename it?]. | |
210 | | args: %a0 = dest (struct fp_ext *) | |
211 | | returns 0 in %d0 for a NaN, otherwise 1 | |
212 | ||
213 | fp_conv_ext2ext: | |
214 | printf PCONV,"e2e: %p(",1,%a0 | |
215 | printx PCONV,%a0@ | |
216 | printf PCONV,"), " | |
217 | move.l (%a0)+,%d0 | |
218 | cmp.w #0x7fff,%d0 | Inf / NaN? | |
219 | jeq fp_e2e_large | |
220 | move.l (%a0),%d0 | |
221 | jpl fp_e2e_small | zero / denorm? | |
222 | | The high bit is set, so normalization is irrelevant. | |
223 | fp_e2e_checkround: | |
224 | subq.l #4,%a0 | |
225 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
226 | move.b (%a0),%d0 | |
227 | jne fp_e2e_round | |
228 | #endif | |
229 | printf PCONV,"%p(",1,%a0 | |
230 | printx PCONV,%a0@ | |
231 | printf PCONV,")\n" | |
232 | moveq #1,%d0 | |
233 | rts | |
234 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
235 | fp_e2e_round: | |
236 | fp_set_sr FPSR_EXC_INEX2 | |
237 | clr.b (%a0) | |
238 | move.w (FPD_RND,FPDATA),%d2 | |
239 | jne fp_e2e_roundother | %d2 == 0, round to nearest | |
240 | tst.b %d0 | test guard bit | |
241 | jpl 9f | zero is closer | |
242 | btst #0,(11,%a0) | test lsb bit | |
243 | jne fp_e2e_doroundup | round to infinity | |
244 | lsl.b #1,%d0 | check low bits | |
245 | jeq 9f | round to zero | |
246 | fp_e2e_doroundup: | |
247 | addq.l #1,(8,%a0) | |
248 | jcc 9f | |
249 | addq.l #1,(4,%a0) | |
250 | jcc 9f | |
251 | move.w #0x8000,(4,%a0) | |
252 | addq.w #1,(2,%a0) | |
253 | 9: printf PNORM,"%p(",1,%a0 | |
254 | printx PNORM,%a0@ | |
255 | printf PNORM,")\n" | |
256 | rts | |
257 | fp_e2e_roundother: | |
258 | subq.w #2,%d2 | |
259 | jcs 9b | %d2 < 2, round to zero | |
260 | jhi 1f | %d2 > 2, round to +infinity | |
261 | tst.b (1,%a0) | to -inf | |
262 | jne fp_e2e_doroundup | negative, round to infinity | |
263 | jra 9b | positive, round to zero | |
264 | 1: tst.b (1,%a0) | to +inf | |
265 | jeq fp_e2e_doroundup | positive, round to infinity | |
266 | jra 9b | negative, round to zero | |
267 | #endif | |
268 | | zeros and subnormals: | |
269 | | try to normalize these anyway. | |
270 | fp_e2e_small: | |
271 | jne fp_e2e_small1 | high lword zero? | |
272 | move.l (4,%a0),%d0 | |
273 | jne fp_e2e_small2 | |
274 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
275 | clr.l %d0 | |
276 | move.b (-4,%a0),%d0 | |
277 | jne fp_e2e_small3 | |
278 | #endif | |
279 | | Genuine zero. | |
280 | clr.w -(%a0) | |
281 | subq.l #2,%a0 | |
282 | printf PNORM,"%p(",1,%a0 | |
283 | printx PNORM,%a0@ | |
284 | printf PNORM,")\n" | |
285 | moveq #1,%d0 | |
286 | rts | |
287 | | definitely subnormal, need to shift all 64 bits | |
288 | fp_e2e_small1: | |
289 | bfffo %d0{#0,#32},%d1 | |
290 | move.w -(%a0),%d2 | |
291 | sub.w %d1,%d2 | |
292 | jcc 1f | |
293 | | Pathologically small, denormalize. | |
294 | add.w %d2,%d1 | |
295 | clr.w %d2 | |
296 | 1: move.w %d2,(%a0)+ | |
297 | move.w %d1,%d2 | |
298 | jeq fp_e2e_checkround | |
299 | | fancy 64-bit double-shift begins here | |
300 | lsl.l %d2,%d0 | |
301 | move.l %d0,(%a0)+ | |
302 | move.l (%a0),%d0 | |
303 | move.l %d0,%d1 | |
304 | lsl.l %d2,%d0 | |
305 | move.l %d0,(%a0) | |
306 | neg.w %d2 | |
307 | and.w #0x1f,%d2 | |
308 | lsr.l %d2,%d1 | |
309 | or.l %d1,-(%a0) | |
310 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
311 | fp_e2e_extra1: | |
312 | clr.l %d0 | |
313 | move.b (-4,%a0),%d0 | |
314 | neg.w %d2 | |
315 | add.w #24,%d2 | |
316 | jcc 1f | |
317 | clr.b (-4,%a0) | |
318 | lsl.l %d2,%d0 | |
319 | or.l %d0,(4,%a0) | |
320 | jra fp_e2e_checkround | |
321 | 1: addq.w #8,%d2 | |
322 | lsl.l %d2,%d0 | |
323 | move.b %d0,(-4,%a0) | |
324 | lsr.l #8,%d0 | |
325 | or.l %d0,(4,%a0) | |
326 | #endif | |
327 | jra fp_e2e_checkround | |
328 | | pathologically small subnormal | |
329 | fp_e2e_small2: | |
330 | bfffo %d0{#0,#32},%d1 | |
331 | add.w #32,%d1 | |
332 | move.w -(%a0),%d2 | |
333 | sub.w %d1,%d2 | |
334 | jcc 1f | |
335 | | Beyond pathologically small, denormalize. | |
336 | add.w %d2,%d1 | |
337 | clr.w %d2 | |
338 | 1: move.w %d2,(%a0)+ | |
339 | ext.l %d1 | |
340 | jeq fp_e2e_checkround | |
341 | clr.l (4,%a0) | |
342 | sub.w #32,%d2 | |
343 | jcs 1f | |
344 | lsl.l %d1,%d0 | lower lword needs only to be shifted | |
345 | move.l %d0,(%a0) | into the higher lword | |
346 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
347 | clr.l %d0 | |
348 | move.b (-4,%a0),%d0 | |
349 | clr.b (-4,%a0) | |
350 | neg.w %d1 | |
351 | add.w #32,%d1 | |
352 | bfins %d0,(%a0){%d1,#8} | |
353 | #endif | |
354 | jra fp_e2e_checkround | |
355 | 1: neg.w %d1 | lower lword is splitted between | |
356 | bfins %d0,(%a0){%d1,#32} | higher and lower lword | |
357 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | |
358 | jra fp_e2e_checkround | |
359 | #else | |
360 | move.w %d1,%d2 | |
361 | jra fp_e2e_extra1 | |
362 | | These are extremely small numbers, that will mostly end up as zero | |
363 | | anyway, so this is only important for correct rounding. | |
364 | fp_e2e_small3: | |
365 | bfffo %d0{#24,#8},%d1 | |
366 | add.w #40,%d1 | |
367 | move.w -(%a0),%d2 | |
368 | sub.w %d1,%d2 | |
369 | jcc 1f | |
370 | | Pathologically small, denormalize. | |
371 | add.w %d2,%d1 | |
372 | clr.w %d2 | |
373 | 1: move.w %d2,(%a0)+ | |
374 | ext.l %d1 | |
375 | jeq fp_e2e_checkround | |
376 | cmp.w #8,%d1 | |
377 | jcs 2f | |
378 | 1: clr.b (-4,%a0) | |
379 | sub.w #64,%d1 | |
380 | jcs 1f | |
381 | add.w #24,%d1 | |
382 | lsl.l %d1,%d0 | |
383 | move.l %d0,(%a0) | |
384 | jra fp_e2e_checkround | |
385 | 1: neg.w %d1 | |
386 | bfins %d0,(%a0){%d1,#8} | |
387 | jra fp_e2e_checkround | |
388 | 2: lsl.l %d1,%d0 | |
389 | move.b %d0,(-4,%a0) | |
390 | lsr.l #8,%d0 | |
391 | move.b %d0,(7,%a0) | |
392 | jra fp_e2e_checkround | |
393 | #endif | |
394 | 1: move.l %d0,%d1 | lower lword is splitted between | |
395 | lsl.l %d2,%d0 | higher and lower lword | |
396 | move.l %d0,(%a0) | |
397 | move.l %d1,%d0 | |
398 | neg.w %d2 | |
399 | add.w #32,%d2 | |
400 | lsr.l %d2,%d0 | |
401 | move.l %d0,-(%a0) | |
402 | jra fp_e2e_checkround | |
403 | | Infinities and NaNs | |
404 | fp_e2e_large: | |
405 | move.l (%a0)+,%d0 | |
406 | jne 3f | |
407 | 1: tst.l (%a0) | |
408 | jne 4f | |
409 | moveq #1,%d0 | |
410 | 2: subq.l #8,%a0 | |
411 | printf PCONV,"%p(",1,%a0 | |
412 | printx PCONV,%a0@ | |
413 | printf PCONV,")\n" | |
414 | rts | |
415 | | we have maybe a NaN, shift off the highest bit | |
416 | 3: lsl.l #1,%d0 | |
417 | jeq 1b | |
418 | | we have a NaN, clear the return value | |
419 | 4: clrl %d0 | |
420 | jra 2b | |
421 | ||
422 | ||
423 | /* | |
424 | * Normalization functions. Call these on the output of general | |
425 | * FP operators, and before any conversion into the destination | |
426 | * formats. fp_normalize_ext has always to be called first, the | |
427 | * following conversion functions expect an already normalized | |
428 | * number. | |
429 | */ | |
430 | ||
431 | | fp_normalize_ext: | |
432 | | normalize an extended in extended (unpacked) format, basically | |
433 | | it does the same as fp_conv_ext2ext, additionally it also does | |
434 | | the necessary postprocessing checks. | |
435 | | args: %a0 (struct fp_ext *) | |
436 | | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2 | |
437 | ||
438 | fp_normalize_ext: | |
439 | printf PNORM,"ne: %p(",1,%a0 | |
440 | printx PNORM,%a0@ | |
441 | printf PNORM,"), " | |
442 | move.l (%a0)+,%d0 | |
443 | cmp.w #0x7fff,%d0 | Inf / NaN? | |
444 | jeq fp_ne_large | |
445 | move.l (%a0),%d0 | |
446 | jpl fp_ne_small | zero / denorm? | |
447 | | The high bit is set, so normalization is irrelevant. | |
448 | fp_ne_checkround: | |
449 | subq.l #4,%a0 | |
450 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
451 | move.b (%a0),%d0 | |
452 | jne fp_ne_round | |
453 | #endif | |
454 | printf PNORM,"%p(",1,%a0 | |
455 | printx PNORM,%a0@ | |
456 | printf PNORM,")\n" | |
457 | rts | |
458 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
459 | fp_ne_round: | |
460 | fp_set_sr FPSR_EXC_INEX2 | |
461 | clr.b (%a0) | |
462 | move.w (FPD_RND,FPDATA),%d2 | |
463 | jne fp_ne_roundother | %d2 == 0, round to nearest | |
464 | tst.b %d0 | test guard bit | |
465 | jpl 9f | zero is closer | |
466 | btst #0,(11,%a0) | test lsb bit | |
467 | jne fp_ne_doroundup | round to infinity | |
468 | lsl.b #1,%d0 | check low bits | |
469 | jeq 9f | round to zero | |
470 | fp_ne_doroundup: | |
471 | addq.l #1,(8,%a0) | |
472 | jcc 9f | |
473 | addq.l #1,(4,%a0) | |
474 | jcc 9f | |
475 | addq.w #1,(2,%a0) | |
476 | move.w #0x8000,(4,%a0) | |
477 | 9: printf PNORM,"%p(",1,%a0 | |
478 | printx PNORM,%a0@ | |
479 | printf PNORM,")\n" | |
480 | rts | |
481 | fp_ne_roundother: | |
482 | subq.w #2,%d2 | |
483 | jcs 9b | %d2 < 2, round to zero | |
484 | jhi 1f | %d2 > 2, round to +infinity | |
485 | tst.b (1,%a0) | to -inf | |
486 | jne fp_ne_doroundup | negative, round to infinity | |
487 | jra 9b | positive, round to zero | |
488 | 1: tst.b (1,%a0) | to +inf | |
489 | jeq fp_ne_doroundup | positive, round to infinity | |
490 | jra 9b | negative, round to zero | |
491 | #endif | |
492 | | Zeros and subnormal numbers | |
493 | | These are probably merely subnormal, rather than "denormalized" | |
494 | | numbers, so we will try to make them normal again. | |
495 | fp_ne_small: | |
496 | jne fp_ne_small1 | high lword zero? | |
497 | move.l (4,%a0),%d0 | |
498 | jne fp_ne_small2 | |
499 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
500 | clr.l %d0 | |
501 | move.b (-4,%a0),%d0 | |
502 | jne fp_ne_small3 | |
503 | #endif | |
504 | | Genuine zero. | |
505 | clr.w -(%a0) | |
506 | subq.l #2,%a0 | |
507 | printf PNORM,"%p(",1,%a0 | |
508 | printx PNORM,%a0@ | |
509 | printf PNORM,")\n" | |
510 | rts | |
511 | | Subnormal. | |
512 | fp_ne_small1: | |
513 | bfffo %d0{#0,#32},%d1 | |
514 | move.w -(%a0),%d2 | |
515 | sub.w %d1,%d2 | |
516 | jcc 1f | |
517 | | Pathologically small, denormalize. | |
518 | add.w %d2,%d1 | |
519 | clr.w %d2 | |
520 | fp_set_sr FPSR_EXC_UNFL | |
521 | 1: move.w %d2,(%a0)+ | |
522 | move.w %d1,%d2 | |
523 | jeq fp_ne_checkround | |
524 | | This is exactly the same 64-bit double shift as seen above. | |
525 | lsl.l %d2,%d0 | |
526 | move.l %d0,(%a0)+ | |
527 | move.l (%a0),%d0 | |
528 | move.l %d0,%d1 | |
529 | lsl.l %d2,%d0 | |
530 | move.l %d0,(%a0) | |
531 | neg.w %d2 | |
532 | and.w #0x1f,%d2 | |
533 | lsr.l %d2,%d1 | |
534 | or.l %d1,-(%a0) | |
535 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
536 | fp_ne_extra1: | |
537 | clr.l %d0 | |
538 | move.b (-4,%a0),%d0 | |
539 | neg.w %d2 | |
540 | add.w #24,%d2 | |
541 | jcc 1f | |
542 | clr.b (-4,%a0) | |
543 | lsl.l %d2,%d0 | |
544 | or.l %d0,(4,%a0) | |
545 | jra fp_ne_checkround | |
546 | 1: addq.w #8,%d2 | |
547 | lsl.l %d2,%d0 | |
548 | move.b %d0,(-4,%a0) | |
549 | lsr.l #8,%d0 | |
550 | or.l %d0,(4,%a0) | |
551 | #endif | |
552 | jra fp_ne_checkround | |
553 | | May or may not be subnormal, if so, only 32 bits to shift. | |
554 | fp_ne_small2: | |
555 | bfffo %d0{#0,#32},%d1 | |
556 | add.w #32,%d1 | |
557 | move.w -(%a0),%d2 | |
558 | sub.w %d1,%d2 | |
559 | jcc 1f | |
560 | | Beyond pathologically small, denormalize. | |
561 | add.w %d2,%d1 | |
562 | clr.w %d2 | |
563 | fp_set_sr FPSR_EXC_UNFL | |
564 | 1: move.w %d2,(%a0)+ | |
565 | ext.l %d1 | |
566 | jeq fp_ne_checkround | |
567 | clr.l (4,%a0) | |
568 | sub.w #32,%d1 | |
569 | jcs 1f | |
570 | lsl.l %d1,%d0 | lower lword needs only to be shifted | |
571 | move.l %d0,(%a0) | into the higher lword | |
572 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
573 | clr.l %d0 | |
574 | move.b (-4,%a0),%d0 | |
575 | clr.b (-4,%a0) | |
576 | neg.w %d1 | |
577 | add.w #32,%d1 | |
578 | bfins %d0,(%a0){%d1,#8} | |
579 | #endif | |
580 | jra fp_ne_checkround | |
581 | 1: neg.w %d1 | lower lword is splitted between | |
582 | bfins %d0,(%a0){%d1,#32} | higher and lower lword | |
583 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | |
584 | jra fp_ne_checkround | |
585 | #else | |
586 | move.w %d1,%d2 | |
587 | jra fp_ne_extra1 | |
588 | | These are extremely small numbers, that will mostly end up as zero | |
589 | | anyway, so this is only important for correct rounding. | |
590 | fp_ne_small3: | |
591 | bfffo %d0{#24,#8},%d1 | |
592 | add.w #40,%d1 | |
593 | move.w -(%a0),%d2 | |
594 | sub.w %d1,%d2 | |
595 | jcc 1f | |
596 | | Pathologically small, denormalize. | |
597 | add.w %d2,%d1 | |
598 | clr.w %d2 | |
599 | 1: move.w %d2,(%a0)+ | |
600 | ext.l %d1 | |
601 | jeq fp_ne_checkround | |
602 | cmp.w #8,%d1 | |
603 | jcs 2f | |
604 | 1: clr.b (-4,%a0) | |
605 | sub.w #64,%d1 | |
606 | jcs 1f | |
607 | add.w #24,%d1 | |
608 | lsl.l %d1,%d0 | |
609 | move.l %d0,(%a0) | |
610 | jra fp_ne_checkround | |
611 | 1: neg.w %d1 | |
612 | bfins %d0,(%a0){%d1,#8} | |
613 | jra fp_ne_checkround | |
614 | 2: lsl.l %d1,%d0 | |
615 | move.b %d0,(-4,%a0) | |
616 | lsr.l #8,%d0 | |
617 | move.b %d0,(7,%a0) | |
618 | jra fp_ne_checkround | |
619 | #endif | |
620 | | Infinities and NaNs, again, same as above. | |
621 | fp_ne_large: | |
622 | move.l (%a0)+,%d0 | |
623 | jne 3f | |
624 | 1: tst.l (%a0) | |
625 | jne 4f | |
626 | 2: subq.l #8,%a0 | |
627 | printf PNORM,"%p(",1,%a0 | |
628 | printx PNORM,%a0@ | |
629 | printf PNORM,")\n" | |
630 | rts | |
631 | | we have maybe a NaN, shift off the highest bit | |
632 | 3: move.l %d0,%d1 | |
633 | lsl.l #1,%d1 | |
634 | jne 4f | |
635 | clr.l (-4,%a0) | |
636 | jra 1b | |
637 | | we have a NaN, test if it is signaling | |
638 | 4: bset #30,%d0 | |
639 | jne 2b | |
640 | fp_set_sr FPSR_EXC_SNAN | |
641 | move.l %d0,(-4,%a0) | |
642 | jra 2b | |
643 | ||
644 | | these next two do rounding as per the IEEE standard. | |
645 | | values for the rounding modes appear to be: | |
646 | | 0: Round to nearest | |
647 | | 1: Round to zero | |
648 | | 2: Round to -Infinity | |
649 | | 3: Round to +Infinity | |
650 | | both functions expect that fp_normalize was already | |
651 | | called (and extended argument is already normalized | |
652 | | as far as possible), these are used if there is different | |
653 | | rounding precision is selected and before converting | |
654 | | into single/double | |
655 | ||
656 | | fp_normalize_double: | |
657 | | normalize an extended with double (52-bit) precision | |
658 | | args: %a0 (struct fp_ext *) | |
659 | ||
660 | fp_normalize_double: | |
661 | printf PNORM,"nd: %p(",1,%a0 | |
662 | printx PNORM,%a0@ | |
663 | printf PNORM,"), " | |
664 | move.l (%a0)+,%d2 | |
665 | tst.w %d2 | |
666 | jeq fp_nd_zero | zero / denormalized | |
667 | cmp.w #0x7fff,%d2 | |
668 | jeq fp_nd_huge | NaN / infinitive. | |
669 | sub.w #0x4000-0x3ff,%d2 | will the exponent fit? | |
670 | jcs fp_nd_small | too small. | |
671 | cmp.w #0x7fe,%d2 | |
672 | jcc fp_nd_large | too big. | |
673 | addq.l #4,%a0 | |
674 | move.l (%a0),%d0 | low lword of mantissa | |
675 | | now, round off the low 11 bits. | |
676 | fp_nd_round: | |
677 | moveq #21,%d1 | |
678 | lsl.l %d1,%d0 | keep 11 low bits. | |
679 | jne fp_nd_checkround | Are they non-zero? | |
680 | | nothing to do here | |
681 | 9: subq.l #8,%a0 | |
682 | printf PNORM,"%p(",1,%a0 | |
683 | printx PNORM,%a0@ | |
684 | printf PNORM,")\n" | |
685 | rts | |
686 | | Be careful with the X bit! It contains the lsb | |
687 | | from the shift above, it is needed for round to nearest. | |
688 | fp_nd_checkround: | |
689 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
690 | and.w #0xf800,(2,%a0) | clear bits 0-10 | |
691 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
692 | jne 2f | %d2 == 0, round to nearest | |
693 | tst.l %d0 | test guard bit | |
694 | jpl 9b | zero is closer | |
695 | | here we test the X bit by adding it to %d2 | |
696 | clr.w %d2 | first set z bit, addx only clears it | |
697 | addx.w %d2,%d2 | test lsb bit | |
698 | | IEEE754-specified "round to even" behaviour. If the guard | |
699 | | bit is set, then the number is odd, so rounding works like | |
700 | | in grade-school arithmetic (i.e. 1.5 rounds to 2.0) | |
701 | | Otherwise, an equal distance rounds towards zero, so as not | |
702 | | to produce an odd number. This is strange, but it is what | |
703 | | the standard says. | |
704 | jne fp_nd_doroundup | round to infinity | |
705 | lsl.l #1,%d0 | check low bits | |
706 | jeq 9b | round to zero | |
707 | fp_nd_doroundup: | |
708 | | round (the mantissa, that is) towards infinity | |
709 | add.l #0x800,(%a0) | |
710 | jcc 9b | no overflow, good. | |
711 | addq.l #1,-(%a0) | extend to high lword | |
712 | jcc 1f | no overflow, good. | |
713 | | Yow! we have managed to overflow the mantissa. Since this | |
714 | | only happens when %d1 was 0xfffff800, it is now zero, so | |
715 | | reset the high bit, and increment the exponent. | |
716 | move.w #0x8000,(%a0) | |
717 | addq.w #1,-(%a0) | |
718 | cmp.w #0x43ff,(%a0)+ | exponent now overflown? | |
719 | jeq fp_nd_large | yes, so make it infinity. | |
720 | 1: subq.l #4,%a0 | |
721 | printf PNORM,"%p(",1,%a0 | |
722 | printx PNORM,%a0@ | |
723 | printf PNORM,")\n" | |
724 | rts | |
725 | 2: subq.w #2,%d2 | |
726 | jcs 9b | %d2 < 2, round to zero | |
727 | jhi 3f | %d2 > 2, round to +infinity | |
728 | | Round to +Inf or -Inf. High word of %d2 contains the | |
729 | | sign of the number, by the way. | |
730 | swap %d2 | to -inf | |
731 | tst.b %d2 | |
732 | jne fp_nd_doroundup | negative, round to infinity | |
733 | jra 9b | positive, round to zero | |
734 | 3: swap %d2 | to +inf | |
735 | tst.b %d2 | |
736 | jeq fp_nd_doroundup | positive, round to infinity | |
737 | jra 9b | negative, round to zero | |
738 | | Exponent underflow. Try to make a denormal, and set it to | |
739 | | the smallest possible fraction if this fails. | |
740 | fp_nd_small: | |
741 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit | |
742 | move.w #0x3c01,(-2,%a0) | 2**-1022 | |
743 | neg.w %d2 | degree of underflow | |
744 | cmp.w #32,%d2 | single or double shift? | |
745 | jcc 1f | |
746 | | Again, another 64-bit double shift. | |
747 | move.l (%a0),%d0 | |
748 | move.l %d0,%d1 | |
749 | lsr.l %d2,%d0 | |
750 | move.l %d0,(%a0)+ | |
751 | move.l (%a0),%d0 | |
752 | lsr.l %d2,%d0 | |
753 | neg.w %d2 | |
754 | add.w #32,%d2 | |
755 | lsl.l %d2,%d1 | |
756 | or.l %d1,%d0 | |
757 | move.l (%a0),%d1 | |
758 | move.l %d0,(%a0) | |
759 | | Check to see if we shifted off any significant bits | |
760 | lsl.l %d2,%d1 | |
761 | jeq fp_nd_round | Nope, round. | |
762 | bset #0,%d0 | Yes, so set the "sticky bit". | |
763 | jra fp_nd_round | Now, round. | |
764 | | Another 64-bit single shift and store | |
765 | 1: sub.w #32,%d2 | |
766 | cmp.w #32,%d2 | Do we really need to shift? | |
767 | jcc 2f | No, the number is too small. | |
768 | move.l (%a0),%d0 | |
769 | clr.l (%a0)+ | |
770 | move.l %d0,%d1 | |
771 | lsr.l %d2,%d0 | |
772 | neg.w %d2 | |
773 | add.w #32,%d2 | |
774 | | Again, check to see if we shifted off any significant bits. | |
775 | tst.l (%a0) | |
776 | jeq 1f | |
777 | bset #0,%d0 | Sticky bit. | |
778 | 1: move.l %d0,(%a0) | |
779 | lsl.l %d2,%d1 | |
780 | jeq fp_nd_round | |
781 | bset #0,%d0 | |
782 | jra fp_nd_round | |
783 | | Sorry, the number is just too small. | |
784 | 2: clr.l (%a0)+ | |
785 | clr.l (%a0) | |
786 | moveq #1,%d0 | Smallest possible fraction, | |
787 | jra fp_nd_round | round as desired. | |
788 | | zero and denormalized | |
789 | fp_nd_zero: | |
790 | tst.l (%a0)+ | |
791 | jne 1f | |
792 | tst.l (%a0) | |
793 | jne 1f | |
794 | subq.l #8,%a0 | |
795 | printf PNORM,"%p(",1,%a0 | |
796 | printx PNORM,%a0@ | |
797 | printf PNORM,")\n" | |
798 | rts | zero. nothing to do. | |
799 | | These are not merely subnormal numbers, but true denormals, | |
800 | | i.e. pathologically small (exponent is 2**-16383) numbers. | |
801 | | It is clearly impossible for even a normal extended number | |
802 | | with that exponent to fit into double precision, so just | |
803 | | write these ones off as "too darn small". | |
804 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit | |
805 | clr.l (%a0) | |
806 | clr.l -(%a0) | |
807 | move.w #0x3c01,-(%a0) | i.e. 2**-1022 | |
808 | addq.l #6,%a0 | |
809 | moveq #1,%d0 | |
810 | jra fp_nd_round | round. | |
811 | | Exponent overflow. Just call it infinity. | |
812 | fp_nd_large: | |
813 | move.w #0x7ff,%d0 | |
814 | and.w (6,%a0),%d0 | |
815 | jeq 1f | |
816 | fp_set_sr FPSR_EXC_INEX2 | |
817 | 1: fp_set_sr FPSR_EXC_OVFL | |
818 | move.w (FPD_RND,FPDATA),%d2 | |
819 | jne 3f | %d2 = 0 round to nearest | |
820 | 1: move.w #0x7fff,(-2,%a0) | |
821 | clr.l (%a0)+ | |
822 | clr.l (%a0) | |
823 | 2: subq.l #8,%a0 | |
824 | printf PNORM,"%p(",1,%a0 | |
825 | printx PNORM,%a0@ | |
826 | printf PNORM,")\n" | |
827 | rts | |
828 | 3: subq.w #2,%d2 | |
829 | jcs 5f | %d2 < 2, round to zero | |
830 | jhi 4f | %d2 > 2, round to +infinity | |
831 | tst.b (-3,%a0) | to -inf | |
832 | jne 1b | |
833 | jra 5f | |
834 | 4: tst.b (-3,%a0) | to +inf | |
835 | jeq 1b | |
836 | 5: move.w #0x43fe,(-2,%a0) | |
837 | moveq #-1,%d0 | |
838 | move.l %d0,(%a0)+ | |
839 | move.w #0xf800,%d0 | |
840 | move.l %d0,(%a0) | |
841 | jra 2b | |
842 | | Infinities or NaNs | |
843 | fp_nd_huge: | |
844 | subq.l #4,%a0 | |
845 | printf PNORM,"%p(",1,%a0 | |
846 | printx PNORM,%a0@ | |
847 | printf PNORM,")\n" | |
848 | rts | |
849 | ||
850 | | fp_normalize_single: | |
851 | | normalize an extended with single (23-bit) precision | |
852 | | args: %a0 (struct fp_ext *) | |
853 | ||
854 | fp_normalize_single: | |
855 | printf PNORM,"ns: %p(",1,%a0 | |
856 | printx PNORM,%a0@ | |
857 | printf PNORM,") " | |
858 | addq.l #2,%a0 | |
859 | move.w (%a0)+,%d2 | |
860 | jeq fp_ns_zero | zero / denormalized | |
861 | cmp.w #0x7fff,%d2 | |
862 | jeq fp_ns_huge | NaN / infinitive. | |
863 | sub.w #0x4000-0x7f,%d2 | will the exponent fit? | |
864 | jcs fp_ns_small | too small. | |
865 | cmp.w #0xfe,%d2 | |
866 | jcc fp_ns_large | too big. | |
867 | move.l (%a0)+,%d0 | get high lword of mantissa | |
868 | fp_ns_round: | |
869 | tst.l (%a0) | check the low lword | |
870 | jeq 1f | |
871 | | Set a sticky bit if it is non-zero. This should only | |
872 | | affect the rounding in what would otherwise be equal- | |
873 | | distance situations, which is what we want it to do. | |
874 | bset #0,%d0 | |
875 | 1: clr.l (%a0) | zap it from memory. | |
876 | | now, round off the low 8 bits of the hi lword. | |
877 | tst.b %d0 | 8 low bits. | |
878 | jne fp_ns_checkround | Are they non-zero? | |
879 | | nothing to do here | |
880 | subq.l #8,%a0 | |
881 | printf PNORM,"%p(",1,%a0 | |
882 | printx PNORM,%a0@ | |
883 | printf PNORM,")\n" | |
884 | rts | |
885 | fp_ns_checkround: | |
886 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
887 | clr.b -(%a0) | clear low byte of high lword | |
888 | subq.l #3,%a0 | |
889 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
890 | jne 2f | %d2 == 0, round to nearest | |
891 | tst.b %d0 | test guard bit | |
892 | jpl 9f | zero is closer | |
893 | btst #8,%d0 | test lsb bit | |
894 | | round to even behaviour, see above. | |
895 | jne fp_ns_doroundup | round to infinity | |
896 | lsl.b #1,%d0 | check low bits | |
897 | jeq 9f | round to zero | |
898 | fp_ns_doroundup: | |
899 | | round (the mantissa, that is) towards infinity | |
900 | add.l #0x100,(%a0) | |
901 | jcc 9f | no overflow, good. | |
902 | | Overflow. This means that the %d1 was 0xffffff00, so it | |
903 | | is now zero. We will set the mantissa to reflect this, and | |
904 | | increment the exponent (checking for overflow there too) | |
905 | move.w #0x8000,(%a0) | |
906 | addq.w #1,-(%a0) | |
907 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | |
908 | jeq fp_ns_large | yes, so make it infinity. | |
909 | 9: subq.l #4,%a0 | |
910 | printf PNORM,"%p(",1,%a0 | |
911 | printx PNORM,%a0@ | |
912 | printf PNORM,")\n" | |
913 | rts | |
914 | | check nondefault rounding modes | |
915 | 2: subq.w #2,%d2 | |
916 | jcs 9b | %d2 < 2, round to zero | |
917 | jhi 3f | %d2 > 2, round to +infinity | |
918 | tst.b (-3,%a0) | to -inf | |
919 | jne fp_ns_doroundup | negative, round to infinity | |
920 | jra 9b | positive, round to zero | |
921 | 3: tst.b (-3,%a0) | to +inf | |
922 | jeq fp_ns_doroundup | positive, round to infinity | |
923 | jra 9b | negative, round to zero | |
924 | | Exponent underflow. Try to make a denormal, and set it to | |
925 | | the smallest possible fraction if this fails. | |
926 | fp_ns_small: | |
927 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit | |
928 | move.w #0x3f81,(-2,%a0) | 2**-126 | |
929 | neg.w %d2 | degree of underflow | |
930 | cmp.w #32,%d2 | single or double shift? | |
931 | jcc 2f | |
932 | | a 32-bit shift. | |
933 | move.l (%a0),%d0 | |
934 | move.l %d0,%d1 | |
935 | lsr.l %d2,%d0 | |
936 | move.l %d0,(%a0)+ | |
937 | | Check to see if we shifted off any significant bits. | |
938 | neg.w %d2 | |
939 | add.w #32,%d2 | |
940 | lsl.l %d2,%d1 | |
941 | jeq 1f | |
942 | bset #0,%d0 | Sticky bit. | |
943 | | Check the lower lword | |
944 | 1: tst.l (%a0) | |
945 | jeq fp_ns_round | |
946 | clr (%a0) | |
947 | bset #0,%d0 | Sticky bit. | |
948 | jra fp_ns_round | |
949 | | Sorry, the number is just too small. | |
950 | 2: clr.l (%a0)+ | |
951 | clr.l (%a0) | |
952 | moveq #1,%d0 | Smallest possible fraction, | |
953 | jra fp_ns_round | round as desired. | |
954 | | Exponent overflow. Just call it infinity. | |
955 | fp_ns_large: | |
956 | tst.b (3,%a0) | |
957 | jeq 1f | |
958 | fp_set_sr FPSR_EXC_INEX2 | |
959 | 1: fp_set_sr FPSR_EXC_OVFL | |
960 | move.w (FPD_RND,FPDATA),%d2 | |
961 | jne 3f | %d2 = 0 round to nearest | |
962 | 1: move.w #0x7fff,(-2,%a0) | |
963 | clr.l (%a0)+ | |
964 | clr.l (%a0) | |
965 | 2: subq.l #8,%a0 | |
966 | printf PNORM,"%p(",1,%a0 | |
967 | printx PNORM,%a0@ | |
968 | printf PNORM,")\n" | |
969 | rts | |
970 | 3: subq.w #2,%d2 | |
971 | jcs 5f | %d2 < 2, round to zero | |
972 | jhi 4f | %d2 > 2, round to +infinity | |
973 | tst.b (-3,%a0) | to -inf | |
974 | jne 1b | |
975 | jra 5f | |
976 | 4: tst.b (-3,%a0) | to +inf | |
977 | jeq 1b | |
978 | 5: move.w #0x407e,(-2,%a0) | |
979 | move.l #0xffffff00,(%a0)+ | |
980 | clr.l (%a0) | |
981 | jra 2b | |
982 | | zero and denormalized | |
983 | fp_ns_zero: | |
984 | tst.l (%a0)+ | |
985 | jne 1f | |
986 | tst.l (%a0) | |
987 | jne 1f | |
988 | subq.l #8,%a0 | |
989 | printf PNORM,"%p(",1,%a0 | |
990 | printx PNORM,%a0@ | |
991 | printf PNORM,")\n" | |
992 | rts | zero. nothing to do. | |
993 | | These are not merely subnormal numbers, but true denormals, | |
994 | | i.e. pathologically small (exponent is 2**-16383) numbers. | |
995 | | It is clearly impossible for even a normal extended number | |
996 | | with that exponent to fit into single precision, so just | |
997 | | write these ones off as "too darn small". | |
998 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit | |
999 | clr.l (%a0) | |
1000 | clr.l -(%a0) | |
1001 | move.w #0x3f81,-(%a0) | i.e. 2**-126 | |
1002 | addq.l #6,%a0 | |
1003 | moveq #1,%d0 | |
1004 | jra fp_ns_round | round. | |
1005 | | Infinities or NaNs | |
1006 | fp_ns_huge: | |
1007 | subq.l #4,%a0 | |
1008 | printf PNORM,"%p(",1,%a0 | |
1009 | printx PNORM,%a0@ | |
1010 | printf PNORM,")\n" | |
1011 | rts | |
1012 | ||
1013 | | fp_normalize_single_fast: | |
1014 | | normalize an extended with single (23-bit) precision | |
1015 | | this is only used by fsgldiv/fsgdlmul, where the | |
1016 | | operand is not completly normalized. | |
1017 | | args: %a0 (struct fp_ext *) | |
1018 | ||
1019 | fp_normalize_single_fast: | |
1020 | printf PNORM,"nsf: %p(",1,%a0 | |
1021 | printx PNORM,%a0@ | |
1022 | printf PNORM,") " | |
1023 | addq.l #2,%a0 | |
1024 | move.w (%a0)+,%d2 | |
1025 | cmp.w #0x7fff,%d2 | |
1026 | jeq fp_nsf_huge | NaN / infinitive. | |
1027 | move.l (%a0)+,%d0 | get high lword of mantissa | |
1028 | fp_nsf_round: | |
1029 | tst.l (%a0) | check the low lword | |
1030 | jeq 1f | |
1031 | | Set a sticky bit if it is non-zero. This should only | |
1032 | | affect the rounding in what would otherwise be equal- | |
1033 | | distance situations, which is what we want it to do. | |
1034 | bset #0,%d0 | |
1035 | 1: clr.l (%a0) | zap it from memory. | |
1036 | | now, round off the low 8 bits of the hi lword. | |
1037 | tst.b %d0 | 8 low bits. | |
1038 | jne fp_nsf_checkround | Are they non-zero? | |
1039 | | nothing to do here | |
1040 | subq.l #8,%a0 | |
1041 | printf PNORM,"%p(",1,%a0 | |
1042 | printx PNORM,%a0@ | |
1043 | printf PNORM,")\n" | |
1044 | rts | |
1045 | fp_nsf_checkround: | |
1046 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
1047 | clr.b -(%a0) | clear low byte of high lword | |
1048 | subq.l #3,%a0 | |
1049 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1050 | jne 2f | %d2 == 0, round to nearest | |
1051 | tst.b %d0 | test guard bit | |
1052 | jpl 9f | zero is closer | |
1053 | btst #8,%d0 | test lsb bit | |
1054 | | round to even behaviour, see above. | |
1055 | jne fp_nsf_doroundup | round to infinity | |
1056 | lsl.b #1,%d0 | check low bits | |
1057 | jeq 9f | round to zero | |
1058 | fp_nsf_doroundup: | |
1059 | | round (the mantissa, that is) towards infinity | |
1060 | add.l #0x100,(%a0) | |
1061 | jcc 9f | no overflow, good. | |
1062 | | Overflow. This means that the %d1 was 0xffffff00, so it | |
1063 | | is now zero. We will set the mantissa to reflect this, and | |
1064 | | increment the exponent (checking for overflow there too) | |
1065 | move.w #0x8000,(%a0) | |
1066 | addq.w #1,-(%a0) | |
1067 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | |
1068 | jeq fp_nsf_large | yes, so make it infinity. | |
1069 | 9: subq.l #4,%a0 | |
1070 | printf PNORM,"%p(",1,%a0 | |
1071 | printx PNORM,%a0@ | |
1072 | printf PNORM,")\n" | |
1073 | rts | |
1074 | | check nondefault rounding modes | |
1075 | 2: subq.w #2,%d2 | |
1076 | jcs 9b | %d2 < 2, round to zero | |
1077 | jhi 3f | %d2 > 2, round to +infinity | |
1078 | tst.b (-3,%a0) | to -inf | |
1079 | jne fp_nsf_doroundup | negative, round to infinity | |
1080 | jra 9b | positive, round to zero | |
1081 | 3: tst.b (-3,%a0) | to +inf | |
1082 | jeq fp_nsf_doroundup | positive, round to infinity | |
1083 | jra 9b | negative, round to zero | |
1084 | | Exponent overflow. Just call it infinity. | |
1085 | fp_nsf_large: | |
1086 | tst.b (3,%a0) | |
1087 | jeq 1f | |
1088 | fp_set_sr FPSR_EXC_INEX2 | |
1089 | 1: fp_set_sr FPSR_EXC_OVFL | |
1090 | move.w (FPD_RND,FPDATA),%d2 | |
1091 | jne 3f | %d2 = 0 round to nearest | |
1092 | 1: move.w #0x7fff,(-2,%a0) | |
1093 | clr.l (%a0)+ | |
1094 | clr.l (%a0) | |
1095 | 2: subq.l #8,%a0 | |
1096 | printf PNORM,"%p(",1,%a0 | |
1097 | printx PNORM,%a0@ | |
1098 | printf PNORM,")\n" | |
1099 | rts | |
1100 | 3: subq.w #2,%d2 | |
1101 | jcs 5f | %d2 < 2, round to zero | |
1102 | jhi 4f | %d2 > 2, round to +infinity | |
1103 | tst.b (-3,%a0) | to -inf | |
1104 | jne 1b | |
1105 | jra 5f | |
1106 | 4: tst.b (-3,%a0) | to +inf | |
1107 | jeq 1b | |
1108 | 5: move.w #0x407e,(-2,%a0) | |
1109 | move.l #0xffffff00,(%a0)+ | |
1110 | clr.l (%a0) | |
1111 | jra 2b | |
1112 | | Infinities or NaNs | |
1113 | fp_nsf_huge: | |
1114 | subq.l #4,%a0 | |
1115 | printf PNORM,"%p(",1,%a0 | |
1116 | printx PNORM,%a0@ | |
1117 | printf PNORM,")\n" | |
1118 | rts | |
1119 | ||
1120 | | conv_ext2int (macro): | |
1121 | | Generates a subroutine that converts an extended value to an | |
1122 | | integer of a given size, again, with the appropriate type of | |
1123 | | rounding. | |
1124 | ||
1125 | | Macro arguments: | |
1126 | | s: size, as given in an assembly instruction. | |
1127 | | b: number of bits in that size. | |
1128 | ||
1129 | | Subroutine arguments: | |
1130 | | %a0: source (struct fp_ext *) | |
1131 | ||
1132 | | Returns the integer in %d0 (like it should) | |
1133 | ||
1134 | .macro conv_ext2int s,b | |
1135 | .set inf,(1<<(\b-1))-1 | i.e. MAXINT | |
1136 | printf PCONV,"e2i%d: %p(",2,#\b,%a0 | |
1137 | printx PCONV,%a0@ | |
1138 | printf PCONV,") " | |
1139 | addq.l #2,%a0 | |
1140 | move.w (%a0)+,%d2 | exponent | |
1141 | jeq fp_e2i_zero\b | zero / denorm (== 0, here) | |
1142 | cmp.w #0x7fff,%d2 | |
1143 | jeq fp_e2i_huge\b | Inf / NaN | |
1144 | sub.w #0x3ffe,%d2 | |
1145 | jcs fp_e2i_small\b | |
1146 | cmp.w #\b,%d2 | |
1147 | jhi fp_e2i_large\b | |
1148 | move.l (%a0),%d0 | |
1149 | move.l %d0,%d1 | |
1150 | lsl.l %d2,%d1 | |
1151 | jne fp_e2i_round\b | |
1152 | tst.l (4,%a0) | |
1153 | jne fp_e2i_round\b | |
1154 | neg.w %d2 | |
1155 | add.w #32,%d2 | |
1156 | lsr.l %d2,%d0 | |
1157 | 9: tst.w (-4,%a0) | |
1158 | jne 1f | |
1159 | tst.\s %d0 | |
1160 | jmi fp_e2i_large\b | |
1161 | printf PCONV,"-> %p\n",1,%d0 | |
1162 | rts | |
1163 | 1: neg.\s %d0 | |
1164 | jeq 1f | |
1165 | jpl fp_e2i_large\b | |
1166 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1167 | rts | |
1168 | fp_e2i_round\b: | |
1169 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
1170 | neg.w %d2 | |
1171 | add.w #32,%d2 | |
1172 | .if \b>16 | |
1173 | jeq 5f | |
1174 | .endif | |
1175 | lsr.l %d2,%d0 | |
1176 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1177 | jne 2f | %d2 == 0, round to nearest | |
1178 | tst.l %d1 | test guard bit | |
1179 | jpl 9b | zero is closer | |
1180 | btst %d2,%d0 | test lsb bit (%d2 still 0) | |
1181 | jne fp_e2i_doroundup\b | |
1182 | lsl.l #1,%d1 | check low bits | |
1183 | jne fp_e2i_doroundup\b | |
1184 | tst.l (4,%a0) | |
1185 | jeq 9b | |
1186 | fp_e2i_doroundup\b: | |
1187 | addq.l #1,%d0 | |
1188 | jra 9b | |
1189 | | check nondefault rounding modes | |
1190 | 2: subq.w #2,%d2 | |
1191 | jcs 9b | %d2 < 2, round to zero | |
1192 | jhi 3f | %d2 > 2, round to +infinity | |
1193 | tst.w (-4,%a0) | to -inf | |
1194 | jne fp_e2i_doroundup\b | negative, round to infinity | |
1195 | jra 9b | positive, round to zero | |
1196 | 3: tst.w (-4,%a0) | to +inf | |
1197 | jeq fp_e2i_doroundup\b | positive, round to infinity | |
1198 | jra 9b | negative, round to zero | |
1199 | | we are only want -2**127 get correctly rounded here, | |
1200 | | since the guard bit is in the lower lword. | |
1201 | | everything else ends up anyway as overflow. | |
1202 | .if \b>16 | |
1203 | 5: move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1204 | jne 2b | %d2 == 0, round to nearest | |
1205 | move.l (4,%a0),%d1 | test guard bit | |
1206 | jpl 9b | zero is closer | |
1207 | lsl.l #1,%d1 | check low bits | |
1208 | jne fp_e2i_doroundup\b | |
1209 | jra 9b | |
1210 | .endif | |
1211 | fp_e2i_zero\b: | |
1212 | clr.l %d0 | |
1213 | tst.l (%a0)+ | |
1214 | jne 1f | |
1215 | tst.l (%a0) | |
1216 | jeq 3f | |
1217 | 1: subq.l #4,%a0 | |
1218 | fp_clr_sr FPSR_EXC_UNFL | fp_normalize_ext has set this bit | |
1219 | fp_e2i_small\b: | |
1220 | fp_set_sr FPSR_EXC_INEX2 | |
1221 | clr.l %d0 | |
1222 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1223 | subq.w #2,%d2 | |
1224 | jcs 3f | %d2 < 2, round to nearest/zero | |
1225 | jhi 2f | %d2 > 2, round to +infinity | |
1226 | tst.w (-4,%a0) | to -inf | |
1227 | jeq 3f | |
1228 | subq.\s #1,%d0 | |
1229 | jra 3f | |
1230 | 2: tst.w (-4,%a0) | to +inf | |
1231 | jne 3f | |
1232 | addq.\s #1,%d0 | |
1233 | 3: printf PCONV,"-> %p\n",1,%d0 | |
1234 | rts | |
1235 | fp_e2i_large\b: | |
1236 | fp_set_sr FPSR_EXC_OPERR | |
1237 | move.\s #inf,%d0 | |
1238 | tst.w (-4,%a0) | |
1239 | jeq 1f | |
1240 | addq.\s #1,%d0 | |
1241 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1242 | rts | |
1243 | fp_e2i_huge\b: | |
1244 | move.\s (%a0),%d0 | |
1245 | tst.l (%a0) | |
1246 | jne 1f | |
1247 | tst.l (%a0) | |
1248 | jeq fp_e2i_large\b | |
1249 | | fp_normalize_ext has set this bit already | |
1250 | | and made the number nonsignaling | |
1251 | 1: fp_tst_sr FPSR_EXC_SNAN | |
1252 | jne 1f | |
1253 | fp_set_sr FPSR_EXC_OPERR | |
1254 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1255 | rts | |
1256 | .endm | |
1257 | ||
1258 | fp_conv_ext2long: | |
1259 | conv_ext2int l,32 | |
1260 | ||
1261 | fp_conv_ext2short: | |
1262 | conv_ext2int w,16 | |
1263 | ||
1264 | fp_conv_ext2byte: | |
1265 | conv_ext2int b,8 | |
1266 | ||
1267 | fp_conv_ext2double: | |
1268 | jsr fp_normalize_double | |
1269 | printf PCONV,"e2d: %p(",1,%a0 | |
1270 | printx PCONV,%a0@ | |
1271 | printf PCONV,"), " | |
1272 | move.l (%a0)+,%d2 | |
1273 | cmp.w #0x7fff,%d2 | |
1274 | jne 1f | |
1275 | move.w #0x7ff,%d2 | |
1276 | move.l (%a0)+,%d0 | |
1277 | jra 2f | |
1278 | 1: sub.w #0x3fff-0x3ff,%d2 | |
1279 | move.l (%a0)+,%d0 | |
1280 | jmi 2f | |
1281 | clr.w %d2 | |
1282 | 2: lsl.w #5,%d2 | |
1283 | lsl.l #7,%d2 | |
1284 | lsl.l #8,%d2 | |
1285 | move.l %d0,%d1 | |
1286 | lsl.l #1,%d0 | |
1287 | lsr.l #4,%d0 | |
1288 | lsr.l #8,%d0 | |
1289 | or.l %d2,%d0 | |
1290 | putuser.l %d0,(%a1)+,fp_err_ua2,%a1 | |
1291 | moveq #21,%d0 | |
1292 | lsl.l %d0,%d1 | |
1293 | move.l (%a0),%d0 | |
1294 | lsr.l #4,%d0 | |
1295 | lsr.l #7,%d0 | |
1296 | or.l %d1,%d0 | |
1297 | putuser.l %d0,(%a1),fp_err_ua2,%a1 | |
1298 | #ifdef FPU_EMU_DEBUG | |
1299 | getuser.l %a1@(-4),%d0,fp_err_ua2,%a1 | |
1300 | getuser.l %a1@(0),%d1,fp_err_ua2,%a1 | |
1301 | printf PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1 | |
1302 | #endif | |
1303 | rts | |
1304 | ||
1305 | fp_conv_ext2single: | |
1306 | jsr fp_normalize_single | |
1307 | printf PCONV,"e2s: %p(",1,%a0 | |
1308 | printx PCONV,%a0@ | |
1309 | printf PCONV,"), " | |
1310 | move.l (%a0)+,%d1 | |
1311 | cmp.w #0x7fff,%d1 | |
1312 | jne 1f | |
1313 | move.w #0xff,%d1 | |
1314 | move.l (%a0)+,%d0 | |
1315 | jra 2f | |
1316 | 1: sub.w #0x3fff-0x7f,%d1 | |
1317 | move.l (%a0)+,%d0 | |
1318 | jmi 2f | |
1319 | clr.w %d1 | |
1320 | 2: lsl.w #8,%d1 | |
1321 | lsl.l #7,%d1 | |
1322 | lsl.l #8,%d1 | |
1323 | bclr #31,%d0 | |
1324 | lsr.l #8,%d0 | |
1325 | or.l %d1,%d0 | |
1326 | printf PCONV,"%08x\n",1,%d0 | |
1327 | rts | |
1328 | ||
1329 | | special return addresses for instr that | |
1330 | | encode the rounding precision in the opcode | |
1331 | | (e.g. fsmove,fdmove) | |
1332 | ||
1333 | fp_finalrounding_single: | |
1334 | addq.l #8,%sp | |
1335 | jsr fp_normalize_ext | |
1336 | jsr fp_normalize_single | |
1337 | jra fp_finaltest | |
1338 | ||
1339 | fp_finalrounding_single_fast: | |
1340 | addq.l #8,%sp | |
1341 | jsr fp_normalize_ext | |
1342 | jsr fp_normalize_single_fast | |
1343 | jra fp_finaltest | |
1344 | ||
1345 | fp_finalrounding_double: | |
1346 | addq.l #8,%sp | |
1347 | jsr fp_normalize_ext | |
1348 | jsr fp_normalize_double | |
1349 | jra fp_finaltest | |
1350 | ||
1351 | | fp_finaltest: | |
1352 | | set the emulated status register based on the outcome of an | |
1353 | | emulated instruction. | |
1354 | ||
1355 | fp_finalrounding: | |
1356 | addq.l #8,%sp | |
1357 | | printf ,"f: %p\n",1,%a0 | |
1358 | jsr fp_normalize_ext | |
1359 | move.w (FPD_PREC,FPDATA),%d0 | |
1360 | subq.w #1,%d0 | |
1361 | jcs fp_finaltest | |
1362 | jne 1f | |
1363 | jsr fp_normalize_single | |
1364 | jra 2f | |
1365 | 1: jsr fp_normalize_double | |
1366 | 2:| printf ,"f: %p\n",1,%a0 | |
1367 | fp_finaltest: | |
1368 | | First, we do some of the obvious tests for the exception | |
1369 | | status byte and condition code bytes of fp_sr here, so that | |
1370 | | they do not have to be handled individually by every | |
1371 | | emulated instruction. | |
1372 | clr.l %d0 | |
1373 | addq.l #1,%a0 | |
1374 | tst.b (%a0)+ | sign | |
1375 | jeq 1f | |
1376 | bset #FPSR_CC_NEG-24,%d0 | N bit | |
1377 | 1: cmp.w #0x7fff,(%a0)+ | exponent | |
1378 | jeq 2f | |
1379 | | test for zero | |
1380 | moveq #FPSR_CC_Z-24,%d1 | |
1381 | tst.l (%a0)+ | |
1382 | jne 9f | |
1383 | tst.l (%a0) | |
1384 | jne 9f | |
1385 | jra 8f | |
1386 | | infinitiv and NAN | |
1387 | 2: moveq #FPSR_CC_NAN-24,%d1 | |
1388 | move.l (%a0)+,%d2 | |
1389 | lsl.l #1,%d2 | ignore high bit | |
1390 | jne 8f | |
1391 | tst.l (%a0) | |
1392 | jne 8f | |
1393 | moveq #FPSR_CC_INF-24,%d1 | |
1394 | 8: bset %d1,%d0 | |
1395 | 9: move.b %d0,(FPD_FPSR+0,FPDATA) | set condition test result | |
1396 | | move instructions enter here | |
1397 | | Here, we test things in the exception status byte, and set | |
1398 | | other things in the accrued exception byte accordingly. | |
1399 | | Emulated instructions can set various things in the former, | |
1400 | | as defined in fp_emu.h. | |
1401 | fp_final: | |
1402 | move.l (FPD_FPSR,FPDATA),%d0 | |
1403 | #if 0 | |
1404 | btst #FPSR_EXC_SNAN,%d0 | EXC_SNAN | |
1405 | jne 1f | |
1406 | btst #FPSR_EXC_OPERR,%d0 | EXC_OPERR | |
1407 | jeq 2f | |
1408 | 1: bset #FPSR_AEXC_IOP,%d0 | set IOP bit | |
1409 | 2: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL | |
1410 | jeq 1f | |
1411 | bset #FPSR_AEXC_OVFL,%d0 | set OVFL bit | |
1412 | 1: btst #FPSR_EXC_UNFL,%d0 | EXC_UNFL | |
1413 | jeq 1f | |
1414 | btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2 | |
1415 | jeq 1f | |
1416 | bset #FPSR_AEXC_UNFL,%d0 | set UNFL bit | |
1417 | 1: btst #FPSR_EXC_DZ,%d0 | EXC_INEX1 | |
1418 | jeq 1f | |
1419 | bset #FPSR_AEXC_DZ,%d0 | set DZ bit | |
1420 | 1: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL | |
1421 | jne 1f | |
1422 | btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2 | |
1423 | jne 1f | |
1424 | btst #FPSR_EXC_INEX1,%d0 | EXC_INEX1 | |
1425 | jeq 2f | |
1426 | 1: bset #FPSR_AEXC_INEX,%d0 | set INEX bit | |
1427 | 2: move.l %d0,(FPD_FPSR,FPDATA) | |
1428 | #else | |
1429 | | same as above, greatly optimized, but untested (yet) | |
1430 | move.l %d0,%d2 | |
1431 | lsr.l #5,%d0 | |
1432 | move.l %d0,%d1 | |
1433 | lsr.l #4,%d1 | |
1434 | or.l %d0,%d1 | |
1435 | and.b #0x08,%d1 | |
1436 | move.l %d2,%d0 | |
1437 | lsr.l #6,%d0 | |
1438 | or.l %d1,%d0 | |
1439 | move.l %d2,%d1 | |
1440 | lsr.l #4,%d1 | |
1441 | or.b #0xdf,%d1 | |
1442 | and.b %d1,%d0 | |
1443 | move.l %d2,%d1 | |
1444 | lsr.l #7,%d1 | |
1445 | and.b #0x80,%d1 | |
1446 | or.b %d1,%d0 | |
1447 | and.b #0xf8,%d0 | |
1448 | or.b %d0,%d2 | |
1449 | move.l %d2,(FPD_FPSR,FPDATA) | |
1450 | #endif | |
1451 | move.b (FPD_FPSR+2,FPDATA),%d0 | |
1452 | and.b (FPD_FPCR+2,FPDATA),%d0 | |
1453 | jeq 1f | |
1454 | printf ,"send signal!!!\n" | |
1455 | 1: jra fp_end |