[BACK]Return to add_n.S CVS log [TXT][DIR] Up to [local] / OpenXM / src / kan96xx / gmp-2.0.2 / mpn / m88k / mc88110

Annotation of OpenXM/src/kan96xx/gmp-2.0.2/mpn/m88k/mc88110/add_n.S, Revision 1.1.1.1

1.1       maekawa     1: ; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
                      2: ; sum in a third limb vector.
                      3:
                      4: ; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
                      5:
                      6: ; This file is part of the GNU MP Library.
                      7:
                      8: ; The GNU MP Library is free software; you can redistribute it and/or modify
                      9: ; it under the terms of the GNU Library General Public License as published by
                     10: ; the Free Software Foundation; either version 2 of the License, or (at your
                     11: ; option) any later version.
                     12:
                     13: ; The GNU MP Library is distributed in the hope that it will be useful, but
                     14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: ; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
                     16: ; License for more details.
                     17:
                     18: ; You should have received a copy of the GNU Library General Public License
                     19: ; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: ; MA 02111-1307, USA.
                     22:
                     23:
                      24: ; INPUT PARAMETERS
                          ; Symbolic names for the four argument registers, by role:
                          ;   res_ptr  destination limb vector (only stored to)
                          ;   s1_ptr   first source limb vector (only loaded from)
                          ;   s2_ptr   second source limb vector (only loaded from)
                          ;   size     number of limbs in each vector, must be > 0
                          ; All four registers are modified by the routine as it advances.
                      25: #define res_ptr        r2
                      26: #define s1_ptr r3
                      27: #define s2_ptr r4
                      28: #define size   r5
                     29:
                     30: #include "sysdep.h"
                     31:
                      32:        text
                      33:        align   16
                      34:        global  C_SYMBOL_NAME(__mpn_add_n)
                      35: C_SYMBOL_NAME(__mpn_add_n):
                          ; Adds the size limbs at s1_ptr to the size limbs at s2_ptr, stores
                          ; the sums at res_ptr and returns the final carry (0 or 1) in r2.
                          ; A limb is one 32-bit word: every pointer advances 4 bytes per limb.
                          ;
                          ; The carry flag chains all limb additions together.  Only the
                          ; addu.co / addu.cio / addu.ci forms touch it; pointer and counter
                          ; updates use plain addu/subu, and comparisons go through cmp into
                          ; r12, so the carry survives between limbs.
                          ;
                          ; Bit 2 of the pointers selects one of three paths, so that the
                          ; doubleword accesses (ld.d / st.d) are only applied to pointers
                          ; that share the same alignment:
                          ;   V1a  s2_ptr and res_ptr agree: ld.d on s2, st.d on res
                          ;   V1b  s1_ptr and res_ptr agree: swap s1_ptr/s2_ptr, then V1a
                          ;   V2   only s1_ptr and s2_ptr agree: ld.d on both sources,
                          ;        single-word stores to res
                          ; Scratch registers written: r6-r10 and r12.
                      36:        addu.co  r0,r0,r0               ; clear cy flag (0+0 gives carry-out 0)
                      37:        xor      r12,s2_ptr,res_ptr     ; bit 2 set if s2_ptr/res_ptr alignment differs
                      38:        bb1      2,r12,L1               ; differs: try pairing s1_ptr with res_ptr
                      39: ; **  V1a  **
                          ; Here s2_ptr and res_ptr have the same bit 2.
                      40: L0:    bb0      2,res_ptr,L_v1         ; skip if bit 2 of res_ptr is clear (doubleword aligned)
                      41: /* Add least significant limb separately to align res_ptr and s2_ptr */
                      42:        ld       r10,s1_ptr,0
                      43:        addu     s1_ptr,s1_ptr,4
                      44:        ld       r8,s2_ptr,0
                      45:        addu     s2_ptr,s2_ptr,4
                      46:        subu     size,size,1
                      47:        addu.co  r6,r10,r8              ; first limb: carry-out only
                      48:        st       r6,res_ptr,0
                      49:        addu     res_ptr,res_ptr,4
                      50: L_v1:  cmp      r12,size,2
                      51:        bb1      lt,r12,Lend2           ; 0 or 1 limb left: use the shared tail
                      52:
                      53:        ld       r10,s1_ptr,0           ; preload two s1 limbs with single loads
                      54:        ld       r12,s1_ptr,4           ;   (s1_ptr may not be doubleword aligned)
                      55:        ld.d     r8,s2_ptr,0            ; preload two s2 limbs into r8,r9
                      56:        subu     size,size,10           ; Loop1 adds 8 limbs and preloads 2 more
                      57:        bcnd     lt0,size,Lfin1         ; fewer than 10 limbs left: skip Loop1
                      58: /* Add blocks of 8 limbs until less than 8 limbs remain */
                          ; Software pipelined: each pair is added from registers loaded
                          ; earlier, while the loads for the following pair are issued
                          ; before the current pair is stored.
                      59:        align    8
                      60: Loop1: subu     size,size,8
                      61:        addu.cio r6,r10,r8              ; limb 0 (carry in and out)
                      62:        ld       r10,s1_ptr,8           ; fetch s1 limb 2 for the next pair
                      63:        addu.cio r7,r12,r9              ; limb 1
                      64:        ld       r12,s1_ptr,12
                      65:        ld.d     r8,s2_ptr,8
                      66:        st.d     r6,res_ptr,0           ; store limbs 0-1 as one doubleword
                      67:        addu.cio r6,r10,r8              ; limb 2
                      68:        ld       r10,s1_ptr,16
                      69:        addu.cio r7,r12,r9              ; limb 3
                      70:        ld       r12,s1_ptr,20
                      71:        ld.d     r8,s2_ptr,16
                      72:        st.d     r6,res_ptr,8           ; store limbs 2-3
                      73:        addu.cio r6,r10,r8              ; limb 4
                      74:        ld       r10,s1_ptr,24
                      75:        addu.cio r7,r12,r9              ; limb 5
                      76:        ld       r12,s1_ptr,28
                      77:        ld.d     r8,s2_ptr,24
                      78:        st.d     r6,res_ptr,16          ; store limbs 4-5
                      79:        addu.cio r6,r10,r8              ; limb 6
                      80:        ld       r10,s1_ptr,32          ; preload the pair for the next pass or the tail
                      81:        addu.cio r7,r12,r9              ; limb 7
                      82:        ld       r12,s1_ptr,36
                      83:        addu     s1_ptr,s1_ptr,32
                      84:        ld.d     r8,s2_ptr,32           ; s2_ptr is advanced only after this load
                      85:        addu     s2_ptr,s2_ptr,32
                      86:        st.d     r6,res_ptr,24          ; store limbs 6-7
                      87:        addu     res_ptr,res_ptr,32
                      88:        bcnd     ge0,size,Loop1         ; repeat while at least 10 limbs remain
                      89:
                      90: Lfin1: addu     size,size,8-2          ; rebias: size = limbs left - 4 (2 are preloaded)
                      91:        bcnd     lt0,size,Lend1         ; fewer than 4 left: just finish the loaded pair
                      92: /* Add blocks of 2 limbs until less than 2 limbs remain */
                      93: Loope1:        addu.cio r6,r10,r8
                      94:        ld       r10,s1_ptr,8           ; preload the following pair
                      95:        addu.cio r7,r12,r9
                      96:        ld       r12,s1_ptr,12
                      97:        ld.d     r8,s2_ptr,8
                      98:        st.d     r6,res_ptr,0
                      99:        subu     size,size,2
                     100:        addu     s1_ptr,s1_ptr,8
                     101:        addu     s2_ptr,s2_ptr,8
                     102:        addu     res_ptr,res_ptr,8
                     103:        bcnd     ge0,size,Loope1
                     104: Lend1: addu.cio r6,r10,r8              ; add the pair that is already in registers
                     105:        addu.cio r7,r12,r9
                     106:        st.d     r6,res_ptr,0
                     107:
                     108:        bb0      0,size,Lret1           ; bit 0 of size clear: no odd limb left
                     109: /* Add last limb */
                          ; Offset 8: the pointers still address the pair stored just above.
                     110:        ld       r10,s1_ptr,8
                     111:        ld       r8,s2_ptr,8
                     112:        addu.cio r6,r10,r8
                     113:        st       r6,res_ptr,8
                     114:
                     115: Lret1: jmp.n    r1                     ; return; the next instruction fills the delay slot
                     116:        addu.ci  r2,r0,r0               ; return carry-out from most sign. limb (r2 = 0+0+cy)
                     117:
                     118: L1:    xor      r12,s1_ptr,res_ptr     ; bit 2 set if s1_ptr/res_ptr alignment differs
                     119:        bb1      2,r12,L2               ; differs too: neither source matches res_ptr
                     120: ; **  V1b  **
                          ; s1_ptr shares alignment with res_ptr.  Exchange the two source
                          ; pointers (r12 is the temporary) so that the V1a code, which
                          ; uses ld.d on s2_ptr, applies unchanged.
                     121:        or       r12,r0,s2_ptr
                     122:        or       s2_ptr,r0,s1_ptr
                     123:        or       s1_ptr,r0,r12
                     124:        br       L0
                     125:
                     126: ; **  V2  **
                     127: /* If we come here, the alignment of s1_ptr and res_ptr as well as the
                     128:    alignment of s2_ptr and res_ptr differ.  Since there are only two ways
                     129:    things can be aligned (that we care about) we now know that the alignment
                     130:    of s1_ptr and s2_ptr are the same.  */
                     131:
                     132: L2:    cmp      r12,size,1
                     133:        bb1      eq,r12,Ljone           ; exactly one limb: add it directly
                     134:        bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned (bit 2 clear)
                     135: /* Add least significant limb separately to align s1_ptr and s2_ptr */
                     136:        ld       r10,s1_ptr,0
                     137:        addu     s1_ptr,s1_ptr,4
                     138:        ld       r8,s2_ptr,0
                     139:        addu     s2_ptr,s2_ptr,4
                     140:        subu     size,size,1
                     141:        addu.co  r6,r10,r8              ; first limb: carry-out only
                     142:        st       r6,res_ptr,0
                     143:        addu     res_ptr,res_ptr,4
                     144:
                     145: L_v2:  subu     size,size,8
                     146:        bcnd     lt0,size,Lfin2         ; fewer than 8 limbs left: skip Loop2
                     147: /* Add blocks of 8 limbs until less than 8 limbs remain */
                          ; No preloading here: each pair is loaded, added and stored in
                          ; turn.  Results go out as single words because res_ptr does not
                          ; share the alignment of the sources.
                     148:        align    8
                     149: Loop2: subu     size,size,8
                     150:        ld.d     r8,s1_ptr,0            ; r8,r9 = two s1 limbs
                     151:        ld.d     r6,s2_ptr,0            ; r6,r7 = two s2 limbs
                     152:        addu.cio r8,r8,r6
                     153:        st       r8,res_ptr,0
                     154:        addu.cio r9,r9,r7
                     155:        st       r9,res_ptr,4
                     156:        ld.d     r8,s1_ptr,8
                     157:        ld.d     r6,s2_ptr,8
                     158:        addu.cio r8,r8,r6
                     159:        st       r8,res_ptr,8
                     160:        addu.cio r9,r9,r7
                     161:        st       r9,res_ptr,12
                     162:        ld.d     r8,s1_ptr,16
                     163:        ld.d     r6,s2_ptr,16
                     164:        addu.cio r8,r8,r6
                     165:        st       r8,res_ptr,16
                     166:        addu.cio r9,r9,r7
                     167:        st       r9,res_ptr,20
                     168:        ld.d     r8,s1_ptr,24
                     169:        ld.d     r6,s2_ptr,24
                     170:        addu.cio r8,r8,r6
                     171:        st       r8,res_ptr,24
                     172:        addu.cio r9,r9,r7
                     173:        st       r9,res_ptr,28
                     174:        addu     s1_ptr,s1_ptr,32
                     175:        addu     s2_ptr,s2_ptr,32
                     176:        addu     res_ptr,res_ptr,32
                     177:        bcnd     ge0,size,Loop2         ; repeat while at least 8 limbs remain
                     178:
                     179: Lfin2: addu     size,size,8-2          ; rebias: size = limbs left - 2
                     180:        bcnd     lt0,size,Lend2         ; fewer than 2 left: go to the tail
                          ; Add blocks of 2 limbs until less than 2 limbs remain
                     181: Loope2:        ld.d     r8,s1_ptr,0
                     182:        ld.d     r6,s2_ptr,0
                     183:        addu.cio r8,r8,r6
                     184:        st       r8,res_ptr,0
                     185:        addu.cio r9,r9,r7
                     186:        st       r9,res_ptr,4
                     187:        subu     size,size,2
                     188:        addu     s1_ptr,s1_ptr,8
                     189:        addu     s2_ptr,s2_ptr,8
                     190:        addu     res_ptr,res_ptr,8
                     191:        bcnd     ge0,size,Loope2
                          ; Shared tail: also reached from L_v1 when fewer than 2 limbs remain.
                     192: Lend2: bb0      0,size,Lret2           ; bit 0 of size clear: no odd limb left
                     193: /* Add last limb */
                     194: Ljone: ld       r10,s1_ptr,0
                     195:        ld       r8,s2_ptr,0
                     196:        addu.cio r6,r10,r8
                     197:        st       r6,res_ptr,0
                     198:
                     199: Lret2: jmp.n    r1                     ; return; the next instruction fills the delay slot
                     200:        addu.ci  r2,r0,r0               ; return carry-out from most sign. limb (r2 = 0+0+cy)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>