Annotation of OpenXM_contrib/gmp/mpn/sparc32/sub_n.S, Revision 1.1
1.1 ! maekawa 1: ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! 2: ! store difference in a third limb vector.
! 3:
! 4: ! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
! 5:
! 6: ! This file is part of the GNU MP Library.
! 7:
! 8: ! The GNU MP Library is free software; you can redistribute it and/or modify
! 9: ! it under the terms of the GNU Library General Public License as published by
! 10: ! the Free Software Foundation; either version 2 of the License, or (at your
! 11: ! option) any later version.
! 12:
! 13: ! The GNU MP Library is distributed in the hope that it will be useful, but
! 14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 16: ! License for more details.
! 17:
! 18: ! You should have received a copy of the GNU Library General Public License
! 19: ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: ! MA 02111-1307, USA.
! 22:
! 23:
! 24: ! INPUT PARAMETERS
/* Symbolic names for the four incoming argument registers of __mpn_sub_n.
   The routine steps every pointer in units of 4 bytes, i.e. one 32-bit limb
   per element. */
/* res_ptr: address the difference limbs are stored to. */
! 25: #define res_ptr %o0
/* s1_ptr: address of the minuend limbs (the routine computes s1 - s2). */
! 26: #define s1_ptr %o1
/* s2_ptr: address of the subtrahend limbs. */
! 27: #define s2_ptr %o2
/* size: number of limbs in each vector; the file header states it is > 0. */
! 28: #define size %o3
! 29:
! 30: #include "sysdep.h"
! 31:
! 32: .text
! 33: .align 4
! 34: .global C_SYMBOL_NAME(__mpn_sub_n)
/*
 * __mpn_sub_n -- limb-wise subtraction with borrow propagation.
 *
 * In:      res_ptr (%o0)  destination vector
 *          s1_ptr  (%o1)  minuend vector
 *          s2_ptr  (%o2)  subtrahend vector
 *          size    (%o3)  number of 32-bit limbs (> 0 per the file header)
 * Out:     %o0 = borrow out of the most significant limb (0 or 1),
 *          produced by "addx %g0,%g0,%o0" in the delay slot of each retl.
 * Scratch: %g1-%g4, %o4, %o5 and the integer condition codes.  The routine
 *          contains no save instruction and returns with retl, so it runs
 *          in the register window of its caller.
 *
 * Strategy: ldd/std transfer two limbs at once into or from an even/odd
 * register pair, but they require an 8-byte aligned address.  Bit 2 of a
 * word-aligned pointer tells whether it is also 8-byte aligned, so the
 * code picks one of three variants by comparing bit 2 of the pointers:
 *   V1a  s2_ptr and res_ptr agree:  ldd from s2, std to res, ld from s1.
 *   V1b  s1_ptr and res_ptr agree:  ldd from s1, std to res, ld from s2.
 *   V2   otherwise s1_ptr and s2_ptr agree:  ldd from both, st to res.
 * In each variant one leading limb is handled separately when needed so
 * that the pointers used with ldd/std become 8-byte aligned.
 *
 * Carry handling: the running borrow lives in the carry flag between
 * subxcc instructions.  The bookkeeping instructions (addcc, cmp, andcc)
 * overwrite the flags, so the borrow is first parked in %o4 with
 * "addx %g0,%g0,%o4" (%o4 = 0 + 0 + carry) and later regenerated with
 * "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).  The
 * regenerating instruction usually sits in a branch delay slot, so it is
 * executed whether or not the branch is taken.
 */
! 35: C_SYMBOL_NAME(__mpn_sub_n):
/* Bit 2 of (s2_ptr ^ res_ptr) is set when the two pointers differ in
   8-byte alignment; V1a is only usable when they agree. */
! 36: xor s2_ptr,res_ptr,%g1
! 37: andcc %g1,4,%g0
! 38: bne L1 ! branch if alignment differs
! 39: nop
! 40: ! ** V1a **
! 41: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
! 42: be L_v1 ! if no, branch
! 43: nop
! 44: /* Subtract the least significant limb separately to align res_ptr and s2_ptr */
! 45: ld [s1_ptr],%g4
! 46: add s1_ptr,4,s1_ptr
! 47: ld [s2_ptr],%g2
! 48: add s2_ptr,4,s2_ptr
! 49: add size,-1,size
! 50: subcc %g4,%g2,%o4
! 51: st %o4,[res_ptr]
! 52: add res_ptr,4,res_ptr
/* With fewer than 2 limbs left, the shared tail at Lend2 deals with the
   0 or 1 remaining limb. */
! 53: L_v1: addx %g0,%g0,%o4 ! save cy in register
! 54: cmp size,2 ! if size < 2 ...
! 55: bl Lend2 ! ... branch to tail code
! 56: subcc %g0,%o4,%g0 ! restore cy
! 57:
/* Preload the first limb pair: %g4/%g1 from s1 (two word loads) and
   %g2/%g3 from s2 (one doubleword load).  size is biased by -10 because
   one unrolled iteration subtracts 8 limbs and preloads 2 more. */
! 58: ld [s1_ptr+0],%g4
! 59: addcc size,-10,size
! 60: ld [s1_ptr+4],%g1
! 61: ldd [s2_ptr+0],%g2
! 62: blt Lfin1
! 63: subcc %g0,%o4,%g0 ! restore cy
! 64: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
! 65: Loop1: subxcc %g4,%g2,%o4
! 66: ld [s1_ptr+8],%g4
! 67: subxcc %g1,%g3,%o5
! 68: ld [s1_ptr+12],%g1
! 69: ldd [s2_ptr+8],%g2
! 70: std %o4,[res_ptr+0]
! 71: subxcc %g4,%g2,%o4
! 72: ld [s1_ptr+16],%g4
! 73: subxcc %g1,%g3,%o5
! 74: ld [s1_ptr+20],%g1
! 75: ldd [s2_ptr+16],%g2
! 76: std %o4,[res_ptr+8]
! 77: subxcc %g4,%g2,%o4
! 78: ld [s1_ptr+24],%g4
! 79: subxcc %g1,%g3,%o5
! 80: ld [s1_ptr+28],%g1
! 81: ldd [s2_ptr+24],%g2
! 82: std %o4,[res_ptr+16]
! 83: subxcc %g4,%g2,%o4
! 84: ld [s1_ptr+32],%g4
! 85: subxcc %g1,%g3,%o5
! 86: ld [s1_ptr+36],%g1
! 87: ldd [s2_ptr+32],%g2
! 88: std %o4,[res_ptr+24]
! 89: addx %g0,%g0,%o4 ! save cy in register
! 90: addcc size,-8,size
! 91: add s1_ptr,32,s1_ptr
! 92: add s2_ptr,32,s2_ptr
! 93: add res_ptr,32,res_ptr
! 94: bge Loop1
! 95: subcc %g0,%o4,%g0 ! restore cy
! 96:
/* Adding 6 changes the bias from -10 to -4: the 2-limb loop below subtracts
   2 limbs per iteration and preloads 2 more. */
! 97: Lfin1: addcc size,8-2,size
! 98: blt Lend1
! 99: subcc %g0,%o4,%g0 ! restore cy
! 100: /* Subtract blocks of 2 limbs until less than 2 limbs remain */
! 101: Loope1: subxcc %g4,%g2,%o4
! 102: ld [s1_ptr+8],%g4
! 103: subxcc %g1,%g3,%o5
! 104: ld [s1_ptr+12],%g1
! 105: ldd [s2_ptr+8],%g2
! 106: std %o4,[res_ptr+0]
! 107: addx %g0,%g0,%o4 ! save cy in register
! 108: addcc size,-2,size
! 109: add s1_ptr,8,s1_ptr
! 110: add s2_ptr,8,s2_ptr
! 111: add res_ptr,8,res_ptr
! 112: bge Loope1
! 113: subcc %g0,%o4,%g0 ! restore cy
/* Subtract and store the limb pair that is still preloaded in registers. */
! 114: Lend1: subxcc %g4,%g2,%o4
! 115: subxcc %g1,%g3,%o5
! 116: std %o4,[res_ptr+0]
! 117: addx %g0,%g0,%o4 ! save cy in register
! 118:
/* The biases applied to size were all even, so bit 0 still tells whether
   one more limb is left.  The pointers were not advanced past the pair
   stored above, hence the offset of 8 below. */
! 119: andcc size,1,%g0
! 120: be Lret1
! 121: subcc %g0,%o4,%g0 ! restore cy
! 122: /* Subtract last limb */
! 123: ld [s1_ptr+8],%g4
! 124: ld [s2_ptr+8],%g2
! 125: subxcc %g4,%g2,%o4
! 126: st %o4,[res_ptr+8]
! 127:
! 128: Lret1: retl
! 129: addx %g0,%g0,%o0 ! return carry-out from most sign. limb
! 130:
/* Same alignment test as at entry, now for s1_ptr against res_ptr. */
! 131: L1: xor s1_ptr,res_ptr,%g1
! 132: andcc %g1,4,%g0
! 133: bne L2
! 134: nop
! 135: ! ** V1b **
/* Mirror image of V1a with the roles of the sources swapped: here %g4/%g1
   hold s2 limbs and %g2/%g3 hold s1 limbs, so the operands of every
   subtract are reversed to keep computing s1 - s2. */
! 136: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
! 137: be L_v1b ! if no, branch
! 138: nop
! 139: /* Subtract the least significant limb separately to align res_ptr and s1_ptr */
! 140: ld [s2_ptr],%g4
! 141: add s2_ptr,4,s2_ptr
! 142: ld [s1_ptr],%g2
! 143: add s1_ptr,4,s1_ptr
! 144: add size,-1,size
! 145: subcc %g2,%g4,%o4
! 146: st %o4,[res_ptr]
! 147: add res_ptr,4,res_ptr
! 148: L_v1b: addx %g0,%g0,%o4 ! save cy in register
! 149: cmp size,2 ! if size < 2 ...
! 150: bl Lend2 ! ... branch to tail code
! 151: subcc %g0,%o4,%g0 ! restore cy
! 152:
/* Preload the first limb pair (s2 by word loads, s1 by doubleword load) and
   bias size by -10, as in V1a. */
! 153: ld [s2_ptr+0],%g4
! 154: addcc size,-10,size
! 155: ld [s2_ptr+4],%g1
! 156: ldd [s1_ptr+0],%g2
! 157: blt Lfin1b
! 158: subcc %g0,%o4,%g0 ! restore cy
! 159: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
! 160: Loop1b: subxcc %g2,%g4,%o4
! 161: ld [s2_ptr+8],%g4
! 162: subxcc %g3,%g1,%o5
! 163: ld [s2_ptr+12],%g1
! 164: ldd [s1_ptr+8],%g2
! 165: std %o4,[res_ptr+0]
! 166: subxcc %g2,%g4,%o4
! 167: ld [s2_ptr+16],%g4
! 168: subxcc %g3,%g1,%o5
! 169: ld [s2_ptr+20],%g1
! 170: ldd [s1_ptr+16],%g2
! 171: std %o4,[res_ptr+8]
! 172: subxcc %g2,%g4,%o4
! 173: ld [s2_ptr+24],%g4
! 174: subxcc %g3,%g1,%o5
! 175: ld [s2_ptr+28],%g1
! 176: ldd [s1_ptr+24],%g2
! 177: std %o4,[res_ptr+16]
! 178: subxcc %g2,%g4,%o4
! 179: ld [s2_ptr+32],%g4
! 180: subxcc %g3,%g1,%o5
! 181: ld [s2_ptr+36],%g1
! 182: ldd [s1_ptr+32],%g2
! 183: std %o4,[res_ptr+24]
! 184: addx %g0,%g0,%o4 ! save cy in register
! 185: addcc size,-8,size
! 186: add s1_ptr,32,s1_ptr
! 187: add s2_ptr,32,s2_ptr
! 188: add res_ptr,32,res_ptr
! 189: bge Loop1b
! 190: subcc %g0,%o4,%g0 ! restore cy
! 191:
/* Change the bias from -10 to -4 for the 2-limb loop. */
! 192: Lfin1b: addcc size,8-2,size
! 193: blt Lend1b
! 194: subcc %g0,%o4,%g0 ! restore cy
! 195: /* Subtract blocks of 2 limbs until less than 2 limbs remain */
! 196: Loope1b:subxcc %g2,%g4,%o4
! 197: ld [s2_ptr+8],%g4
! 198: subxcc %g3,%g1,%o5
! 199: ld [s2_ptr+12],%g1
! 200: ldd [s1_ptr+8],%g2
! 201: std %o4,[res_ptr+0]
! 202: addx %g0,%g0,%o4 ! save cy in register
! 203: addcc size,-2,size
! 204: add s1_ptr,8,s1_ptr
! 205: add s2_ptr,8,s2_ptr
! 206: add res_ptr,8,res_ptr
! 207: bge Loope1b
! 208: subcc %g0,%o4,%g0 ! restore cy
/* Subtract and store the limb pair that is still preloaded in registers. */
! 209: Lend1b: subxcc %g2,%g4,%o4
! 210: subxcc %g3,%g1,%o5
! 211: std %o4,[res_ptr+0]
! 212: addx %g0,%g0,%o4 ! save cy in register
! 213:
/* Bit 0 of size tells whether one more limb is left; it sits at offset 8
   because the pointers were not advanced past the pair stored above. */
! 214: andcc size,1,%g0
! 215: be Lret1b
! 216: subcc %g0,%o4,%g0 ! restore cy
! 217: /* Subtract last limb */
! 218: ld [s2_ptr+8],%g4
! 219: ld [s1_ptr+8],%g2
! 220: subxcc %g2,%g4,%o4
! 221: st %o4,[res_ptr+8]
! 222:
! 223: Lret1b: retl
! 224: addx %g0,%g0,%o0 ! return carry-out from most sign. limb
! 225:
! 226: ! ** V2 **
! 227: /* If we come here, the alignment of s1_ptr and res_ptr as well as the
! 228: alignment of s2_ptr and res_ptr differ. Since there are only two ways
! 229: things can be aligned (that we care about) we now know that the alignment
! 230: of s1_ptr and s2_ptr are the same. */
! 231:
/* A single limb goes straight to the one-limb code at Ljone.  When size is
   1 the compare computes 1 - 1, which leaves the carry flag clear, so the
   subxcc at Ljone starts without an incoming borrow. */
! 232: L2: cmp size,1
! 233: be Ljone
! 234: nop
! 235: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
! 236: be L_v2 ! if no, branch
! 237: nop
! 238: /* Subtract the least significant limb separately to align s1_ptr and s2_ptr */
! 239: ld [s1_ptr],%g4
! 240: add s1_ptr,4,s1_ptr
! 241: ld [s2_ptr],%g2
! 242: add s2_ptr,4,s2_ptr
! 243: add size,-1,size
! 244: subcc %g4,%g2,%o4
! 245: st %o4,[res_ptr]
! 246: add res_ptr,4,res_ptr
! 247:
/* No preloading in this variant: each pair is loaded with ldd inside the
   loop and the results are stored one word at a time, so size is biased by
   -8 only.  %o4 doubles as the low half of the s2 pair inside the loop and
   as the saved carry at the loop end. */
! 248: L_v2: addx %g0,%g0,%o4 ! save cy in register
! 249: addcc size,-8,size
! 250: blt Lfin2
! 251: subcc %g0,%o4,%g0 ! restore cy
! 252: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
! 253: Loop2: ldd [s1_ptr+0],%g2
! 254: ldd [s2_ptr+0],%o4
! 255: subxcc %g2,%o4,%g2
! 256: st %g2,[res_ptr+0]
! 257: subxcc %g3,%o5,%g3
! 258: st %g3,[res_ptr+4]
! 259: ldd [s1_ptr+8],%g2
! 260: ldd [s2_ptr+8],%o4
! 261: subxcc %g2,%o4,%g2
! 262: st %g2,[res_ptr+8]
! 263: subxcc %g3,%o5,%g3
! 264: st %g3,[res_ptr+12]
! 265: ldd [s1_ptr+16],%g2
! 266: ldd [s2_ptr+16],%o4
! 267: subxcc %g2,%o4,%g2
! 268: st %g2,[res_ptr+16]
! 269: subxcc %g3,%o5,%g3
! 270: st %g3,[res_ptr+20]
! 271: ldd [s1_ptr+24],%g2
! 272: ldd [s2_ptr+24],%o4
! 273: subxcc %g2,%o4,%g2
! 274: st %g2,[res_ptr+24]
! 275: subxcc %g3,%o5,%g3
! 276: st %g3,[res_ptr+28]
! 277: addx %g0,%g0,%o4 ! save cy in register
! 278: addcc size,-8,size
! 279: add s1_ptr,32,s1_ptr
! 280: add s2_ptr,32,s2_ptr
! 281: add res_ptr,32,res_ptr
! 282: bge Loop2
! 283: subcc %g0,%o4,%g0 ! restore cy
! 284:
/* Adding 6 changes the bias from -8 to -2 for the 2-limb loop. */
! 285: Lfin2: addcc size,8-2,size
! 286: blt Lend2
! 287: subcc %g0,%o4,%g0 ! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
! 288: Loope2: ldd [s1_ptr+0],%g2
! 289: ldd [s2_ptr+0],%o4
! 290: subxcc %g2,%o4,%g2
! 291: st %g2,[res_ptr+0]
! 292: subxcc %g3,%o5,%g3
! 293: st %g3,[res_ptr+4]
! 294: addx %g0,%g0,%o4 ! save cy in register
! 295: addcc size,-2,size
! 296: add s1_ptr,8,s1_ptr
! 297: add s2_ptr,8,s2_ptr
! 298: add res_ptr,8,res_ptr
! 299: bge Loope2
! 300: subcc %g0,%o4,%g0 ! restore cy
/* Shared tail, also reached from V1a and V1b when fewer than 2 limbs are
   left after their alignment step.  On every path %o4 holds the saved
   carry and bit 0 of size tells whether one limb remains at offset 0. */
! 301: Lend2: andcc size,1,%g0
! 302: be Lret2
! 303: subcc %g0,%o4,%g0 ! restore cy
! 304: /* Subtract last limb */
! 305: Ljone: ld [s1_ptr],%g4
! 306: ld [s2_ptr],%g2
! 307: subxcc %g4,%g2,%o4
! 308: st %o4,[res_ptr]
! 309:
! 310: Lret2: retl
! 311: addx %g0,%g0,%o0 ! return carry-out from most sign. limb
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>