/* ----------------------------------------------------------------------- unix.S - Copyright (c) 1998 Red Hat, Inc. Copyright (c) 2000 Hewlett Packard Company IA64/unix Foreign Function Interface Primary author: Hans Boehm, HP Labs Loosely modeled on Cygnus code for other platforms. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ``Software''), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ #define LIBFFI_ASM #include #include #include "ia64_flags.h" /* parameters: */ #define callback in0 #define ecifp in1 #define bytes in2 #define flags in3 #define raddr in4 #define fn in5 #define FLOAT_SZ 8 /* in-memory size of fp operands */ /* Allocate an ia64_args structure on the stack; call ffi_prep_args */ /* to fill it in with argument values; copy those to the real */ /* registers, leaving overflow arguments on the stack. Then call fn */ /* and move the result from registers into *raddr. 
*/

/*
 * ffi_call_unix
 *
 * Inputs (named by the macros defined earlier in this file):
 *   callback (in0)  function descriptor of the argument-preparation
 *                   routine (ffi_prep_args): code address, then gp
 *   ecifp    (in1)  passed through to the callback as its 2nd argument
 *   bytes    (in2)  size of the argument area to reserve on the stack;
 *                   also passed to the callback as its 3rd argument
 *   flags    (in3)  return-type code / aggregate bits (ia64_flags.h)
 *   raddr    (in4)  where to store the return value, or 0 for "discard"
 *   fn       (in5)  function descriptor of the target function
 *
 * Locals:
 *   loc0  saved ar.pfs        loc1  saved return pointer (b0)
 *   loc2  scratch pointer     loc3  scratch pointer
 *   loc4  saved gp (r1)       loc5  saved sp
 *
 * Layout of the argument area as read back below (offsets from the new sp):
 *   +16                  r8_contents (struct-return buffer address)
 *   +32                  eight FP register values, FLOAT_SZ bytes each
 *   +32 + 8*FLOAT_SZ     eight integer register values, 8 bytes each
 *   after that           overflow (stack) arguments
 */
	.pred.safe_across_calls p1-p5,p16-p63
.text
	.align		16
	.global		ffi_call_unix
	.proc		ffi_call_unix
ffi_call_unix:
	.prologue
	.save		ar.pfs,r38 /* loc0 */
	alloc	loc0=ar.pfs,6,6,8,0
	.save		rp,loc1
	mov	loc1=b0
	.vframe		loc5
	mov	loc5=sp
	.body
	sub	sp=sp,bytes
	mov	loc4=r1			/* Save gp			*/
	ld8	r8=[callback],8		/* code address of callback	*/
	;;
	mov	out0=sp
	mov	out1=ecifp
	mov	out2=bytes
	ld8	r1=[callback]		/* Set up gp for callback.  Unnecessary? */
	mov	b6=r8
	;;
	br.call.sptk.many b0 = b6	/* call ffi_prep_args		*/
	cmp.eq	p6,p0=0,r8		/* r8 nonzero ==> need fp regs	*/
	;;
	/* No FP arguments: point loc2 straight at the integer values.	*/
(p6)	add	loc2=32+8*FLOAT_SZ,sp
(p6)	br.cond.dptk.many	fp_done
	;;				/* Quiets warning; needed?	*/
	/* Load f8-f15 two at a time through interleaved pointers.	*/
	add	loc2=32,sp
	add	loc3=32+FLOAT_SZ,sp
	;;
	ldfd	f8=[loc2],2*FLOAT_SZ
	ldfd	f9=[loc3],2*FLOAT_SZ
	;;
	ldfd	f10=[loc2],2*FLOAT_SZ
	ldfd	f11=[loc3],2*FLOAT_SZ
	;;
	ldfd	f12=[loc2],2*FLOAT_SZ
	ldfd	f13=[loc3],2*FLOAT_SZ
	;;
	ldfd	f14=[loc2],2*FLOAT_SZ
	ldfd	f15=[loc3]
fp_done:
	add	r9=16,sp		/* Pointer to r8_contents	*/
	/* loc2 points at first integer register value.			*/
	add	loc3=8,loc2
	;;
	ld8	r8=[r9]			/* Just in case we return large struct */
	ld8	out0=[loc2],16
	ld8	out1=[loc3],16
	;;
	ld8	out2=[loc2],16
	ld8	out3=[loc3],16
	;;
	ld8	out4=[loc2],16
	ld8	out5=[loc3],16
	;;
	ld8	out6=[loc2]
	ld8	out7=[loc3]
	/* Set sp to 16 bytes below the first stack parameter.  This	*/
	/* is the value currently in loc2.				*/
	mov	sp=loc2

	/* Fetch fn's entry point through scratch register r16 so that	*/
	/* the value just loaded into r8 from r8_contents is still	*/
	/* intact when fn is entered.					*/
	ld8	r16=[fn],8
	;;
	ld8	r1=[fn]			/* Set up gp			*/
	mov	b6=r16
	;;
	br.call.sptk.many b0 = b6	/* call fn			*/

	/* Handle return value.						*/
	cmp.eq	p6,p0=0,raddr
	cmp.eq	p7,p0=FFI_TYPE_INT,flags
	cmp.eq	p10,p0=FFI_IS_SMALL_STRUCT2,flags
	cmp.eq	p11,p0=FFI_IS_SMALL_STRUCT3,flags
	cmp.eq	p12,p0=FFI_IS_SMALL_STRUCT4,flags
	;;
(p6)	br.cond.dpnt.few done		/* Don't copy ret values if raddr = 0 */
(p7)	br.cond.dptk.few copy1
(p10)	br.cond.dpnt.few copy2
(p11)	br.cond.dpnt.few copy3
(p12)	br.cond.dpnt.few copy4
	cmp.eq	p8,p0=FFI_TYPE_FLOAT,flags
	cmp.eq	p9,p0=FFI_TYPE_DOUBLE,flags
	tbit.nz	p6,p0=flags,FLOAT_FP_AGGREGATE_BIT
	tbit.nz	p7,p0=flags,DOUBLE_FP_AGGREGATE_BIT
	;;
(p8)	stfs	[raddr]=f8
(p9)	stfd	[raddr]=f8
	;;
	.label_state 1
(p6)	br.cond.dpnt.few handle_float_hfa
(p7)	br.cond.dpnt.few handle_double_hfa
	br done

	/* Small structs come back in r8-r11.  Each entry point stores	*/
	/* its highest word and falls through to the lower ones.	*/
copy4:
	add	loc3=24,raddr
	;;
	st8	[loc3]=r11
copy3:
	add	loc3=16,raddr
	;;
	st8	[loc3]=r10
copy2:
	add	loc3=8,raddr
	;;
	st8	[loc3]=r9
copy1:
	st8	[raddr]=r8
	/* In the big struct case, raddr was passed as an argument.	*/
	/* In the void case there was nothing to do.			*/

done:
	mov	r1=loc4			/* Restore gp			*/
	mov	ar.pfs = loc0
	mov	b0 = loc1
	.restore sp
	mov	sp = loc5
	br.ret.sptk.many b0

handle_double_hfa:
	.body
	.copy_state 1
	/* Homogeneous floating point array of doubles is returned in	*/
	/* registers f8-f15.  Save one at a time to return area.	*/
	and	flags=0xf,flags		/* Retrieve size		*/
	;;
	cmp.eq	p6,p0=2,flags
	cmp.eq	p7,p0=3,flags
	cmp.eq	p8,p0=4,flags
	cmp.eq	p9,p0=5,flags
	cmp.eq	p10,p0=6,flags
	cmp.eq	p11,p0=7,flags
	cmp.eq	p12,p0=8,flags		/* not branched on: falls into dhfa8 */
	;;
(p6)	br.cond.dptk.few	dhfa2
(p7)	br.cond.dptk.few	dhfa3
(p8)	br.cond.dptk.few	dhfa4
(p9)	br.cond.dptk.few	dhfa5
(p10)	br.cond.dptk.few	dhfa6
(p11)	br.cond.dptk.few	dhfa7
dhfa8:
	add	loc3=7*8,raddr
	;;
	stfd	[loc3]=f15
dhfa7:
	add	loc3=6*8,raddr
	;;
	stfd	[loc3]=f14
dhfa6:
	add	loc3=5*8,raddr
	;;
	stfd	[loc3]=f13
dhfa5:
	add	loc3=4*8,raddr
	;;
	stfd	[loc3]=f12
dhfa4:
	add	loc3=3*8,raddr
	;;
	stfd	[loc3]=f11
dhfa3:
	add	loc3=2*8,raddr
	;;
	stfd	[loc3]=f10
dhfa2:
	add	loc3=1*8,raddr
	;;
	stfd	[loc3]=f9
	stfd	[raddr]=f8
	br	done

handle_float_hfa:
	/* Homogeneous floating point array of floats is returned in	*/
	/* registers f8-f15.  Save one at a time to return area.	*/
	/* Elements are 4-byte singles at a 4-byte stride, so each	*/
	/* store must be stfs; stfd would write 8 bytes in double	*/
	/* format into every slot and run past the end of the array.	*/
	and	flags=0xf,flags		/* Retrieve size		*/
	;;
	cmp.eq	p6,p0=2,flags
	cmp.eq	p7,p0=3,flags
	cmp.eq	p8,p0=4,flags
	cmp.eq	p9,p0=5,flags
	cmp.eq	p10,p0=6,flags
	cmp.eq	p11,p0=7,flags
	cmp.eq	p12,p0=8,flags		/* not branched on: falls into shfa8 */
	;;
(p6)	br.cond.dptk.few	shfa2
(p7)	br.cond.dptk.few	shfa3
(p8)	br.cond.dptk.few	shfa4
(p9)	br.cond.dptk.few	shfa5
(p10)	br.cond.dptk.few	shfa6
(p11)	br.cond.dptk.few	shfa7
shfa8:
	add	loc3=7*4,raddr
	;;
	stfs	[loc3]=f15
shfa7:
	add	loc3=6*4,raddr
	;;
	stfs	[loc3]=f14
shfa6:
	add	loc3=5*4,raddr
	;;
	stfs	[loc3]=f13
shfa5:
	add	loc3=4*4,raddr
	;;
	stfs	[loc3]=f12
shfa4:
	add	loc3=3*4,raddr
	;;
	stfs	[loc3]=f11
shfa3:
	add	loc3=2*4,raddr
	;;
	stfs	[loc3]=f10
shfa2:
	add	loc3=1*4,raddr
	;;
	stfs	[loc3]=f9
	stfs	[raddr]=f8
	br	done

	.endp ffi_call_unix

/*
 * ffi_closure_UNIX
 *
 * Closure entry point.  On entry gp holds the closure pointer rather
 * than a real gp; the real gp is loaded from offset 16 of the closure.
 * The incoming FP argument registers f8-f15 and integer argument
 * registers in0-in7 are spilled into an argument block built directly
 * below the caller's stack arguments, and ffi_closure_UNIX_inner is
 * called with (closure pointer, pointer to that block).
 *
 * Locals: loc0 = saved ar.pfs, loc1 = saved return pointer (b0),
 *         loc2 = saved sp.
 */
	.pred.safe_across_calls p1-p5,p16-p63
.text
	.align		16
	.global		ffi_closure_UNIX
	.proc		ffi_closure_UNIX
ffi_closure_UNIX:
	.prologue
	.save		ar.pfs,r40 /* loc0 */
	alloc	loc0=ar.pfs,8,3,2,0
	.save		rp,loc1
	mov	loc1=b0
	.vframe		loc2
	mov	loc2=sp
	/* Retrieve closure pointer and real gp.			*/
	mov	out0=gp
	add	gp=16,gp
	;;
	ld8	gp=[gp]
	/* Reserve a struct ia64_args on the stack such that arguments	*/
	/* past the first 8 are automatically placed in the right	*/
	/* slot.  Note that when we start the sp points at 2 8-byte	*/
	/* scratch words, followed by the extra arguments.		*/
#	define BASIC_ARGS_SZ (8*FLOAT_SZ+8*8+2*8)
#	define FIRST_FP_OFFSET (4*8)
	add	r14=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET),sp
	add	r15=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET-FLOAT_SZ),sp
	add	sp=-BASIC_ARGS_SZ,sp
	/* r14 points to fp_regs[0], r15 points to fp_regs[1]		*/
	;;
	stfd	[r14]=f8,2*FLOAT_SZ
	stfd	[r15]=f9,2*FLOAT_SZ
	;;
	stfd	[r14]=f10,2*FLOAT_SZ
	stfd	[r15]=f11,2*FLOAT_SZ
	;;
	stfd	[r14]=f12,2*FLOAT_SZ
	stfd	[r15]=f13,2*FLOAT_SZ
	;;
	/* The final post-increments step both pointers past the FP	*/
	/* area and onto the first two integer slots.			*/
	stfd	[r14]=f14,FLOAT_SZ+8
	stfd	[r15]=f15,2*8
	;;
	/* r14 points to first parameter register area, r15 to second.	*/
	st8	[r14]=in0,2*8
	st8	[r15]=in1,2*8
	;;
	st8	[r14]=in2,2*8
	st8	[r15]=in3,2*8
	;;
	st8	[r14]=in4,2*8
	st8	[r15]=in5,2*8
	;;
	st8	[r14]=in6,2*8
	st8	[r15]=in7,2*8
	/* Call ffi_closure_UNIX_inner					*/
	mov	out1=sp
	br.call.sptk.many b0=ffi_closure_UNIX_inner
	;;
	mov	b0=loc1
	mov	ar.pfs=loc0
	.restore sp
	mov	sp=loc2
	br.ret.sptk.many b0
	.endp ffi_closure_UNIX