From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Mon, 16 Mar 2020 18:49:26 +0900 Subject: Fresh start --- jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S | 432 ++++++++++++++++++++++ 1 file changed, 432 insertions(+) create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S (limited to 'jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S') diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S new file mode 100644 index 0000000..dcd6bc7 --- /dev/null +++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S @@ -0,0 +1,432 @@ +/* ----------------------------------------------------------------------- + unix64.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 2008 Red Hat, Inc + - Copyright (c) 2002 Bo Thorsen + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifdef __x86_64__ +#define LIBFFI_ASM +#include +#include + +.text + +/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 2 + .globl ffi_call_unix64 + .type ffi_call_unix64,@function + +ffi_call_unix64: +.LUW0: + movq (%rsp), %r10 /* Load return address. */ + leaq (%rdi, %rsi), %rax /* Find local stack base. */ + movq %rdx, (%rax) /* Save flags. */ + movq %rcx, 8(%rax) /* Save raddr. */ + movq %rbp, 16(%rax) /* Save old frame pointer. */ + movq %r10, 24(%rax) /* Relocate return address. */ + movq %rax, %rbp /* Finalize local stack frame. */ +.LUW1: + movq %rdi, %r10 /* Save a copy of the register area. */ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + testl %eax, %eax + jnz .Lload_sse +.Lret_from_load_sse: + + /* Deallocate the reg arg area. */ + leaq 176(%r10), %rsp + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +.LUW2: + + /* The first byte of the flags contains the FFI_TYPE. */ + movzbl %cl, %r10d + leaq .Lstore_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lstore_table: + .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */ + .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */ + .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lst_void: + ret + .align 2 + +.Lst_uint8: + movzbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_sint8: + movsbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint16: + movzwq %ax, %rax + movq %rax, (%rdi) + .align 2 +.Lst_sint16: + movswq %ax, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint32: + movl %eax, %eax + movq %rax, (%rdi) + .align 2 +.Lst_sint32: + cltq + movq %rax, (%rdi) + ret + .align 2 +.Lst_int64: + movq %rax, (%rdi) + ret + + .align 2 +.Lst_float: + movss %xmm0, (%rdi) + ret + .align 2 +.Lst_double: + movsd %xmm0, (%rdi) + ret +.Lst_ldouble: + fstpt (%rdi) + ret + + .align 2 +.Lst_struct: + leaq -20(%rsp), %rsi /* Scratch area in redzone. */ + + /* We have to locate the values now, and since we don't want to + write too much data into the user's return value, we spill the + value to a 16 byte scratch area first. Bits 8, 9, and 10 + control where the values are located. Only one of the three + bits will be set; see ffi_prep_cif_machdep for the pattern. */ + movd %xmm0, %r10 + movd %xmm1, %r11 + testl $0x100, %ecx + cmovnz %rax, %rdx + cmovnz %r10, %rax + testl $0x200, %ecx + cmovnz %r10, %rdx + testl $0x400, %ecx + cmovnz %r10, %rax + cmovnz %r11, %rdx + movq %rax, (%rsi) + movq %rdx, 8(%rsi) + + /* Bits 12-31 contain the true size of the structure. Copy from + the scratch area to the true destination. */ + shrl $12, %ecx + rep movsb + ret + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. */ + .align 2 +.LUW3: +.Lload_sse: + movdqa 48(%r10), %xmm0 + movdqa 64(%r10), %xmm1 + movdqa 80(%r10), %xmm2 + movdqa 96(%r10), %xmm3 + movdqa 112(%r10), %xmm4 + movdqa 128(%r10), %xmm5 + movdqa 144(%r10), %xmm6 + movdqa 160(%r10), %xmm7 + jmp .Lret_from_load_sse + +.LUW4: + .size ffi_call_unix64,.-ffi_call_unix64 + + .align 2 + .globl ffi_closure_unix64 + .type ffi_closure_unix64,@function + +ffi_closure_unix64: +.LUW5: + /* The carry flag is set by the trampoline iff SSE registers + are used. Don't clobber it before the branch instruction. */ + leaq -200(%rsp), %rsp +.LUW6: + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + jc .Lsave_sse +.Lret_from_save_sse: + + movq %r10, %rdi + leaq 176(%rsp), %rsi + movq %rsp, %rdx + leaq 208(%rsp), %rcx + call ffi_closure_unix64_inner@PLT + + /* Deallocate stack frame early; return value is now in redzone. */ + addq $200, %rsp +.LUW7: + + /* The first byte of the return value contains the FFI_TYPE. */ + movzbl %al, %r10d + leaq .Lload_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lload_table: + .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */ + .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */ + .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */ + .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */ + .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lld_void: + ret + + .align 2 +.Lld_int8: + movzbl -24(%rsp), %eax + ret + .align 2 +.Lld_int16: + movzwl -24(%rsp), %eax + ret + .align 2 +.Lld_int32: + movl -24(%rsp), %eax + ret + .align 2 +.Lld_int64: + movq -24(%rsp), %rax + ret + + .align 2 +.Lld_float: + movss -24(%rsp), %xmm0 + ret + .align 2 +.Lld_double: + movsd -24(%rsp), %xmm0 + ret + .align 2 +.Lld_ldouble: + fldt -24(%rsp) + ret + + .align 2 +.Lld_struct: + /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, + %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading + both rdx and xmm1 with the second word. For the remaining, + bit 8 set means xmm0 gets the second word, and bit 9 means + that rax gets the second word. */ + movq -24(%rsp), %rcx + movq -16(%rsp), %rdx + movq -16(%rsp), %xmm1 + testl $0x100, %eax + cmovnz %rdx, %rcx + movd %rcx, %xmm0 + testl $0x200, %eax + movq -24(%rsp), %rax + cmovnz %rdx, %rax + ret + + /* See the comment above .Lload_sse; the same logic applies here. */ + .align 2 +.LUW8: +.Lsave_sse: + movdqa %xmm0, 48(%rsp) + movdqa %xmm1, 64(%rsp) + movdqa %xmm2, 80(%rsp) + movdqa %xmm3, 96(%rsp) + movdqa %xmm4, 112(%rsp) + movdqa %xmm5, 128(%rsp) + movdqa %xmm6, 144(%rsp) + movdqa %xmm7, 160(%rsp) + jmp .Lret_from_save_sse + +.LUW9: + .size ffi_closure_unix64,.-ffi_closure_unix64 + +#ifdef __GNUC__ +/* Only emit DWARF unwind info when building with the GNU toolchain. */ + +#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE + .section .eh_frame,"a",@unwind +#else + .section .eh_frame,"a",@progbits +#endif +.Lframe1: + .long .LECIE1-.LSCIE1 /* CIE Length */ +.LSCIE1: + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .uleb128 1 /* CIE Code Alignment Factor */ + .sleb128 -8 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .uleb128 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */ + .uleb128 1 + .align 8 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW0-. /* FDE initial location */ +#else + .long .LUW0@rel +#endif + .long .LUW4-.LUW0 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW1-.LUW0 + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ + .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ + .uleb128 6 + .uleb128 32 + .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ + .uleb128 2 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW2-.LUW1 + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW3-.LUW2 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE1: +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW5-. /* FDE initial location */ +#else + .long .LUW5@rel +#endif + .long .LUW9-.LUW5 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW6-.LUW5 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 208 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW7-.LUW6 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 8 + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW8-.LUW7 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE3: + +#endif /* __GNUC__ */ + +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif -- cgit v1.2.3