From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001
From: Jari Vetoniemi <jari.vetoniemi@indooratlas.com>
Date: Mon, 16 Mar 2020 18:49:26 +0900
Subject: Fresh start

---
 .../ext/fiddle/libffi-3.2.1/src/x86/darwin64.S     | 416 +++++++++++++++++++++
 1 file changed, 416 insertions(+)
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S

(limited to 'jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S')

diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S
new file mode 100644
index 0000000..2f7394e
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S
@@ -0,0 +1,416 @@
+/* -----------------------------------------------------------------------
+   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
+	        Copyright (c) 2008 Red Hat, Inc.
+   derived from unix64.S
+
+   x86-64 Foreign Function Interface for Darwin.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+	.file "darwin64.S"
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+		    void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	3
+	.globl	_ffi_call_unix64
+
+_ffi_call_unix64:
+LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax
+	jnz	Lload_sse
+Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	movzbl	%cl, %r10d
+	leaq	Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lstore_table:
+	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
+	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
+	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
+	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
+	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
+	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
+	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
+	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align	3
+Lst_void:
+	ret
+	.align	3
+Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	.align	3
+Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint32:
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	.align	3
+Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align	3
+Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+	.align	3
+Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx
+	rep movsb
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align	3
+LUW3:
+Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	Lret_from_load_sse
+
+LUW4:
+	.align	3
+	.globl	_ffi_closure_unix64
+
+_ffi_closure_unix64:
+LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp
+LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      Lsave_sse
+Lret_from_save_sse:
+
+	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
+	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	_ffi_closure_unix64_inner
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	movzbl	%al, %r10d
+	leaq	Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lload_table:
+	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
+	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
+	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
+	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
+	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align	3
+Lld_void:
+	ret
+	.align	3
+Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+	.align	3
+Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+	.align	3
+Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above Lload_sse; the same logic applies here.  */
+	.align	3
+LUW8:
+Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	Lret_from_save_sse
+
+LUW9:
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1		/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0		/* CIE Identifier Tag */
+	.byte	0x1		/* CIE Version */
+	.ascii	"zR\0"		/* CIE Augmentation */
+	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
+	.byte	0x10		/* CIE RA Column */
+	.byte	0x1		/* uleb128 0x1; Augmentation size */
+	.byte	0x10		/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7		/* uleb128 0x7 */
+	.byte	0x8		/* uleb128 0x8 */
+	.byte	0x90		/* DW_CFA_offset, column 0x10 */
+	.byte	0x1
+	.align	3
+LECIE1:
+	.globl _ffi_call_unix64.eh
+_ffi_call_unix64.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.quad	LUW0-.			/* FDE initial location */
+	.set	L$set$2,LUW4-LUW0	/* FDE address range */
+	.quad	L$set$2
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$3,LUW1-LUW0
+	.long	L$set$3
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.byte	0x6
+	.byte	0x20
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.byte	0x2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$4,LUW2-LUW1
+	.long	L$set$4
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7
+	.byte	0x8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$5,LUW3-LUW2
+	.long	L$set$5
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE1:
+	.globl _ffi_closure_unix64.eh
+_ffi_closure_unix64.eh:
+LSFDE3:
+	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$6
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.quad	LUW5-.			/* FDE initial location */
+	.set	L$set$7,LUW9-LUW5	/* FDE address range */
+	.quad	L$set$7
+	.byte	0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$8,LUW6-LUW5
+	.long	L$set$8
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	208,1			/* uleb128 208 */
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$9,LUW7-LUW6
+	.long	L$set$9
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$10,LUW8-LUW7
+	.long	L$set$10
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE3:
+	.subsections_via_symbols
+
+#endif /* __x86_64__ */
-- 
cgit v1.2.3