From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001
From: Jari Vetoniemi <jari.vetoniemi@indooratlas.com>
Date: Mon, 16 Mar 2020 18:49:26 +0900
Subject: Fresh start

---
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin.S  |  444 +++++++
 .../ext/fiddle/libffi-3.2.1/src/x86/darwin64.S     |  416 ++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi.c     |  935 ++++++++++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi64.c   |  676 ++++++++++
 .../ext/fiddle/libffi-3.2.1/src/x86/ffitarget.h    |  152 +++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/freebsd.S |  463 +++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/sysv.S    |  483 +++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S  |  432 +++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win32.S   | 1351 ++++++++++++++++++++
 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win64.S   |  519 ++++++++
 10 files changed, 5871 insertions(+)
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi.c
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi64.c
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffitarget.h
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/freebsd.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/sysv.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win32.S
 create mode 100644 jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win64.S

(limited to 'jni/ruby/ext/fiddle/libffi-3.2.1/src/x86')

diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin.S
new file mode 100644
index 0000000..8f0f070
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin.S
@@ -0,0 +1,444 @@
+/* -----------------------------------------------------------------------
+   darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	Copyright (C) 2008  Free Software Foundation, Inc.
+
+   X86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl _ffi_prep_args
+
+	.align 4
+.globl _ffi_call_SYSV
+
+_ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        subl $8,%esp
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	subl  $8,%esp
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $16,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are 
+	   obliged to handle floating-point values.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jne   noretval
+	fstp  %st(0)
+
+	jmp   epilogue
+0:
+	.align 4
+	call 1f
+.Lstore_table:
+	.long   noretval-.Lstore_table		/* FFI_TYPE_VOID */
+	.long   retint-.Lstore_table		/* FFI_TYPE_INT */
+	.long   retfloat-.Lstore_table		/* FFI_TYPE_FLOAT */
+	.long   retdouble-.Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long   retlongdouble-.Lstore_table     /* FFI_TYPE_LONGDOUBLE */
+	.long   retuint8-.Lstore_table		/* FFI_TYPE_UINT8 */
+	.long   retsint8-.Lstore_table		/* FFI_TYPE_SINT8 */
+	.long   retuint16-.Lstore_table		/* FFI_TYPE_UINT16 */
+	.long   retsint16-.Lstore_table		/* FFI_TYPE_SINT16 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_UINT32 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_SINT32 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_UINT64 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_SINT64 */
+	.long   retstruct-.Lstore_table		/* FFI_TYPE_STRUCT */
+	.long   retint-.Lstore_table		/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+1:
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpt (%ecx)
+	jmp   epilogue
+
+retint64:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+	popl %esi
+	movl %ebp,%esp
+	popl %ebp
+	ret
+
+.LFE1:
+.ffi_call_SYSV_end:
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl _ffi_closure_SYSV
+
+_ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	L_ffi_closure_SYSV_inner$stub
+	movl	8(%esp), %ebx
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	lea -8(%ebp),%esp
+	movl	%ebp, %esp
+	popl	%ebp
+	ret $4
+.LFE2:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl _ffi_closure_raw_SYSV
+
+_ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+#endif
+
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_SYSV_inner$stub:
+	.indirect_symbol _ffi_closure_SYSV_inner
+	hlt ; hlt ; hlt ; hlt ; hlt
+
+
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1
+	.long	L$set$0
+LSCIE1:
+	.long	0x0
+	.byte	0x1
+	.ascii "zR\0"
+	.byte	0x1
+	.byte	0x7c
+	.byte	0x8
+	.byte	0x1
+	.byte	0x10
+	.byte	0xc
+	.byte	0x5
+	.byte	0x4
+	.byte	0x88
+	.byte	0x1
+	.align 2
+LECIE1:
+.globl _ffi_call_SYSV.eh
+_ffi_call_SYSV.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1
+	.long	.LFB1-.
+	.set L$set$2,.LFE1-.LFB1
+	.long L$set$2
+	.byte	0x0
+	.byte	0x4
+	.set L$set$3,.LCFI0-.LFB1
+	.long L$set$3
+	.byte	0xe
+	.byte	0x8
+	.byte	0x84
+	.byte	0x2
+	.byte	0x4
+	.set L$set$4,.LCFI1-.LCFI0
+	.long L$set$4
+	.byte	0xd
+	.byte	0x4
+	.align 2
+LEFDE1:
+.globl _ffi_closure_SYSV.eh
+_ffi_closure_SYSV.eh:
+LSFDE2:
+	.set	L$set$5,LEFDE2-LASFDE2
+	.long	L$set$5
+LASFDE2:
+	.long	LASFDE2-EH_frame1
+	.long	.LFB2-.
+	.set L$set$6,.LFE2-.LFB2
+	.long L$set$6
+	.byte	0x0
+	.byte	0x4
+	.set L$set$7,.LCFI2-.LFB2
+	.long L$set$7
+	.byte	0xe
+	.byte	0x8
+	.byte	0x84
+	.byte	0x2
+	.byte	0x4
+	.set L$set$8,.LCFI3-.LCFI2
+	.long L$set$8
+	.byte	0xd
+	.byte	0x4
+	.align 2
+LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.globl _ffi_closure_raw_SYSV.eh
+_ffi_closure_raw_SYSV.eh:
+LSFDE3:
+	.set	L$set$10,LEFDE3-LASFDE3
+	.long	L$set$10
+LASFDE3:
+	.long	LASFDE3-EH_frame1
+	.long	.LFB3-.
+	.set L$set$11,.LFE3-.LFB3
+	.long L$set$11
+	.byte	0x0
+	.byte	0x4
+	.set L$set$12,.LCFI4-.LFB3
+	.long L$set$12
+	.byte	0xe
+	.byte	0x8
+	.byte	0x84
+	.byte	0x2
+	.byte	0x4
+	.set L$set$13,.LCFI5-.LCFI4
+	.long L$set$13
+	.byte	0xd
+	.byte	0x4
+	.byte	0x4
+	.set L$set$14,.LCFI6-.LCFI5
+	.long L$set$14
+	.byte	0x85
+	.byte	0x3
+	.align 2
+LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S
new file mode 100644
index 0000000..2f7394e
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/darwin64.S
@@ -0,0 +1,416 @@
+/* -----------------------------------------------------------------------
+   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
+	        Copyright (c) 2008 Red Hat, Inc.
+   derived from unix64.S
+
+   x86-64 Foreign Function Interface for Darwin.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+	.file "darwin64.S"
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+		    void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	3
+	.globl	_ffi_call_unix64
+
+_ffi_call_unix64:
+LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax
+	jnz	Lload_sse
+Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	movzbl	%cl, %r10d
+	leaq	Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lstore_table:
+	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
+	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
+	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
+	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
+	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
+	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
+	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
+	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align	3
+Lst_void:
+	ret
+	.align	3
+Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	.align	3
+Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint32:
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	.align	3
+Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align	3
+Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+	.align	3
+Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx
+	rep movsb
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align	3
+LUW3:
+Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	Lret_from_load_sse
+
+LUW4:
+	.align	3
+	.globl	_ffi_closure_unix64
+
+_ffi_closure_unix64:
+LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp
+LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      Lsave_sse
+Lret_from_save_sse:
+
+	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
+	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	_ffi_closure_unix64_inner
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	movzbl	%al, %r10d
+	leaq	Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lload_table:
+	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
+	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
+	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
+	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
+	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align	3
+Lld_void:
+	ret
+	.align	3
+Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+	.align	3
+Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+	.align	3
+Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above Lload_sse; the same logic applies here.  */
+	.align	3
+LUW8:
+Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	Lret_from_save_sse
+
+LUW9:
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1		/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0		/* CIE Identifier Tag */
+	.byte	0x1		/* CIE Version */
+	.ascii	"zR\0"		/* CIE Augmentation */
+	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
+	.byte	0x10		/* CIE RA Column */
+	.byte	0x1		/* uleb128 0x1; Augmentation size */
+	.byte	0x10		/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7		/* uleb128 0x7 */
+	.byte	0x8		/* uleb128 0x8 */
+	.byte	0x90		/* DW_CFA_offset, column 0x10 */
+	.byte	0x1
+	.align	3
+LECIE1:
+	.globl _ffi_call_unix64.eh
+_ffi_call_unix64.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.quad	LUW0-.			/* FDE initial location */
+	.set	L$set$2,LUW4-LUW0	/* FDE address range */
+	.quad	L$set$2
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$3,LUW1-LUW0
+	.long	L$set$3
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.byte	0x6
+	.byte	0x20
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.byte	0x2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$4,LUW2-LUW1
+	.long	L$set$4
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7
+	.byte	0x8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$5,LUW3-LUW2
+	.long	L$set$5
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE1:
+	.globl _ffi_closure_unix64.eh
+_ffi_closure_unix64.eh:
+LSFDE3:
+	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$6
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.quad	LUW5-.			/* FDE initial location */
+	.set	L$set$7,LUW9-LUW5	/* FDE address range */
+	.quad	L$set$7
+	.byte	0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$8,LUW6-LUW5
+	.long	L$set$8
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	208,1			/* uleb128 208 */
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$9,LUW7-LUW6
+	.long	L$set$9
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$10,LUW8-LUW7
+	.long	L$set$10
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE3:
+	.subsections_via_symbols
+
+#endif /* __x86_64__ */
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi.c b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi.c
new file mode 100644
index 0000000..4c68dd5
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi.c
@@ -0,0 +1,935 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+           Copyright (C) 2008, 2010  Free Software Foundation, Inc.
+
+   x86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+unsigned int ffi_prep_args(char *stack, extended_cif *ecif);
+unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+#ifndef X86_WIN64
+  const int cabi = ecif->cif->abi;
+  const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
+  unsigned int stack_args_count = 0;
+  void *p_stack_data[3];
+  char *argp2 = stack;
+#else
+  #define dir 1
+#endif
+
+  argp = stack;
+
+  if ((ecif->cif->flags == FFI_TYPE_STRUCT
+       || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+      && ((ecif->cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
+#endif
+      )
+    {
+#ifndef X86_WIN64
+      /* For fastcall/thiscall/register this is first register-passed
+         argument.  */
+      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL || cabi == FFI_REGISTER)
+        {
+          p_stack_data[stack_args_count] = argp;
+          ++stack_args_count;
+        }
+#endif
+
+      *(void **) argp = ecif->rvalue;
+      argp += sizeof(void*);
+    }
+
+  p_arg  = ecif->cif->arg_types;
+  p_argv = ecif->avalue;
+  if (dir < 0)
+    {
+      const int nargs = ecif->cif->nargs - 1;
+      if (nargs > 0)
+      {
+        p_arg  += nargs;
+        p_argv += nargs;
+      }
+    }
+
+  for (i = ecif->cif->nargs;
+       i != 0;
+       i--, p_arg += dir, p_argv += dir)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp)
+        argp = (char *) ALIGN(argp, sizeof(void*));
+
+      z = (*p_arg)->size;
+
+#ifdef X86_WIN64
+      if (z > FFI_SIZEOF_ARG
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && (z & (1 | 2 | 4 | 8)) == 0)
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+          || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+          )
+        {
+          z = FFI_SIZEOF_ARG;
+          *(void **)argp = *p_argv;
+        }
+      else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+        {
+          memcpy(argp, *p_argv, z);
+        }
+      else
+#endif
+      if (z < FFI_SIZEOF_ARG)
+        {
+          z = FFI_SIZEOF_ARG;
+          switch ((*p_arg)->type)
+            {
+            case FFI_TYPE_SINT8:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT8:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT16:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT16:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT32:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT32:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_STRUCT:
+              *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+              break;
+
+            default:
+              FFI_ASSERT(0);
+            }
+        }
+      else
+        {
+          memcpy(argp, *p_argv, z);
+        }
+
+#ifndef X86_WIN64
+    /* For thiscall/fastcall/register convention register-passed arguments
+       are the first two none-floating-point arguments with a size
+       smaller or equal to sizeof (void*).  */
+    if ((z == FFI_SIZEOF_ARG)
+        && ((cabi == FFI_REGISTER)
+          || (cabi == FFI_THISCALL && stack_args_count < 1)
+          || (cabi == FFI_FASTCALL && stack_args_count < 2))
+        && ((*p_arg)->type != FFI_TYPE_FLOAT && (*p_arg)->type != FFI_TYPE_STRUCT)
+       )
+      {
+        if (dir < 0 && stack_args_count > 2)
+          {
+            /* Iterating arguments backwards, so first register-passed argument
+               will be passed last. Shift temporary values to make place. */
+            p_stack_data[0] = p_stack_data[1];
+            p_stack_data[1] = p_stack_data[2];
+            stack_args_count = 2;
+          }
+
+        p_stack_data[stack_args_count] = argp;
+        ++stack_args_count;
+      }
+#endif
+
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+
+#ifndef X86_WIN64
+  /* We need to move the register-passed arguments for thiscall/fastcall/register
+     on top of stack, so that those can be moved to registers by call-handler.  */
+  if (stack_args_count > 0)
+    {
+      if (dir < 0 && stack_args_count > 1)
+        {
+          /* Reverse order if iterating arguments backwards */
+          ffi_arg tmp = *(ffi_arg*) p_stack_data[0];
+          *(ffi_arg*) p_stack_data[0] = *(ffi_arg*) p_stack_data[stack_args_count - 1];
+          *(ffi_arg*) p_stack_data[stack_args_count - 1] = tmp;
+        }
+      
+      int i;
+      for (i = 0; i < stack_args_count; i++)
+        {
+          if (p_stack_data[i] != argp2)
+            {
+              ffi_arg tmp = *(ffi_arg*) p_stack_data[i];
+              memmove (argp2 + FFI_SIZEOF_ARG, argp2, (size_t) ((char*) p_stack_data[i] - (char*)argp2));
+              *(ffi_arg *) argp2 = tmp;
+            }
+
+          argp2 += FFI_SIZEOF_ARG;
+        }
+    }
+
+    return stack_args_count;
+#endif
+    return 0;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  unsigned int i;
+  ffi_type **ptr;
+
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+#ifdef X86_WIN64
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+#endif
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    case FFI_TYPE_STRUCT:
+#ifndef X86
+      if (cif->rtype->size == 1)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+        }
+      else if (cif->rtype->size == 2)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+        }
+      else if (cif->rtype->size == 4)
+        {
+#ifdef X86_WIN64
+          cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+          cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+        }
+      else if (cif->rtype->size == 8)
+        {
+          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+        }
+      else
+#endif
+        {
+#ifdef X86_WIN32
+          if (cif->abi == FFI_MS_CDECL)
+            cif->flags = FFI_TYPE_MS_STRUCT;
+          else
+#endif
+            cif->flags = FFI_TYPE_STRUCT;
+          /* allocate space for return value pointer */
+          cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+        }
+      break;
+
+    default:
+#ifdef X86_WIN64
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+    case FFI_TYPE_INT:
+      cif->flags = FFI_TYPE_SINT32;
+#else
+      cif->flags = FFI_TYPE_INT;
+#endif
+      break;
+    }
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      if (((*ptr)->alignment - 1) & cif->bytes)
+        cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+      cif->bytes += (unsigned)ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+    }
+
+#ifdef X86_WIN64
+  /* ensure space for storing four registers */
+  cif->bytes += 4 * FFI_SIZEOF_ARG;
+#endif
+
+#ifndef X86_WIN32
+#ifndef X86_WIN64
+  if (cif->abi == FFI_SYSV || cif->abi == FFI_UNIX64)
+#endif
+    cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
+  return FFI_OK;
+}
+
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(unsigned int (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void
+ffi_call_win32(unsigned int (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+                          unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+#ifdef X86_WIN64
+  if (rvalue == NULL
+      && cif->flags == FFI_TYPE_STRUCT
+      && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0))
+    {
+      ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+    }
+#else
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+#endif
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN64
+    case FFI_WIN64:
+      ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+                     cif->flags, ecif.rvalue, fn);
+      break;
+#else
+#ifndef X86_WIN32
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+                    fn);
+      break;
+#else
+    case FFI_SYSV:
+    case FFI_MS_CDECL:
+#endif
+    case FFI_STDCALL:
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+    case FFI_PASCAL:
+    case FFI_REGISTER:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+
+/** private members **/
+
+/* The following __attribute__((regparm(1))) decorations will have no effect
+   on MSVC or SUNPRO_C -- standard conventions apply. */
+static unsigned int ffi_prep_incoming_args (char *stack, void **ret,
+                                            void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_WIN32_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#endif
+#ifndef X86_WIN64
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *);
+void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *);
+void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *);
+void FFI_HIDDEN ffi_closure_REGISTER (ffi_closure *);
+#else
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+  ffi_cif       *cif;
+  void         **arg_area;
+  void          *result;
+  void          *resp = &result;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args(args, &resp, arg_area, cif);
+  
+  (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+  /* The result is returned in rax.  This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+     TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
+  */
+  return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+}
+
+#else
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+  /* our various things...  */
+  ffi_cif       *cif;
+  void         **arg_area;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args(args, respp, arg_area, cif);
+
+  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+  return cif->flags;
+}
+
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args)
+{
+  /* our various things...  */
+  ffi_cif       *cif;
+  void         **arg_area;
+  unsigned int   ret;
+
+  cif         = closure->cif;
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will change RESP to point to the
+   * structure return address.  */
+
+  ret = ffi_prep_incoming_args(args, respp, arg_area, cif);
+
+  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+  return ret;
+}
+#endif /* !X86_WIN64 */
+
+static unsigned int
+ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
+                       ffi_cif *cif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+#ifndef X86_WIN64
+  const int cabi = cif->abi;
+  const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
+  const unsigned int max_stack_count = (cabi == FFI_THISCALL) ? 1
+                                     : (cabi == FFI_FASTCALL) ? 2
+                                     : (cabi == FFI_REGISTER) ? 3
+                                     : 0;
+  unsigned int passed_regs = 0;
+  void *p_stack_data[3] = { stack - 1 };
+#else
+  #define dir 1
+#endif
+
+  argp = stack;
+#ifndef X86_WIN64
+  argp += max_stack_count * FFI_SIZEOF_ARG;
+#endif
+
+  if ((cif->flags == FFI_TYPE_STRUCT
+       || cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+      && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
+#endif
+      )
+    {
+#ifndef X86_WIN64
+      if (passed_regs < max_stack_count)
+        {
+          *rvalue = *(void**) (stack + (passed_regs*FFI_SIZEOF_ARG));
+          ++passed_regs;
+        }
+      else
+#endif
+        {
+          *rvalue = *(void **) argp;
+          argp += sizeof(void *);
+        }
+    }
+
+#ifndef X86_WIN64
+  /* Do register arguments first  */
+  for (i = 0, p_arg = cif->arg_types; 
+       i < cif->nargs && passed_regs < max_stack_count;
+       i++, p_arg++)
+    {
+      if ((*p_arg)->type == FFI_TYPE_FLOAT
+         || (*p_arg)->type == FFI_TYPE_STRUCT)
+        continue;
+
+      size_t sz = (*p_arg)->size;
+      if(sz == 0 || sz > FFI_SIZEOF_ARG)
+        continue;
+
+      p_stack_data[passed_regs] = avalue + i;
+      avalue[i] = stack + (passed_regs*FFI_SIZEOF_ARG);
+      ++passed_regs;
+    }
+#endif
+
+  p_arg = cif->arg_types;
+  p_argv = avalue;
+  if (dir < 0)
+    {
+      const int nargs = cif->nargs - 1;
+      if (nargs > 0)
+      {
+        p_arg  += nargs;
+        p_argv += nargs;
+      }
+    }
+
+  for (i = cif->nargs;
+       i != 0;
+       i--, p_arg += dir, p_argv += dir)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp)
+        argp = (char *) ALIGN(argp, sizeof(void*));
+
+      z = (*p_arg)->size;
+
+#ifdef X86_WIN64
+      if (z > FFI_SIZEOF_ARG
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && (z & (1 | 2 | 4 | 8)) == 0)
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+          || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+          )
+        {
+          z = FFI_SIZEOF_ARG;
+          *p_argv = *(void **)argp;
+        }
+      else
+#else
+      if (passed_regs > 0
+          && z <= FFI_SIZEOF_ARG
+          && (p_argv == p_stack_data[0]
+            || p_argv == p_stack_data[1]
+            || p_argv == p_stack_data[2]))
+        {
+          /* Already assigned a register value */
+          continue;
+        }
+      else
+#endif
+        {
+          /* because we're little endian, this is what it turns into.   */
+          *p_argv = (void*) argp;
+        }
+
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+
+  return (int)((size_t)argp - (size_t)stack);
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   void*  __fun = (void*)(FUN); \
+   void*  __ctx = (void*)(CTX); \
+   *(unsigned char*) &__tramp[0] = 0x41; \
+   *(unsigned char*) &__tramp[1] = 0xbb; \
+   *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
+   *(unsigned char*) &__tramp[6] = 0x48; \
+   *(unsigned char*) &__tramp[7] = 0xb8; \
+   *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
+   *(unsigned char *)  &__tramp[16] = 0x49; \
+   *(unsigned char *)  &__tramp[17] = 0xba; \
+   *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
+   *(unsigned char *)  &__tramp[26] = 0x41; \
+   *(unsigned char *)  &__tramp[27] = 0xff; \
+   *(unsigned char *)  &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
+/* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10);  \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char*) &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_RAW_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 49);  \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned int *) &__tramp[0] = 0x8324048b;      /* mov (%esp), %eax */ \
+   *(unsigned int *) &__tramp[4] = 0x4c890cec;      /* sub $12, %esp */ \
+   *(unsigned int *) &__tramp[8] = 0x04890424;      /* mov %ecx, 4(%esp) */ \
+   *(unsigned char*) &__tramp[12] = 0x24;           /* mov %eax, (%esp) */ \
+   *(unsigned char*) &__tramp[13] = 0xb8; \
+   *(unsigned int *) &__tramp[14] = __size;         /* mov __size, %eax */ \
+   *(unsigned int *) &__tramp[18] = 0x08244c8d;     /* lea 8(%esp), %ecx */ \
+   *(unsigned int *) &__tramp[22] = 0x4802e8c1;     /* shr $2, %eax ; dec %eax */ \
+   *(unsigned short*) &__tramp[26] = 0x0b74;        /* jz 1f */ \
+   *(unsigned int *) &__tramp[28] = 0x8908518b;     /* 2b: mov 8(%ecx), %edx */ \
+   *(unsigned int *) &__tramp[32] = 0x04c18311;     /* mov %edx, (%ecx) ; add $4, %ecx */ \
+   *(unsigned char*) &__tramp[36] = 0x48;           /* dec %eax */ \
+   *(unsigned short*) &__tramp[37] = 0xf575;        /* jnz 2b ; 1f: */ \
+   *(unsigned char*) &__tramp[39] = 0xb8; \
+   *(unsigned int*)  &__tramp[40] = __ctx;          /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[44] = 0xe8; \
+   *(unsigned int*)  &__tramp[45] = __dis;          /* call __fun  */ \
+   *(unsigned char*)  &__tramp[49] = 0xc2;          /* ret  */ \
+   *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_WIN32(TRAMP,FUN,CTX)  \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); \
+   *(unsigned char*) &__tramp[0] = 0x68; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* push __ctx */ \
+   *(unsigned char*) &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ }
+
+/* the cif must already be prep'ed */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+  if (cif->abi == FFI_WIN64) 
+    {
+      int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
+      FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+                                 &ffi_closure_win64,
+                                 codeloc, mask);
+      /* make sure we can execute here */
+    }
+#else
+  if (cif->abi == FFI_SYSV)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+  else if (cif->abi == FFI_REGISTER)
+    {
+      FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
+                                   &ffi_closure_REGISTER,
+                                   (void*)codeloc);
+    }
+  else if (cif->abi == FFI_FASTCALL)
+    {
+      FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
+                                   &ffi_closure_FASTCALL,
+                                   (void*)codeloc);
+    }
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
+                                   &ffi_closure_THISCALL,
+                                   (void*)codeloc);
+    }
+  else if (cif->abi == FFI_STDCALL || cif->abi == FFI_PASCAL)
+    {
+      FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
+                                   &ffi_closure_STDCALL,
+                                   (void*)codeloc);
+    }
+#ifdef X86_WIN32
+  else if (cif->abi == FFI_MS_CDECL)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+  else
+    {
+      return FFI_BAD_ABI;
+    }
+    
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+                          ffi_cif* cif,
+                          void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+                          void *user_data,
+                          void *codeloc)
+{
+  int i;
+
+  if (cif->abi != FFI_SYSV
+#ifdef X86_WIN32
+      && cif->abi != FFI_THISCALL
+#endif
+     )
+    return FFI_BAD_ABI;
+
+  /* we currently don't support certain kinds of arguments for raw
+     closures.  This should be implemented by a separate assembly
+     language routine, since it would require argument processing,
+     something we don't do now for performance.  */
+
+  for (i = cif->nargs-1; i >= 0; i--)
+    {
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
+    }
+  
+#ifdef X86_WIN32
+  if (cif->abi == FFI_SYSV)
+    {
+#endif
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+                       codeloc);
+#ifdef X86_WIN32
+    }
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_RAW_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, codeloc, cif->bytes);
+    }
+#endif
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+static unsigned int 
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  const ffi_cif *cif = ecif->cif;
+  unsigned int i, passed_regs = 0;
+  
+#ifndef X86_WIN64
+  const unsigned int abi = cif->abi;
+  const unsigned int max_regs = (abi == FFI_THISCALL) ? 1
+                              : (abi == FFI_FASTCALL) ? 2
+                              : (abi == FFI_REGISTER) ? 3
+                              : 0;
+
+  if (cif->flags == FFI_TYPE_STRUCT)
+    ++passed_regs;
+  
+  for (i = 0; i < cif->nargs && passed_regs <= max_regs; i++)
+    {
+      if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+         || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+        continue;
+
+      size_t sz = cif->arg_types[i]->size;
+      if (sz == 0 || sz > FFI_SIZEOF_ARG)
+        continue;
+
+      ++passed_regs;
+    }
+#endif
+
+  memcpy (stack, ecif->avalue, cif->bytes);
+  return passed_regs;
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument.  as of
+ * libffi-1.20, this is not the case.)
+ */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+  extended_cif ecif;
+  void **avalue = (void **)fake_avalue;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifndef X86_WIN32
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                    ecif.rvalue, fn);
+      break;
+#else
+    case FFI_SYSV:
+    case FFI_MS_CDECL:
+#endif
+#ifndef X86_WIN64
+    case FFI_STDCALL:
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+    case FFI_PASCAL:
+    case FFI_REGISTER:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__  || X86_WIN64 */
+
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi64.c b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi64.c
new file mode 100644
index 0000000..5a5e043
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffi64.c
@@ -0,0 +1,676 @@
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2013  The Written Word, Inc.
+             Copyright (c) 2011  Anthony Green
+             Copyright (c) 2008, 2010  Red Hat, Inc.
+             Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
+
+   x86-64 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#if defined(__INTEL_COMPILER)
+#include "xmmintrin.h"
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+  UINT32 i32;
+  UINT64 i64;
+  UINT128 i128;
+};
+
+struct register_args
+{
+  /* Registers for argument passing.  */
+  UINT64 gpr[MAX_GPR_REGS];
+  union big_int_union sse[MAX_SSE_REGS];
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+			     void *raddr, void (*fnaddr)(void), unsigned ssecount);
+
+/* All reference to register classes here is identical to the code in
+   gcc/config/i386/i386.c. Do *not* change one without the other.  */
+
+/* Register class used for passing given 64bit part of the argument.
+   These represent classes as documented by the PS ABI, with the
+   exception of SSESF, SSEDF classes, that are basically SSE class,
+   just gcc will use SF or DFmode move instead of DImode to avoid
+   reformatting penalties.
+
+   Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
+   whenever possible (upper half does contain padding).  */
+enum x86_64_reg_class
+  {
+    X86_64_NO_CLASS,
+    X86_64_INTEGER_CLASS,
+    X86_64_INTEGERSI_CLASS,
+    X86_64_SSE_CLASS,
+    X86_64_SSESF_CLASS,
+    X86_64_SSEDF_CLASS,
+    X86_64_SSEUP_CLASS,
+    X86_64_X87_CLASS,
+    X86_64_X87UP_CLASS,
+    X86_64_COMPLEX_X87_CLASS,
+    X86_64_MEMORY_CLASS
+  };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
+   of this code is to classify each 8bytes of incoming argument by the register
+   class and assign registers accordingly.  */
+
+/* Return the union class of CLASS1 and CLASS2.
+   See the x86-64 PS ABI for details.  */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  /* Rule #1: If both classes are equal, this is the resulting class.  */
+  if (class1 == class2)
+    return class1;
+
+  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+     the other class.  */
+  if (class1 == X86_64_NO_CLASS)
+    return class2;
+  if (class2 == X86_64_NO_CLASS)
+    return class1;
+
+  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
+  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
+  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+    return X86_64_INTEGERSI_CLASS;
+  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+    return X86_64_INTEGER_CLASS;
+
+  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+     MEMORY is used.  */
+  if (class1 == X86_64_X87_CLASS
+      || class1 == X86_64_X87UP_CLASS
+      || class1 == X86_64_COMPLEX_X87_CLASS
+      || class2 == X86_64_X87_CLASS
+      || class2 == X86_64_X87UP_CLASS
+      || class2 == X86_64_COMPLEX_X87_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #6: Otherwise class SSE is used.  */
+  return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE and mode MODE.
+   CLASSES will be filled by the register class used to pass each word
+   of the operand.  The number of words is returned.  In case the parameter
+   should be passed in memory, 0 is returned. As a special case for zero
+   sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+   See the x86-64 PS ABI for details.
+*/
+static size_t
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+		   size_t byte_offset)
+{
+  switch (type->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      {
+	size_t size = byte_offset + type->size;
+
+	if (size <= 4)
+	  {
+	    classes[0] = X86_64_INTEGERSI_CLASS;
+	    return 1;
+	  }
+	else if (size <= 8)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    return 1;
+	  }
+	else if (size <= 12)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else if (size <= 16)
+	  {
+	    classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else
+	  FFI_ASSERT (0);
+      }
+    case FFI_TYPE_FLOAT:
+      if (!(byte_offset % 8))
+	classes[0] = X86_64_SSESF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
+    case FFI_TYPE_DOUBLE:
+      classes[0] = X86_64_SSEDF_CLASS;
+      return 1;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      classes[0] = X86_64_X87_CLASS;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;
+#endif
+    case FFI_TYPE_STRUCT:
+      {
+	const size_t UNITS_PER_WORD = 8;
+	size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	ffi_type **ptr;
+	int i;
+	enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+	/* If the struct is larger than 32 bytes, pass it on the stack.  */
+	if (type->size > 32)
+	  return 0;
+
+	for (i = 0; i < words; i++)
+	  classes[i] = X86_64_NO_CLASS;
+
+	/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
+	   signalize memory class, so handle it as special case.  */
+	if (!words)
+	  {
+	    classes[0] = X86_64_NO_CLASS;
+	    return 1;
+	  }
+
+	/* Merge the fields of structure.  */
+	for (ptr = type->elements; *ptr != NULL; ptr++)
+	  {
+	    size_t num;
+
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
+	    if (num == 0)
+	      return 0;
+	    for (i = 0; i < num; i++)
+	      {
+		size_t pos = byte_offset / 8;
+		classes[i + pos] =
+		  merge_classes (subclasses[i], classes[i + pos]);
+	      }
+
+	    byte_offset += (*ptr)->size;
+	  }
+
+	if (words > 2)
+	  {
+	    /* When size > 16 bytes, if the first one isn't
+	       X86_64_SSE_CLASS or any other ones aren't
+	       X86_64_SSEUP_CLASS, everything should be passed in
+	       memory.  */
+	    if (classes[0] != X86_64_SSE_CLASS)
+	      return 0;
+
+	    for (i = 1; i < words; i++)
+	      if (classes[i] != X86_64_SSEUP_CLASS)
+		return 0;
+	  }
+
+	/* Final merger cleanup.  */
+	for (i = 0; i < words; i++)
+	  {
+	    /* If one class is MEMORY, everything should be passed in
+	       memory.  */
+	    if (classes[i] == X86_64_MEMORY_CLASS)
+	      return 0;
+
+	    /* The X86_64_SSEUP_CLASS should be always preceded by
+	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
+	    if (classes[i] == X86_64_SSEUP_CLASS
+		&& classes[i - 1] != X86_64_SSE_CLASS
+		&& classes[i - 1] != X86_64_SSEUP_CLASS)
+	      {
+		/* The first one should never be X86_64_SSEUP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		classes[i] = X86_64_SSE_CLASS;
+	      }
+
+	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+		everything should be passed in memory.  */
+	    if (classes[i] == X86_64_X87UP_CLASS
+		&& (classes[i - 1] != X86_64_X87_CLASS))
+	      {
+		/* The first one should never be X86_64_X87UP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		return 0;
+	      }
+	  }
+	return words;
+      }
+
+    default:
+      FFI_ASSERT(0);
+    }
+  return 0; /* Never reached.  */
+}
+
+/* Examine the argument and return set number of register required in each
+   class.  Return zero iff parameter should be passed in memory, otherwise
+   the number of registers.  */
+
+static size_t
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
+{
+  size_t n;
+  int i, ngpr, nsse;
+
+  n = classify_argument (type, classes, 0);
+  if (n == 0)
+    return 0;
+
+  ngpr = nsse = 0;
+  for (i = 0; i < n; ++i)
+    switch (classes[i])
+      {
+      case X86_64_INTEGER_CLASS:
+      case X86_64_INTEGERSI_CLASS:
+	ngpr++;
+	break;
+      case X86_64_SSE_CLASS:
+      case X86_64_SSESF_CLASS:
+      case X86_64_SSEDF_CLASS:
+	nsse++;
+	break;
+      case X86_64_NO_CLASS:
+      case X86_64_SSEUP_CLASS:
+	break;
+      case X86_64_X87_CLASS:
+      case X86_64_X87UP_CLASS:
+      case X86_64_COMPLEX_X87_CLASS:
+	return in_return != 0;
+      default:
+	abort ();
+      }
+
+  *pngpr = ngpr;
+  *pnsse = nsse;
+
+  return n;
+}
+
+/* Perform machine dependent cif processing.  */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  int gprcount, ssecount, i, avn, ngpr, nsse, flags;
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  size_t bytes, n;
+
+  gprcount = ssecount = 0;
+
+  flags = cif->rtype->type;
+  if (flags != FFI_TYPE_VOID)
+    {
+      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value is passed in memory.  A pointer to that
+	     memory is the first argument.  Allocate a register for it.  */
+	  gprcount++;
+	  /* We don't have to do anything in asm for the return.  */
+	  flags = FFI_TYPE_VOID;
+	}
+      else if (flags == FFI_TYPE_STRUCT)
+	{
+	  /* Mark which registers the result appears in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	  if (sse0 && !sse1)
+	    flags |= 1 << 8;
+	  else if (!sse0 && sse1)
+	    flags |= 1 << 9;
+	  else if (sse0 && sse1)
+	    flags |= 1 << 10;
+	  /* Mark the true size of the structure.  */
+	  flags |= cif->rtype->size << 12;
+	}
+    }
+
+  /* Go over all arguments and determine the way they should be passed.
+     If it's in a register and there is space for it, let that be so. If
+     not, add it's size to the stack byte count.  */
+  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+    {
+      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = cif->arg_types[i]->alignment;
+
+	  if (align < 8)
+	    align = 8;
+
+	  bytes = ALIGN (bytes, align);
+	  bytes += cif->arg_types[i]->size;
+	}
+      else
+	{
+	  gprcount += ngpr;
+	  ssecount += nsse;
+	}
+    }
+  if (ssecount)
+    flags |= 1 << 11;
+  cif->flags = flags;
+  cif->bytes = (unsigned)ALIGN (bytes, 8);
+
+  return FFI_OK;
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack;
+  argp = stack + sizeof (struct register_args);
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t n, size = arg_types[i]->size;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8)
+	    {
+	      switch (classes[j])
+		{
+		case X86_64_INTEGER_CLASS:
+		case X86_64_INTEGERSI_CLASS:
+		  /* Sign-extend integer arguments passed in general
+		     purpose registers, to cope with the fact that
+		     LLVM incorrectly assumes that this will be done
+		     (the x86-64 PS ABI does not specify this). */
+		  switch (arg_types[i]->type)
+		    {
+		    case FFI_TYPE_SINT8:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+		      break;
+		    case FFI_TYPE_SINT16:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+		      break;
+		    case FFI_TYPE_SINT32:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+		      break;
+		    default:
+		      reg_args->gpr[gprcount] = 0;
+		      memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		    }
+		  gprcount++;
+		  break;
+		case X86_64_SSE_CLASS:
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+		  break;
+		case X86_64_SSESF_CLASS:
+		  reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+		  break;
+		default:
+		  abort();
+		}
+	    }
+	}
+    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  volatile unsigned short *tramp;
+
+  /* Sanity check on the cif ABI.  */
+  {
+    int abi = cif->abi;
+    if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
+      return FFI_BAD_ABI;
+  }
+
+  tramp = (volatile unsigned short *) &closure->tramp[0];
+
+  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *((unsigned long long * volatile) &tramp[1])
+    = (unsigned long) ffi_closure_unix64;
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
+  *((unsigned long long * volatile) &tramp[6])
+    = (unsigned long) codeloc;
+
+  /* Set the carry bit iff the function uses any sse registers.
+     This is clc or stc, together with the first byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+  gprcount = ssecount = 0;
+
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.  */
+	  rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;
+	}
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      size_t n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;
+	  argp += arg_types[i]->size;
+	}
+      /* If the argument is in a single register, or two consecutive
+	 integer registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2 && !(SSE_CLASS_P (classes[0])
+			       || SSE_CLASS_P (classes[1]))))
+	{
+	  /* The argument is in a single register.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  char *a = alloca (16);
+	  int j;
+
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
+    }
+
+  /* Invoke the closure.  */
+  closure->fun (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
+}
+
+#endif /* __x86_64__ */
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffitarget.h b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffitarget.h
new file mode 100644
index 0000000..ff0f718
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/ffitarget.h
@@ -0,0 +1,152 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2012, 2014  Anthony Green
+                 Copyright (c) 1996-2003, 2010  Red Hat, Inc.
+                 Copyright (C) 2008  Free Software Foundation, Inc.
+
+   Target configuration macros for x86 and x86-64.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+/* For code common to all platforms on x86 and x86_64. */
+#define X86_ANY
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
+#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+#ifndef _MSC_VER
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64       ffi_arg;
+typedef __int64                ffi_sarg;
+#else
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#endif
+#else
+#if defined __x86_64__ && defined __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
+typedef unsigned long long     ffi_arg;
+typedef long long              ffi_sarg;
+#else
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+#endif
+#endif
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+
+  /* ---- Intel x86 Win32 ---------- */
+#ifdef X86_WIN32
+  FFI_SYSV,
+  FFI_STDCALL,
+  FFI_THISCALL,
+  FFI_FASTCALL,
+  FFI_MS_CDECL,
+  FFI_PASCAL,
+  FFI_REGISTER,
+  FFI_LAST_ABI,
+#ifdef _MSC_VER
+  FFI_DEFAULT_ABI = FFI_MS_CDECL
+#else
+  FFI_DEFAULT_ABI = FFI_SYSV
+#endif
+
+#elif defined(X86_WIN64)
+  FFI_WIN64,
+  FFI_LAST_ABI,
+  FFI_DEFAULT_ABI = FFI_WIN64
+
+#else
+  /* ---- Intel x86 and AMD x86-64 - */
+  FFI_SYSV,
+  FFI_UNIX64,   /* Unix variants all use the same ABI for x86-64  */
+  FFI_THISCALL,
+  FFI_FASTCALL,
+  FFI_STDCALL,
+  FFI_PASCAL,
+  FFI_REGISTER,
+  FFI_LAST_ABI,
+#if defined(__i386__) || defined(__i386)
+  FFI_DEFAULT_ABI = FFI_SYSV
+#else
+  FFI_DEFAULT_ABI = FFI_UNIX64
+#endif
+#endif
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT       (FFI_TYPE_LAST + 4)
+
+#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_NATIVE_RAW_API 0
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 52
+#else
+#ifdef X86_WIN64
+#define FFI_TRAMPOLINE_SIZE 29
+#define FFI_NATIVE_RAW_API 0
+#define FFI_NO_RAW_API 1
+#else
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#endif
+#ifndef X86_WIN64
+#define FFI_NATIVE_RAW_API 1  /* x86 has native raw api support */
+#endif
+#endif
+
+#endif
+
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/freebsd.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/freebsd.S
new file mode 100644
index 0000000..97e0b4e
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/freebsd.S
@@ -0,0 +1,463 @@
+/* -----------------------------------------------------------------------
+   freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	       Copyright (c) 2008  Björn König
+	
+   X86 Foreign Function Interface for FreeBSD
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	/* Align the stack pointer to 16-bytes */
+	andl  $0xfffffff0, %esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are 
+	   obliged to handle floating-point values.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jne   noretval
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	call  1f
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+
+1:
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpt (%ecx)
+	jmp   epilogue
+	
+retint64:	
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+	
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+        popl %esi
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx
+#endif
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1	/* FDE initial location */
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB2-.	/* FDE initial location */
+#else
+	.long	.LFB2
+#endif
+	.long	.LFE2-.LFB2	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/sysv.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/sysv.S
new file mode 100644
index 0000000..3bd5477
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/sysv.S
@@ -0,0 +1,483 @@
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 2013  The Written Word, Inc.
+	  - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
+   
+   X86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+        /* Align the stack pointer to 16-bytes */
+        andl  $0xfffffff0, %esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are 
+	   obliged to handle floating-point values.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jne   noretval
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	call  1f
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+
+1:
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpt (%ecx)
+	jmp   epilogue
+	
+retint64:	
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+	
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+        popl %esi
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+#ifdef __SUNPRO_C
+	/* The SUNPRO compiler doesn't support GCC's regparm function
+  	   attribute, so we have to pass all three arguments to
+	   ffi_closure_SYSV_inner on the stack.  */
+	movl	%edx, 8(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &resp */
+	movl    %eax, (%esp)    /* closure */
+#else
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#endif
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx
+#endif
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+/* Precalculate for e.g. the Solaris 10/x86 assembler.  */
+#if FFI_TRAMPOLINE_SIZE == 10
+#define RAW_CLOSURE_CIF_OFFSET 12
+#define RAW_CLOSURE_FUN_OFFSET 16
+#define RAW_CLOSURE_USER_DATA_OFFSET 20
+#elif FFI_TRAMPOLINE_SIZE == 24
+#define RAW_CLOSURE_CIF_OFFSET 24
+#define RAW_CLOSURE_FUN_OFFSET 28
+#define RAW_CLOSURE_USER_DATA_OFFSET 32
+#else
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#endif
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+#if defined __GNUC__
+/* Only emit dwarf unwind info when building with GNU toolchain.  */
+
+#if defined __PIC__
+# if defined __sun__ && defined __svr4__
+/* 32-bit Solaris 2/x86 uses datarel encoding for PIC.  GNU ld before 2.22
+   doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this.  */
+#  define FDE_ENCODING		0x30	/* datarel */
+#  define FDE_ENCODE(X)		X@GOTOFF
+# else
+#  define FDE_ENCODING		0x1b	/* pcrel sdata4 */
+#  if defined HAVE_AS_X86_PCREL
+#   define FDE_ENCODE(X)	X-.
+#  else
+#   define FDE_ENCODE(X)	X@rel
+#  endif
+# endif
+#else
+# define FDE_ENCODING		0	/* absolute */
+# define FDE_ENCODE(X)		X
+#endif
+
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef HAVE_AS_ASCII_PSEUDO_OP
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+#elif defined HAVE_AS_STRING_PSEUDO_OP
+#ifdef __PIC__
+	.string "zR"	/* CIE Augmentation */
+#else
+	.string ""	/* CIE Augmentation */
+#endif
+#else
+#error missing .ascii/.string
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	FDE_ENCODING
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB1)	/* FDE initial location */
+	.long	.LFE1-.LFB1		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB2)	/* FDE initial location */
+	.long	.LFE2-.LFB2		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB3)	/* FDE initial location */
+	.long	.LFE3-.LFB3		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S
new file mode 100644
index 0000000..dcd6bc7
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/unix64.S
@@ -0,0 +1,432 @@
+/* -----------------------------------------------------------------------
+   unix64.S - Copyright (c) 2013  The Written Word, Inc.
+	    - Copyright (c) 2008  Red Hat, Inc
+	    - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
+
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+	            void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	2
+	.globl	ffi_call_unix64
+	.type	ffi_call_unix64,@function
+
+ffi_call_unix64:
+.LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+.LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax
+	jnz	.Lload_sse
+.Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+.LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	movzbl	%cl, %r10d
+	leaq	.Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+.Lstore_table:
+	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
+	.long	.Lst_float-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	.Lst_double-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lst_ldouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lst_uint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	.Lst_sint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	.Lst_uint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	.Lst_sint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	.Lst_uint32-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */
+
+	.align 2
+.Lst_void:
+	ret
+	.align 2
+
+.Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	.align 2
+.Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint32:
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	.align 2
+.Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+
+	.align 2
+.Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align 2
+.Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+.Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+
+	.align 2
+.Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx
+	rep movsb
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align 2
+.LUW3:
+.Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	.Lret_from_load_sse
+
+.LUW4:
+	.size    ffi_call_unix64,.-ffi_call_unix64
+
+	.align	2
+	.globl ffi_closure_unix64
+	.type	ffi_closure_unix64,@function
+
+ffi_closure_unix64:
+.LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp
+.LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      .Lsave_sse
+.Lret_from_save_sse:
+
+	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
+	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	ffi_closure_unix64_inner@PLT
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+.LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	movzbl	%al, %r10d
+	leaq	.Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+.Lload_table:
+	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
+	.long	.Lld_float-.Lload_table		/* FFI_TYPE_FLOAT */
+	.long	.Lld_double-.Lload_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lld_ldouble-.Lload_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_UINT8 */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_SINT8 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_UINT16 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_SINT16 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_UINT32 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_SINT32 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_UINT64 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
+	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
+
+	.align 2
+.Lld_void:
+	ret
+
+	.align 2
+.Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+
+	.align 2
+.Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+
+	.align 2
+.Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above .Lload_sse; the same logic applies here.  */
+	.align 2
+.LUW8:
+.Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	.Lret_from_save_sse
+
+.LUW9:
+	.size	ffi_closure_unix64,.-ffi_closure_unix64
+
+#ifdef __GNUC__
+/* Only emit DWARF unwind info when building with the GNU toolchain.  */
+
+#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
+	.section	.eh_frame,"a",@unwind
+#else
+	.section	.eh_frame,"a",@progbits
+#endif
+.Lframe1:
+	.long	.LECIE1-.LSCIE1		/* CIE Length */
+.LSCIE1:
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii "zR\0"			/* CIE Augmentation */
+	.uleb128 1			/* CIE Code Alignment Factor */
+	.sleb128 -8			/* CIE Data Alignment Factor */
+	.byte	0x10			/* CIE RA Column */
+	.uleb128 1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0x80+16			/* DW_CFA_offset, %rip offset 1*-8 */
+	.uleb128 1
+	.align 8
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+	.long	.LUW0-.			/* FDE initial location */
+#else
+	.long	.LUW0@rel
+#endif
+	.long	.LUW4-.LUW0		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW1-.LUW0
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.uleb128 6
+	.uleb128 32
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.uleb128 2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW2-.LUW1
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW3-.LUW2
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE1:
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+	.long	.LUW5-.			/* FDE initial location */
+#else
+	.long	.LUW5@rel
+#endif
+	.long	.LUW9-.LUW5		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW6-.LUW5
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 208
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW7-.LUW6
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 8
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW8-.LUW7
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE3:
+
+#endif /* __GNUC__ */
+	
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win32.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win32.S
new file mode 100644
index 0000000..3680bf5
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win32.S
@@ -0,0 +1,1351 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 2014  Anthony Green
+             Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+             Copyright (c) 2001  John Beniton
+             Copyright (c) 2002  Ranjit Mathew
+             Copyright (c) 2009  Daniel Witte
+
+
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#define CIF_BYTES_OFFSET 16
+#define CIF_FLAGS_OFFSET 20
+
+#ifdef _MSC_VER
+
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+
+.386
+.MODEL FLAT, C
+
+EXTRN ffi_closure_SYSV_inner:NEAR
+EXTRN ffi_closure_WIN32_inner:NEAR
+
+_TEXT SEGMENT
+
+ffi_call_win32 PROC NEAR,
+    ffi_prep_args : NEAR PTR DWORD,
+    ecif          : NEAR PTR DWORD,
+    cif_abi       : DWORD,
+    cif_bytes     : DWORD,
+    cif_flags     : DWORD,
+    rvalue        : NEAR PTR DWORD,
+    fn            : NEAR PTR DWORD
+
+        ;; Make room for all of the new args.
+        mov  ecx, cif_bytes
+        sub  esp, ecx
+
+        mov  eax, esp
+
+        ;; Call ffi_prep_args
+        push ecif
+        push eax
+        call ffi_prep_args
+        add  esp, 8
+
+        ;; Prepare registers
+        ;; EAX stores the number of register arguments
+        cmp  eax, 0
+        je   fun
+        cmp  eax, 3
+        jl   prepr_two_cmp
+        
+        mov  ecx, esp
+        add  esp, 12
+        mov  eax, DWORD PTR [ecx+8]
+        jmp  prepr_two
+prepr_two_cmp:
+        cmp  eax, 2
+        jl   prepr_one_prep
+        mov  ecx, esp
+        add  esp, 8
+prepr_two:
+        mov  edx, DWORD PTR [ecx+4]
+        jmp  prepr_one
+prepr_one_prep:
+        mov  ecx, esp
+        add  esp, 4
+prepr_one:
+        mov  ecx, DWORD PTR [ecx]
+        cmp  cif_abi, 7 ;; FFI_REGISTER
+        jne  fun
+
+        xchg ecx, eax
+
+fun:
+        ;; Call function
+        call fn
+
+        ;; Load ecx with the return type code
+        mov  ecx, cif_flags
+
+        ;; If the return value pointer is NULL, assume no return value.
+        cmp  rvalue, 0
+        jne  ca_jumptable
+
+        ;; Even if there is no space for the return value, we are
+        ;; obliged to handle floating-point values.
+        cmp  ecx, FFI_TYPE_FLOAT
+        jne  ca_epilogue
+        fstp st(0)
+
+        jmp  ca_epilogue
+
+ca_jumptable:
+        jmp  [ca_jumpdata + 4 * ecx]
+ca_jumpdata:
+        ;; Do not insert anything here between label and jump table.
+        dd offset ca_epilogue       ;; FFI_TYPE_VOID
+        dd offset ca_retint         ;; FFI_TYPE_INT
+        dd offset ca_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset ca_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset ca_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset ca_retuint8       ;; FFI_TYPE_UINT8
+        dd offset ca_retsint8       ;; FFI_TYPE_SINT8
+        dd offset ca_retuint16      ;; FFI_TYPE_UINT16
+        dd offset ca_retsint16      ;; FFI_TYPE_SINT16
+        dd offset ca_retint         ;; FFI_TYPE_UINT32
+        dd offset ca_retint         ;; FFI_TYPE_SINT32
+        dd offset ca_retint64       ;; FFI_TYPE_UINT64
+        dd offset ca_retint64       ;; FFI_TYPE_SINT64
+        dd offset ca_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset ca_retint         ;; FFI_TYPE_POINTER
+        dd offset ca_retstruct1b    ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset ca_retstruct2b    ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset ca_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset ca_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+        /* Sign/zero extend as appropriate.  */
+ca_retuint8:
+        movzx eax, al
+        jmp   ca_retint
+
+ca_retsint8:
+        movsx eax, al
+        jmp   ca_retint
+
+ca_retuint16:
+        movzx eax, ax
+        jmp   ca_retint
+
+ca_retsint16:
+        movsx eax, ax
+        jmp   ca_retint
+
+ca_retint:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        jmp   ca_epilogue
+
+ca_retint64:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], eax
+        mov   [ecx + 4], edx
+        jmp   ca_epilogue
+
+ca_retfloat:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  DWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  QWORD PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retlongdouble:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        fstp  TBYTE PTR [ecx]
+        jmp   ca_epilogue
+
+ca_retstruct1b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], al
+        jmp   ca_epilogue
+
+ca_retstruct2b:
+        ;; Load %ecx with the pointer to storage for the return value
+        mov   ecx, rvalue
+        mov   [ecx + 0], ax
+        jmp   ca_epilogue
+
+ca_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_call_win32 ENDP
+
+ffi_closure_THISCALL PROC NEAR
+        ;; Insert the register argument on the stack as the first argument
+        xchg	DWORD PTR [esp+4], ecx
+        xchg	DWORD PTR [esp], ecx
+        push	ecx
+        jmp	ffi_closure_STDCALL
+ffi_closure_THISCALL ENDP
+
+ffi_closure_FASTCALL PROC NEAR
+        ;; Insert the 2 register arguments on the stack as the first argument
+        xchg	DWORD PTR [esp+4], edx
+        xchg	DWORD PTR [esp], ecx
+        push	edx
+        push	ecx
+        jmp	ffi_closure_STDCALL
+ffi_closure_FASTCALL ENDP
+
+ffi_closure_REGISTER PROC NEAR
+        ;; Insert the 3 register arguments on the stack as the first argument
+        push	eax
+        xchg	DWORD PTR [esp+8], ecx
+        xchg	DWORD PTR [esp+4], edx
+        push	ecx
+        push	edx
+        jmp	ffi_closure_STDCALL
+ffi_closure_REGISTER ENDP
+
+ffi_closure_SYSV PROC NEAR FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 8]
+stub::
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_SYSV_inner
+        mov  ecx, [ebp - 12]
+
+cs_jumptable:
+        jmp  [cs_jumpdata + 4 * eax]
+cs_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cs_epilogue       ;; FFI_TYPE_VOID
+        dd offset cs_retint         ;; FFI_TYPE_INT
+        dd offset cs_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cs_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cs_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cs_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cs_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cs_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cs_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cs_retint         ;; FFI_TYPE_UINT32
+        dd offset cs_retint         ;; FFI_TYPE_SINT32
+        dd offset cs_retint64       ;; FFI_TYPE_UINT64
+        dd offset cs_retint64       ;; FFI_TYPE_SINT64
+        dd offset cs_retstruct      ;; FFI_TYPE_STRUCT
+        dd offset cs_retint         ;; FFI_TYPE_POINTER
+        dd offset cs_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cs_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cs_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cs_retmsstruct    ;; FFI_TYPE_MS_STRUCT
+
+cs_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retint:
+        mov   eax, [ecx]
+        jmp   cs_epilogue
+
+cs_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cs_epilogue
+
+cs_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cs_epilogue
+
+cs_retstruct:
+        ;; Caller expects us to pop struct return value pointer hidden arg.
+        ;; Epilogue code is autogenerated.
+        ret	4
+
+cs_retmsstruct:
+        ;; Caller expects us to return a pointer to the real return value.
+        mov   eax, ecx
+        ;; Caller doesn't expects us to pop struct return value pointer hidden arg.
+        jmp   cs_epilogue
+
+cs_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_SYSV ENDP
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+
+ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
+        sub esp, 36
+        mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+        mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+        mov [esp + 12], edx
+        lea edx, [ebp + 12]
+        jmp stubraw
+ffi_closure_raw_THISCALL ENDP
+
+ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME
+    ;; the ffi_closure ctx is passed in eax by the trampoline.
+
+        sub  esp, 40
+        mov  esi, [eax + RAW_CLOSURE_CIF_OFFSET]        ;; closure->cif
+        mov  edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET]  ;; closure->user_data
+        mov  [esp + 12], edx                            ;; user_data
+        lea  edx, [ebp + 8]
+stubraw::
+        mov  [esp + 8], edx                             ;; raw_args
+        lea  edx, [ebp - 24]
+        mov  [esp + 4], edx                             ;; &res
+        mov  [esp], esi                                 ;; cif
+        call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET]   ;; closure->fun
+        mov  eax, [esi + CIF_FLAGS_OFFSET]              ;; cif->flags
+        lea  ecx, [ebp - 24]
+
+cr_jumptable:
+        jmp  [cr_jumpdata + 4 * eax]
+cr_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cr_epilogue       ;; FFI_TYPE_VOID
+        dd offset cr_retint         ;; FFI_TYPE_INT
+        dd offset cr_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cr_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cr_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cr_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cr_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cr_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cr_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cr_retint         ;; FFI_TYPE_UINT32
+        dd offset cr_retint         ;; FFI_TYPE_SINT32
+        dd offset cr_retint64       ;; FFI_TYPE_UINT64
+        dd offset cr_retint64       ;; FFI_TYPE_SINT64
+        dd offset cr_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cr_retint         ;; FFI_TYPE_POINTER
+        dd offset cr_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cr_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cr_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+        dd offset cr_epilogue       ;; FFI_TYPE_MS_STRUCT
+
+cr_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retint:
+        mov   eax, [ecx]
+        jmp   cr_epilogue
+
+cr_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cr_epilogue
+
+cr_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cr_epilogue
+
+cr_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cr_epilogue
+
+cr_epilogue:
+        ;; Epilogue code is autogenerated.
+        ret
+ffi_closure_raw_SYSV ENDP
+
+#endif /* !FFI_NO_RAW_API */
+
+ffi_closure_STDCALL PROC NEAR FORCEFRAME
+        mov  eax, [esp] ;; the ffi_closure ctx passed by the trampoline.
+
+        sub  esp, 40
+        lea  edx, [ebp - 24]
+        mov  [ebp - 12], edx         ;; resp
+        lea  edx, [ebp + 12]         ;; account for stub return address on stack
+        mov  [esp + 8], edx          ;; args
+        lea  edx, [ebp - 12]
+        mov  [esp + 4], edx          ;; &resp
+        mov  [esp], eax              ;; closure
+        call ffi_closure_WIN32_inner
+        mov  ecx, [ebp - 12]
+
+        xchg [ebp + 4], eax          ;;xchg size of stack parameters and ffi_closure ctx
+        mov  eax, DWORD PTR [eax + CLOSURE_CIF_OFFSET]
+        mov  eax, DWORD PTR [eax + CIF_FLAGS_OFFSET]
+
+cd_jumptable:
+        jmp  [cd_jumpdata + 4 * eax]
+cd_jumpdata:
+        ;; Do not insert anything here between the label and jump table.
+        dd offset cd_epilogue       ;; FFI_TYPE_VOID
+        dd offset cd_retint         ;; FFI_TYPE_INT
+        dd offset cd_retfloat       ;; FFI_TYPE_FLOAT
+        dd offset cd_retdouble      ;; FFI_TYPE_DOUBLE
+        dd offset cd_retlongdouble  ;; FFI_TYPE_LONGDOUBLE
+        dd offset cd_retuint8       ;; FFI_TYPE_UINT8
+        dd offset cd_retsint8       ;; FFI_TYPE_SINT8
+        dd offset cd_retuint16      ;; FFI_TYPE_UINT16
+        dd offset cd_retsint16      ;; FFI_TYPE_SINT16
+        dd offset cd_retint         ;; FFI_TYPE_UINT32
+        dd offset cd_retint         ;; FFI_TYPE_SINT32
+        dd offset cd_retint64       ;; FFI_TYPE_UINT64
+        dd offset cd_retint64       ;; FFI_TYPE_SINT64
+        dd offset cd_epilogue       ;; FFI_TYPE_STRUCT
+        dd offset cd_retint         ;; FFI_TYPE_POINTER
+        dd offset cd_retsint8       ;; FFI_TYPE_SMALL_STRUCT_1B
+        dd offset cd_retsint16      ;; FFI_TYPE_SMALL_STRUCT_2B
+        dd offset cd_retint         ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cd_retuint8:
+        movzx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint8:
+        movsx eax, BYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retuint16:
+        movzx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retsint16:
+        movsx eax, WORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retint:
+        mov   eax, [ecx]
+        jmp   cd_epilogue
+
+cd_retint64:
+        mov   eax, [ecx + 0]
+        mov   edx, [ecx + 4]
+        jmp   cd_epilogue
+
+cd_retfloat:
+        fld   DWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retdouble:
+        fld   QWORD PTR [ecx]
+        jmp   cd_epilogue
+
+cd_retlongdouble:
+        fld   TBYTE PTR [ecx]
+        jmp   cd_epilogue
+
+cd_epilogue:
+        mov   esp, ebp
+        pop   ebp
+        mov   ecx, [esp + 4]  ;; Return address
+        add   esp, [esp]      ;; Parameters stack size
+        add   esp, 8
+        jmp   ecx
+ffi_closure_STDCALL ENDP
+
+_TEXT ENDS
+END
+
+#else
+
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+
+#if defined(SYMBOL_UNDERSCORE)
+#define USCORE_SYMBOL(x) _##x
+#else
+#define USCORE_SYMBOL(x) x
+#endif
+        .text
+ 
+        # This assumes we are using gas.
+        .balign 16
+FFI_HIDDEN(ffi_call_win32)
+        .globl	USCORE_SYMBOL(ffi_call_win32)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_call_win32;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_call_win32):
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        # Make room for all of the new args.
+        movl  20(%ebp),%ecx                                                     
+        subl  %ecx,%esp
+ 
+        movl  %esp,%eax
+ 
+        # Call ffi_prep_args
+        pushl 12(%ebp)
+        pushl %eax
+        call  *8(%ebp)
+        addl  $8,%esp
+
+        # Prepare registers
+        # EAX stores the number of register arguments
+        cmpl  $0, %eax
+        je    .fun
+        cmpl  $3, %eax
+        jl    .prepr_two_cmp
+        
+        movl  %esp, %ecx
+        addl  $12, %esp
+        movl  8(%ecx), %eax
+        jmp   .prepr_two
+.prepr_two_cmp:
+        cmpl  $2, %eax
+        jl    .prepr_one_prep
+        movl  %esp, %ecx
+        addl  $8, %esp
+.prepr_two:
+        movl  4(%ecx), %edx
+        jmp   .prepr_one
+.prepr_one_prep:
+        movl  %esp, %ecx
+        addl  $4, %esp
+.prepr_one:
+        movl  (%ecx), %ecx
+        cmpl  $7, 16(%ebp) # FFI_REGISTER
+        jne   .fun
+
+        xchgl %eax, %ecx
+        
+.fun:
+        # FIXME: Align the stack to a 128-bit boundary to avoid
+        # potential performance hits.
+
+        # Call function
+        call  *32(%ebp)
+ 
+        # stdcall functions pop arguments off the stack themselves
+
+        # Load %ecx with the return type code
+        movl  24(%ebp),%ecx
+ 
+        # If the return value pointer is NULL, assume no return value.
+        cmpl  $0,28(%ebp)
+        jne   0f
+ 
+        # Even if there is no space for the return value, we are
+        # obliged to handle floating-point values.
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        jne   .Lnoretval
+        fstp  %st(0)
+ 
+        jmp   .Lepilogue
+
+0:
+        call 1f
+        # Do not insert anything here between the call and the jump table.
+.Lstore_table:
+        .long	.Lnoretval-.Lstore_table	/* FFI_TYPE_VOID */
+        .long	.Lretint-.Lstore_table		/* FFI_TYPE_INT */
+        .long	.Lretfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+        .long	.Lretdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+        .long	.Lretlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+        .long	.Lretuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+        .long	.Lretsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+        .long	.Lretuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+        .long	.Lretsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+        .long	.Lretint-.Lstore_table		/* FFI_TYPE_UINT32 */
+        .long	.Lretint-.Lstore_table		/* FFI_TYPE_SINT32 */
+        .long	.Lretint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+        .long	.Lretint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+        .long	.Lretstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+        .long	.Lretint-.Lstore_table		/* FFI_TYPE_POINTER */
+        .long	.Lretstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+        .long	.Lretstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+        .long	.Lretstruct4b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_4B */
+        .long	.Lretstruct-.Lstore_table	/* FFI_TYPE_MS_STRUCT */
+1:
+        shl	$2, %ecx
+        add	(%esp),%ecx
+        mov	(%ecx),%ecx
+        add	(%esp),%ecx
+        add	$4, %esp
+        jmp	*%ecx
+
+        /* Sign/zero extend as appropriate.  */
+.Lretsint8:
+        movsbl	%al, %eax
+        jmp	.Lretint
+
+.Lretsint16:
+        movswl	%ax, %eax
+        jmp	.Lretint
+
+.Lretuint8:
+        movzbl	%al, %eax
+        jmp	.Lretint
+
+.Lretuint16:
+        movzwl	%ax, %eax
+        jmp	.Lretint
+
+.Lretint:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretfloat:
+         # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstps (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretdouble:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpl (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretlongdouble:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        fstpt (%ecx)
+        jmp   .Lepilogue
+ 
+.Lretint64:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        movl  %edx,4(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct1b:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movb  %al,0(%ecx)
+        jmp   .Lepilogue
+ 
+.Lretstruct2b:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movw  %ax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct4b:
+        # Load %ecx with the pointer to storage for the return value
+        movl  28(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   .Lepilogue
+
+.Lretstruct:
+        # Nothing to do!
+ 
+.Lnoretval:
+.Lepilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.ffi_call_win32_end:
+        .balign 16
+FFI_HIDDEN(ffi_closure_THISCALL)
+        .globl	USCORE_SYMBOL(ffi_closure_THISCALL)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_THISCALL):
+        /* Insert the register argument on the stack as the first argument */
+        xchg	%ecx, 4(%esp)
+        xchg	%ecx, (%esp)
+        push	%ecx
+        jmp	.ffi_closure_STDCALL_internal
+
+        .balign 16
+FFI_HIDDEN(ffi_closure_FASTCALL)
+        .globl	USCORE_SYMBOL(ffi_closure_FASTCALL)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_FASTCALL;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_FASTCALL):
+        /* Insert the 2 register arguments on the stack as the first two arguments */
+        xchg	%edx, 4(%esp)
+        xchg	%ecx, (%esp)
+        push	%edx
+        push	%ecx
+        jmp	.ffi_closure_STDCALL_internal
+FFI_HIDDEN(ffi_closure_REGISTER)
+        .globl	USCORE_SYMBOL(ffi_closure_REGISTER)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_REGISTER;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_REGISTER):
+        /* Insert the 3 register arguments on the stack as the first two arguments */
+        push	%eax
+        xchg	%ecx, 8(%esp)
+        xchg	%edx, 4(%esp)
+        push	%ecx
+        push	%edx
+        jmp	.ffi_closure_STDCALL_internal
+
+.LFE1:
+        # This assumes we are using gas.
+        .balign 16
+FFI_HIDDEN(ffi_closure_SYSV)
+#if defined(X86_WIN32)
+        .globl	USCORE_SYMBOL(ffi_closure_SYSV)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_SYSV):
+#endif
+.LFB3:
+        pushl	%ebp
+.LCFI4:
+        movl	%esp, %ebp
+.LCFI5:
+        subl	$40, %esp
+        leal	-24(%ebp), %edx
+        movl	%edx, -12(%ebp)	/* resp */
+        leal	8(%ebp), %edx
+        movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+        leal	-12(%ebp), %edx
+        movl	%edx, (%esp)	/* &resp */
+#if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__)
+        call	USCORE_SYMBOL(ffi_closure_SYSV_inner)
+#elif defined(X86_DARWIN)
+        calll	L_ffi_closure_SYSV_inner$stub
+#else
+        movl	%ebx, 8(%esp)
+        call	1f
+1:      popl	%ebx
+        addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+        call	ffi_closure_SYSV_inner@PLT
+        movl	8(%esp), %ebx
+#endif
+        movl	-12(%ebp), %ecx
+
+0:
+        call	1f
+        # Do not insert anything here between the call and the jump table.
+.Lcls_store_table:
+        .long	.Lcls_noretval-.Lcls_store_table	/* FFI_TYPE_VOID */
+        .long	.Lcls_retint-.Lcls_store_table		/* FFI_TYPE_INT */
+        .long	.Lcls_retfloat-.Lcls_store_table	/* FFI_TYPE_FLOAT */
+        .long	.Lcls_retdouble-.Lcls_store_table	/* FFI_TYPE_DOUBLE */
+        .long	.Lcls_retldouble-.Lcls_store_table	/* FFI_TYPE_LONGDOUBLE */
+        .long	.Lcls_retuint8-.Lcls_store_table	/* FFI_TYPE_UINT8 */
+        .long	.Lcls_retsint8-.Lcls_store_table	/* FFI_TYPE_SINT8 */
+        .long	.Lcls_retuint16-.Lcls_store_table	/* FFI_TYPE_UINT16 */
+        .long	.Lcls_retsint16-.Lcls_store_table	/* FFI_TYPE_SINT16 */
+        .long	.Lcls_retint-.Lcls_store_table		/* FFI_TYPE_UINT32 */
+        .long	.Lcls_retint-.Lcls_store_table		/* FFI_TYPE_SINT32 */
+        .long	.Lcls_retllong-.Lcls_store_table	/* FFI_TYPE_UINT64 */
+        .long	.Lcls_retllong-.Lcls_store_table	/* FFI_TYPE_SINT64 */
+        .long	.Lcls_retstruct-.Lcls_store_table	/* FFI_TYPE_STRUCT */
+        .long	.Lcls_retint-.Lcls_store_table		/* FFI_TYPE_POINTER */
+        .long	.Lcls_retstruct1-.Lcls_store_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+        .long	.Lcls_retstruct2-.Lcls_store_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+        .long	.Lcls_retstruct4-.Lcls_store_table	/* FFI_TYPE_SMALL_STRUCT_4B */
+        .long	.Lcls_retmsstruct-.Lcls_store_table	/* FFI_TYPE_MS_STRUCT */
+
+1:
+        shl	$2, %eax
+        add	(%esp),%eax
+        mov	(%eax),%eax
+        add	(%esp),%eax
+        add	$4, %esp
+        jmp	*%eax
+
+        /* Sign/zero extend as appropriate.  */
+.Lcls_retsint8:
+        movsbl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retsint16:
+        movswl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retuint8:
+        movzbl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retuint16:
+        movzwl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retint:
+        movl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retfloat:
+        flds	(%ecx)
+        jmp	.Lcls_epilogue
+
+.Lcls_retdouble:
+        fldl	(%ecx)
+        jmp	.Lcls_epilogue
+
+.Lcls_retldouble:
+        fldt	(%ecx)
+        jmp	.Lcls_epilogue
+
+.Lcls_retllong:
+        movl	(%ecx), %eax
+        movl	4(%ecx), %edx
+        jmp	.Lcls_epilogue
+
+.Lcls_retstruct1:
+        movsbl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retstruct2:
+        movswl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retstruct4:
+        movl	(%ecx), %eax
+        jmp	.Lcls_epilogue
+
+.Lcls_retstruct:
+        # Caller expects us to pop struct return value pointer hidden arg.
+        movl	%ebp, %esp
+        popl	%ebp
+        ret	$0x4
+
+.Lcls_retmsstruct:
+        # Caller expects us to return a pointer to the real return value.
+        mov	%ecx, %eax
+        # Caller doesn't expects us to pop struct return value pointer hidden arg.
+        jmp	.Lcls_epilogue
+
+.Lcls_noretval:
+.Lcls_epilogue:
+        movl	%ebp, %esp
+        popl	%ebp
+        ret
+.ffi_closure_SYSV_end:
+.LFE3:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+
+#ifdef X86_WIN32
+        .balign 16
+FFI_HIDDEN(ffi_closure_raw_THISCALL)
+        .globl	USCORE_SYMBOL(ffi_closure_raw_THISCALL)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_raw_THISCALL;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_raw_THISCALL):
+        pushl	%ebp
+        movl	%esp, %ebp
+        pushl	%esi
+        subl	$36, %esp
+        movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+        movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+        movl	%edx, 12(%esp)	/* user_data */
+        leal	12(%ebp), %edx	/* __builtin_dwarf_cfa () */
+        jmp	.stubraw
+#endif /* X86_WIN32 */
+
+        # This assumes we are using gas.
+        .balign 16
+#if defined(X86_WIN32)
+        .globl	USCORE_SYMBOL(ffi_closure_raw_SYSV)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_raw_SYSV;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_raw_SYSV):
+#endif /* defined(X86_WIN32) */
+.LFB4:
+        pushl	%ebp
+.LCFI6:
+        movl	%esp, %ebp
+.LCFI7:
+        pushl	%esi
+.LCFI8:
+        subl	$36, %esp
+        movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+        movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+        movl	%edx, 12(%esp)	/* user_data */
+        leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+.stubraw:
+        movl	%edx, 8(%esp)	/* raw_args */
+        leal	-24(%ebp), %edx
+        movl	%edx, 4(%esp)	/* &res */
+        movl	%esi, (%esp)	/* cif */
+        call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+        movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+0:
+        call	1f
+        # Do not insert anything here between the call and the jump table.
+.Lrcls_store_table:
+        .long	.Lrcls_noretval-.Lrcls_store_table	/* FFI_TYPE_VOID */
+        .long	.Lrcls_retint-.Lrcls_store_table	/* FFI_TYPE_INT */
+        .long	.Lrcls_retfloat-.Lrcls_store_table	/* FFI_TYPE_FLOAT */
+        .long	.Lrcls_retdouble-.Lrcls_store_table	/* FFI_TYPE_DOUBLE */
+        .long	.Lrcls_retldouble-.Lrcls_store_table	/* FFI_TYPE_LONGDOUBLE */
+        .long	.Lrcls_retuint8-.Lrcls_store_table	/* FFI_TYPE_UINT8 */
+        .long	.Lrcls_retsint8-.Lrcls_store_table	/* FFI_TYPE_SINT8 */
+        .long	.Lrcls_retuint16-.Lrcls_store_table	/* FFI_TYPE_UINT16 */
+        .long	.Lrcls_retsint16-.Lrcls_store_table	/* FFI_TYPE_SINT16 */
+        .long	.Lrcls_retint-.Lrcls_store_table	/* FFI_TYPE_UINT32 */
+        .long	.Lrcls_retint-.Lrcls_store_table	/* FFI_TYPE_SINT32 */
+        .long	.Lrcls_retllong-.Lrcls_store_table	/* FFI_TYPE_UINT64 */
+        .long	.Lrcls_retllong-.Lrcls_store_table	/* FFI_TYPE_SINT64 */
+        .long	.Lrcls_retstruct-.Lrcls_store_table	/* FFI_TYPE_STRUCT */
+        .long	.Lrcls_retint-.Lrcls_store_table	/* FFI_TYPE_POINTER */
+        .long	.Lrcls_retstruct1-.Lrcls_store_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+        .long	.Lrcls_retstruct2-.Lrcls_store_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+        .long	.Lrcls_retstruct4-.Lrcls_store_table	/* FFI_TYPE_SMALL_STRUCT_4B */
+        .long	.Lrcls_retstruct-.Lrcls_store_table	/* FFI_TYPE_MS_STRUCT */
+1:
+        shl	$2, %eax
+        add	(%esp),%eax
+        mov	(%eax),%eax
+        add	(%esp),%eax
+        add	$4, %esp
+        jmp	*%eax
+
+        /* Sign/zero extend as appropriate.  */
+.Lrcls_retsint8:
+        movsbl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retsint16:
+        movswl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retuint8:
+        movzbl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retuint16:
+        movzwl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retint:
+        movl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retfloat:
+        flds	-24(%ebp)
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retdouble:
+        fldl	-24(%ebp)
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retldouble:
+        fldt	-24(%ebp)
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retllong:
+        movl	-24(%ebp), %eax
+        movl	-20(%ebp), %edx
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct1:
+        movsbl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct2:
+        movswl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct4:
+        movl	-24(%ebp), %eax
+        jmp	.Lrcls_epilogue
+
+.Lrcls_retstruct:
+        # Nothing to do!
+
+.Lrcls_noretval:
+.Lrcls_epilogue:
+        addl	$36, %esp
+        popl	%esi
+        popl	%ebp
+        ret
+.ffi_closure_raw_SYSV_end:
+.LFE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+        # This assumes we are using gas.
+        .balign	16
+FFI_HIDDEN(ffi_closure_STDCALL)
+        .globl	USCORE_SYMBOL(ffi_closure_STDCALL)
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .def	_ffi_closure_STDCALL;	.scl	2;	.type	32;	.endef
+#endif
+USCORE_SYMBOL(ffi_closure_STDCALL):
+.ffi_closure_STDCALL_internal:
+        /* ffi_closure ctx is at top of the stack */
+        movl	(%esp), %eax
+.LFB5:
+        pushl	%ebp
+.LCFI9:
+        movl	%esp, %ebp
+.LCFI10:
+        subl	$40, %esp
+        leal	-24(%ebp), %edx
+        movl	%edx, -12(%ebp)	/* resp */
+        leal	12(%ebp), %edx  /* account for stub return address on stack */
+        movl	%edx, 4(%esp)	/* args */
+        leal	-12(%ebp), %edx
+        movl	%edx, (%esp)	/* &resp */
+#if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__)
+        call	USCORE_SYMBOL(ffi_closure_WIN32_inner)
+#elif defined(X86_DARWIN)
+        calll	L_ffi_closure_WIN32_inner$stub
+#else
+        movl	%ebx, 8(%esp)
+        call	1f
+1:      popl	%ebx
+        addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+        call	ffi_closure_WIN32_inner@PLT
+        movl	8(%esp), %ebx
+#endif
+        movl	-12(%ebp), %ecx
+0:
+        xchgl	4(%ebp), %eax /* xchg size of stack parameters and ffi_closure ctx */
+        movl	CLOSURE_CIF_OFFSET(%eax), %eax
+        movl	CIF_FLAGS_OFFSET(%eax), %eax
+
+        call	1f
+        # Do not insert anything here between the call and the jump table.
+.Lscls_store_table:
+        .long	.Lscls_noretval-.Lscls_store_table	/* FFI_TYPE_VOID */
+        .long	.Lscls_retint-.Lscls_store_table	/* FFI_TYPE_INT */
+        .long	.Lscls_retfloat-.Lscls_store_table	/* FFI_TYPE_FLOAT */
+        .long	.Lscls_retdouble-.Lscls_store_table	/* FFI_TYPE_DOUBLE */
+        .long	.Lscls_retldouble-.Lscls_store_table	/* FFI_TYPE_LONGDOUBLE */
+        .long	.Lscls_retuint8-.Lscls_store_table	/* FFI_TYPE_UINT8 */
+        .long	.Lscls_retsint8-.Lscls_store_table	/* FFI_TYPE_SINT8 */
+        .long	.Lscls_retuint16-.Lscls_store_table	/* FFI_TYPE_UINT16 */
+        .long	.Lscls_retsint16-.Lscls_store_table	/* FFI_TYPE_SINT16 */
+        .long	.Lscls_retint-.Lscls_store_table	/* FFI_TYPE_UINT32 */
+        .long	.Lscls_retint-.Lscls_store_table	/* FFI_TYPE_SINT32 */
+        .long	.Lscls_retllong-.Lscls_store_table	/* FFI_TYPE_UINT64 */
+        .long	.Lscls_retllong-.Lscls_store_table	/* FFI_TYPE_SINT64 */
+        .long	.Lscls_retstruct-.Lscls_store_table	/* FFI_TYPE_STRUCT */
+        .long	.Lscls_retint-.Lscls_store_table	/* FFI_TYPE_POINTER */
+        .long	.Lscls_retstruct1-.Lscls_store_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+        .long	.Lscls_retstruct2-.Lscls_store_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+        .long	.Lscls_retstruct4-.Lscls_store_table	/* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+        shl	$2, %eax
+        add	(%esp),%eax
+        mov	(%eax),%eax
+        add	(%esp),%eax
+        add	$4, %esp
+        jmp	*%eax
+
+        /* Sign/zero extend as appropriate.  */
+.Lscls_retsint8:
+        movsbl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retsint16:
+        movswl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retuint8:
+        movzbl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retuint16:
+        movzwl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retint:
+        movl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retfloat:
+        flds	(%ecx)
+        jmp	.Lscls_epilogue
+
+.Lscls_retdouble:
+        fldl	(%ecx)
+        jmp	.Lscls_epilogue
+
+.Lscls_retldouble:
+        fldt	(%ecx)
+        jmp	.Lscls_epilogue
+
+.Lscls_retllong:
+        movl	(%ecx), %eax
+        movl	4(%ecx), %edx
+        jmp	.Lscls_epilogue
+
+.Lscls_retstruct1:
+        movsbl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retstruct2:
+        movswl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retstruct4:
+        movl	(%ecx), %eax
+        jmp	.Lscls_epilogue
+
+.Lscls_retstruct:
+        # Nothing to do!
+
+.Lscls_noretval:
+.Lscls_epilogue:
+        movl	%ebp, %esp
+        popl	%ebp
+        movl	4(%esp), %ecx /* Return address */
+        addl	(%esp), %esp  /* Parameters stack size */
+        addl	$8, %esp
+        jmp	*%ecx
+.ffi_closure_STDCALL_end:
+.LFE5:
+
+#if defined(X86_DARWIN)
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_SYSV_inner$stub:
+        .indirect_symbol _ffi_closure_SYSV_inner
+        hlt ; hlt ; hlt ; hlt ; hlt
+L_ffi_closure_WIN32_inner$stub:
+        .indirect_symbol _ffi_closure_WIN32_inner
+        hlt ; hlt ; hlt ; hlt ; hlt
+#endif
+
+#if defined(X86_WIN32) && !defined(__OS2__)
+        .section	.eh_frame,"w"
+#endif
+.Lframe1:
+.LSCIE1:
+        .long	.LECIE1-.LASCIE1  /* Length of Common Information Entry */
+.LASCIE1:
+        .long	0x0	/* CIE Identifier Tag */
+        .byte	0x1	/* CIE Version */
+#ifdef __PIC__
+        .ascii "zR\0"	/* CIE Augmentation */
+#else
+        .ascii "\0"	/* CIE Augmentation */
+#endif
+        .byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+        .byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+        .byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+        .byte	0x1	/* .uleb128 0x1; Augmentation size */
+        .byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+        .byte	0xc	/* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */
+        .byte	0x4	/* .uleb128 0x4 */
+        .byte	0x4	/* .uleb128 0x4 */
+        .byte	0x88	/* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */
+        .byte	0x1	/* .uleb128 0x1 */
+        .align 4
+.LECIE1:
+
+.LSFDE1:
+        .long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+        .long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+        .long	.LFB1-.	/* FDE initial location */
+#else
+        .long	.LFB1
+#endif
+        .long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+        .byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+        /* DW_CFA_xxx CFI instructions go here.  */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI0-.LFB1
+        .byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+        .byte	0x8	/* .uleb128 0x8 */
+        .byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+        .byte	0x2	/* .uleb128 0x2 */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI1-.LCFI0
+        .byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+        .byte	0x5	/* .uleb128 0x5 */
+
+        /* End of DW_CFA_xxx CFI instructions.  */
+        .align 4
+.LEFDE1:
+
+.LSFDE3:
+        .long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+        .long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+        .long	.LFB3-.	/* FDE initial location */
+#else
+        .long	.LFB3
+#endif
+        .long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+        .byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+        /* DW_CFA_xxx CFI instructions go here.  */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI4-.LFB3
+        .byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+        .byte	0x8	/* .uleb128 0x8 */
+        .byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+        .byte	0x2	/* .uleb128 0x2 */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI5-.LCFI4
+        .byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+        .byte	0x5	/* .uleb128 0x5 */
+
+        /* End of DW_CFA_xxx CFI instructions.  */
+        .align 4
+.LEFDE3:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE4:
+        .long	.LEFDE4-.LASFDE4	/* FDE Length */
+.LASFDE4:
+        .long	.LASFDE4-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+        .long	.LFB4-.	/* FDE initial location */
+#else
+        .long	.LFB4
+#endif
+        .long	.LFE4-.LFB4	/* FDE address range */
+#ifdef __PIC__
+        .byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+        /* DW_CFA_xxx CFI instructions go here.  */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI6-.LFB4
+        .byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+        .byte	0x8	/* .uleb128 0x8 */
+        .byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+        .byte	0x2	/* .uleb128 0x2 */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI7-.LCFI6
+        .byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+        .byte	0x5	/* .uleb128 0x5 */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI8-.LCFI7
+        .byte	0x86	/* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */
+        .byte	0x3	/* .uleb128 0x3 */
+
+        /* End of DW_CFA_xxx CFI instructions.  */
+        .align 4
+.LEFDE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+.LSFDE5:
+        .long	.LEFDE5-.LASFDE5	/* FDE Length */
+.LASFDE5:
+        .long	.LASFDE5-.Lframe1	/* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+        .long	.LFB5-.	/* FDE initial location */
+#else
+        .long	.LFB5
+#endif
+        .long	.LFE5-.LFB5	/* FDE address range */
+#ifdef __PIC__
+        .byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+        /* DW_CFA_xxx CFI instructions go here.  */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI9-.LFB5
+        .byte	0xe	/* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+        .byte	0x8	/* .uleb128 0x8 */
+        .byte	0x85	/* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+        .byte	0x2	/* .uleb128 0x2 */
+
+        .byte	0x4	/* DW_CFA_advance_loc4 */
+        .long	.LCFI10-.LCFI9
+        .byte	0xd	/* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+        .byte	0x5	/* .uleb128 0x5 */
+
+        /* End of DW_CFA_xxx CFI instructions.  */
+        .align 4
+.LEFDE5:
+
+#endif /* !_MSC_VER */
+
+#if defined __ELF__ && defined __linux__
+        .section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win64.S b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win64.S
new file mode 100644
index 0000000..347a26a
--- /dev/null
+++ b/jni/ruby/ext/fiddle/libffi-3.2.1/src/x86/win64.S
@@ -0,0 +1,519 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* Constants for ffi_call_win64 */
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+		   extended_cif *ecif, unsigned bytes, unsigned flags,
+		   unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC	ffi_call_win64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_win64_inner:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;;
+ffi_closure_win64 PROC FRAME
+	;; copy register arguments onto stack
+	test	r11, 1
+	jne	first_is_float
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float
+	mov	QWORD PTR [rsp+24], r8
+	jmp	fourth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+fourth:
+	test	r11, 8
+	jne	fourth_is_float
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+fourth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+	.ALLOCSTACK 40
+	sub	rsp, 40
+	.ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	add	rdx, 48		; point to start of arguments
+	mov	rax, ffi_closure_win64_inner
+	call	rax		; call the real closure function
+	add	rsp, 40
+	movd	xmm0, rax	; If the closure returned a float,
+				; ffi_closure_win64_inner wrote it to rax
+	ret	0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+	;; copy registers onto stack
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+	.PUSHREG rbp
+	push	rbp
+	.ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+	.SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+	.ENDPROLOG
+
+	mov	eax, DWORD PTR CIF_BYTES[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR STACK[rbp], rax
+
+	mov	rdx, QWORD PTR ECIF[rbp]
+	mov	rcx, QWORD PTR STACK[rbp]
+	call	QWORD PTR PREP_ARGS_FN[rbp]
+
+	mov	rsp, QWORD PTR STACK[rbp]
+
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR FN[rbp]
+ret_struct4b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ 	jne	ret_struct2b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	SHORT ret_void$
+
+ret_struct2b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ 	jne	ret_struct1b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	WORD PTR [rcx], ax
+	jmp	SHORT ret_void$
+
+ret_struct1b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ 	jne	ret_uint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	BYTE PTR [rcx], al
+	jmp	SHORT ret_void$
+
+ret_uint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ 	jne	ret_sint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ 	jne	ret_uint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ 	jne	ret_sint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ 	jne	ret_uint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ 	jne	ret_sint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov     eax, eax
+	mov	QWORD PTR [rcx], rax
+
+ret_void$:
+	xor	rax, rax
+
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+
+ret_sint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_float$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movss	DWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_double$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_uint64$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_uint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT64
+  	jne	SHORT ret_sint64$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+
+ret_sint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+  	jne	SHORT ret_pointer$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+
+ret_pointer$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_POINTER
+  	jne	SHORT ret_int$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+
+ret_int$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_INT
+  	jne	SHORT ret_void$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+ffi_call_win64 ENDP
+_TEXT	ENDS
+END
+
+#else
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.text
+
+.extern SYMBOL_NAME(ffi_closure_win64_inner)
+
+# ffi_closure_win64 will be called with these registers set:
+#    rax points to 'closure'
+#    r11 contains a bit mask that specifies which of the
+#    first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+#
+	.balign 16
+	.globl SYMBOL_NAME(ffi_closure_win64)
+	.seh_proc SYMBOL_NAME(ffi_closure_win64)
+SYMBOL_NAME(ffi_closure_win64):
+	# copy register arguments onto stack
+	test	$1,%r11
+	jne	.Lfirst_is_float
+	mov	%rcx, 8(%rsp)
+	jmp	.Lsecond
+.Lfirst_is_float:
+	movlpd	%xmm0, 8(%rsp)
+
+.Lsecond:
+	test	$2, %r11
+	jne	.Lsecond_is_float
+	mov	%rdx, 16(%rsp)
+	jmp	.Lthird
+.Lsecond_is_float:
+	movlpd	%xmm1, 16(%rsp)
+
+.Lthird:
+	test	$4, %r11
+	jne	.Lthird_is_float
+	mov	%r8,24(%rsp)
+	jmp	.Lfourth
+.Lthird_is_float:
+	movlpd	%xmm2, 24(%rsp)
+
+.Lfourth:
+	test	$8, %r11
+	jne	.Lfourth_is_float
+	mov	%r9, 32(%rsp)
+	jmp	.Ldone
+.Lfourth_is_float:
+	movlpd	%xmm3, 32(%rsp)
+
+.Ldone:
+	.seh_stackalloc 40
+	sub	$40, %rsp
+	.seh_endprologue
+	mov	%rax, %rcx	# context is first parameter
+	mov	%rsp, %rdx	# stack is second parameter
+	add	$48, %rdx	# point to start of arguments
+	leaq	SYMBOL_NAME(ffi_closure_win64_inner)(%rip), %rax
+	callq	*%rax		# call the real closure function
+	add	$40, %rsp
+	movq	%rax, %xmm0	# If the closure returned a float,
+				# ffi_closure_win64_inner wrote it to rax
+	retq
+	.seh_endproc
+
+	.balign 16
+	.globl	SYMBOL_NAME(ffi_call_win64)
+	.seh_proc SYMBOL_NAME(ffi_call_win64)
+SYMBOL_NAME(ffi_call_win64):
+	# copy registers onto stack
+	mov	%r9,32(%rsp)
+	mov	%r8,24(%rsp)
+	mov	%rdx,16(%rsp)
+	mov	%rcx,8(%rsp)
+	.seh_pushreg rbp
+	push	%rbp
+	.seh_stackalloc 48
+	sub	$48,%rsp
+	.seh_setframe rbp, 32
+	lea	32(%rsp),%rbp
+	.seh_endprologue
+
+	mov	CIF_BYTES(%rbp),%eax
+	add	$15, %rax
+	and	$-16, %rax
+	cmpq	$0x1000, %rax
+	jb	Lch_done
+Lch_probe:
+	subq	$0x1000,%rsp
+	orl	$0x0, (%rsp)
+	subq	$0x1000,%rax
+	cmpq	$0x1000,%rax
+	ja	Lch_probe
+Lch_done:
+	subq	%rax, %rsp
+	orl	$0x0, (%rsp)
+	lea	32(%rsp), %rax
+	mov	%rax, STACK(%rbp)
+
+	mov	ECIF(%rbp), %rdx
+	mov	STACK(%rbp), %rcx
+	callq	*PREP_ARGS_FN(%rbp)
+
+	mov	STACK(%rbp), %rsp
+
+	movlpd	24(%rsp), %xmm3
+	movd	%xmm3, %r9
+
+	movlpd	16(%rsp), %xmm2
+	movd	%xmm2, %r8
+
+	movlpd	8(%rsp), %xmm1
+	movd	%xmm1, %rdx
+
+	movlpd	(%rsp), %xmm0
+	movd	%xmm0, %rcx
+
+	callq	*FN(%rbp)
+.Lret_struct4b:
+ 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ 	jne .Lret_struct2b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%eax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_struct2b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+	jne .Lret_struct1b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%ax, (%rcx)
+	jmp .Lret_void
+
+.Lret_struct1b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+	jne .Lret_uint8
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%al, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint8:
+	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+	jne .Lret_sint8
+
+	mov     RVALUE(%rbp), %rcx
+	movzbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint8:
+	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+	jne .Lret_uint16
+
+	mov     RVALUE(%rbp), %rcx
+	movsbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint16:
+	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+	jne .Lret_sint16
+
+	mov     RVALUE(%rbp), %rcx
+	movzwq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint16:
+	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+	jne .Lret_uint32
+
+	mov     RVALUE(%rbp), %rcx
+	movswq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint32:
+	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+	jne .Lret_sint32
+
+	mov     RVALUE(%rbp), %rcx
+	movl    %eax, %eax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint32:
+ 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ 	jne	.Lret_float
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_float:
+ 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ 	jne	.Lret_double
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movss	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_double:
+ 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ 	jne	.Lret_uint64
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movlpd	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_uint64:
+  	cmpl	$FFI_TYPE_UINT64, CIF_FLAGS(%rbp)
+ 	jne	.Lret_sint64
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+
+.Lret_sint64:
+  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+  	jne	.Lret_pointer
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+
+.Lret_pointer:
+  	cmpl	$FFI_TYPE_POINTER, CIF_FLAGS(%rbp)
+  	jne	.Lret_int
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+
+.Lret_int:
+  	cmpl	$FFI_TYPE_INT, CIF_FLAGS(%rbp)
+  	jne	.Lret_void
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_void:
+	xor	%rax, %rax
+
+	lea	16(%rbp), %rsp
+	pop	%rbp
+	retq
+	.seh_endproc
+#endif /* !_MSC_VER */
+
-- 
cgit v1.2.3-70-g09d2