1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
|
/* -----------------------------------------------------------------------
tile.S - Copyright (c) 2011 Tilera Corp.
Tilera TILEPro and TILE-Gx Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
/* Number of bytes in a register; also the size of every slot this file
reads or writes in the register arrays and on the stack. */
#define REG_SIZE FFI_SIZEOF_ARG
/* Number of bytes in stack linkage area for backtracing.
A note about the ABI: on entry to a procedure, sp points to a stack
slot where it must spill the return address if it's not a leaf.
REG_SIZE bytes beyond that is a slot owned by the caller which
contains the sp value that the caller had when it was originally
entered (i.e. the caller's frame pointer). */
#define LINKAGE_SIZE (2 * REG_SIZE)
/* The first 10 registers (r0-r9) are used to pass arguments and return
values. */
#define NUM_ARG_REGS 10
/* TILE-Gx and TILEPro spell the register-sized store, the register-sized
load and the "branch if greater than zero" instruction differently.
These aliases let the code below be shared between the two ISAs.
Operand order is "SW address, value" and "LW dest, address". */
#ifdef __tilegx__
#define SW st
#define LW ld
#define BGZT bgtzt
#else
#define SW sw
#define LW lw
#define BGZT bgzt
#endif
/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
const int_reg_t *stack_args,
unsigned long stack_args_bytes,
void (*fnaddr)(void));
Calls FNADDR with a fully prepared set of register and stack arguments.
On entry, REG_ARGS (reg_args) contains the outgoing values for r0-r9,
and INCOMING_STACK_ARGS (stack_args) points to STACK_ARG_BYTES
(stack_args_bytes) bytes of additional values to be passed on the
stack. If STACK_ARG_BYTES is zero, then INCOMING_STACK_ARGS is never
dereferenced. Stack arguments are copied REG_SIZE bytes at a time.
When the invoked function returns, the values of r0-r9 are
blindly stored back into REG_ARGS for the caller to examine.
Frame layout built below (addresses relative to the incoming sp):
    0              saved lr
    -REG_SIZE      saved r52
    -2*REG_SIZE    saved REG_ARGS pointer
    ...            outgoing stack arguments
    new sp + REG_SIZE   linkage slot holding the incoming sp
    new sp              linkage slot left for the callee's return address
Several bundles below read a register that the same bundle also
overwrites (e.g. r52, sp, r0); the code relies on every instruction in
a bundle seeing the register values from before the bundle. */
.section .text.ffi_call_tile, "ax", @progbits
.align 8
.globl ffi_call_tile
FFI_HIDDEN(ffi_call_tile)
ffi_call_tile:
/* Incoming arguments. */
#define REG_ARGS r0
#define INCOMING_STACK_ARGS r1
#define STACK_ARG_BYTES r2
#define ORIG_FNADDR r3
/* Temporary values. All of these live in registers outside the r0-r9
argument range, so loading the outgoing arguments does not disturb them. */
#define FRAME_SIZE r10
#define TMP r11
#define TMP2 r12
#define OUTGOING_STACK_ARGS r13
#define REG_ADDR_PTR r14
#define RETURN_REG_ADDR r15
#define FNADDR r16
.cfi_startproc
{
/* Save return address in the slot sp points to on entry. */
SW sp, lr
.cfi_offset lr, 0
/* Prepare to spill incoming r52: TMP = address of the slot just below
the incoming sp. */
addi TMP, sp, -REG_SIZE
/* Increase frame size to have room to spill r52 and REG_ARGS, plus the
linkage area. The +7 is to round up mod 8. */
addi FRAME_SIZE, STACK_ARG_BYTES, \
REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
}
{
/* Round stack frame size to a multiple of 8 to satisfy ABI. */
andi FRAME_SIZE, FRAME_SIZE, -8
/* Compute where to spill REG_ARGS value: two slots below incoming sp. */
addi TMP2, sp, -(REG_SIZE * 2)
}
{
/* Spill incoming r52 (the store uses r52's value from before this
bundle). */
SW TMP, r52
.cfi_offset r52, -REG_SIZE
/* Set up our frame pointer: r52 = incoming sp. */
move r52, sp
.cfi_def_cfa_register r52
/* Push stack frame. */
sub sp, sp, FRAME_SIZE
}
{
/* Prepare to set up stack linkage: TMP = second linkage slot of the
new frame. */
addi TMP, sp, REG_SIZE
/* Prepare to memcpy stack args: they start right above the linkage
area of the new frame. */
addi OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
/* Save REG_ARGS which we will need after we call the subroutine. */
SW TMP2, REG_ARGS
}
{
/* Set up linkage info to hold incoming stack pointer (now in r52). */
SW TMP, r52
}
{
/* Skip stack args memcpy if we don't have any stack args (common). */
blezt STACK_ARG_BYTES, .Ldone_stack_args_memcpy
}
/* Copy loop: moves one REG_SIZE slot per iteration and repeats while
the remaining byte count is still greater than zero. */
.Lmemcpy_stack_args:
{
/* Load incoming argument from stack_args and advance the source
pointer. */
LW TMP, INCOMING_STACK_ARGS
addi INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
}
{
/* Store stack argument into outgoing stack argument area, advance the
destination pointer and decrement the remaining byte count. */
SW OUTGOING_STACK_ARGS, TMP
addi OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
addi STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
}
{
BGZT STACK_ARG_BYTES, .Lmemcpy_stack_args
}
.Ldone_stack_args_memcpy:
{
/* Copy aside ORIG_FNADDR so we can overwrite its register (r3) with an
outgoing argument. */
move FNADDR, ORIG_FNADDR
/* Prepare to load argument registers: point at reg_args[1]. */
addi REG_ADDR_PTR, r0, REG_SIZE
/* Load outgoing r0 from reg_args[0]; r0 is both the address and the
destination here. */
LW r0, r0
}
/* Load up argument registers from the REG_ARGS array.
LOAD_REG loads REG from the address in PTR and then advances PTR by
one register slot. */
#define LOAD_REG(REG, PTR) \
{ \
LW REG, PTR ; \
addi PTR, PTR, REG_SIZE \
}
LOAD_REG(r1, REG_ADDR_PTR)
LOAD_REG(r2, REG_ADDR_PTR)
LOAD_REG(r3, REG_ADDR_PTR)
LOAD_REG(r4, REG_ADDR_PTR)
LOAD_REG(r5, REG_ADDR_PTR)
LOAD_REG(r6, REG_ADDR_PTR)
LOAD_REG(r7, REG_ADDR_PTR)
LOAD_REG(r8, REG_ADDR_PTR)
LOAD_REG(r9, REG_ADDR_PTR)
{
/* Call the subroutine. */
jalr FNADDR
}
/* From here on only r52 and the values spilled relative to it are used
to find our state; the temporaries set up before the call are not
assumed to have survived it. */
{
/* Restore original lr from the slot at the incoming sp (held in r52). */
LW lr, r52
/* Prepare to recover ARGS, which we spilled earlier. */
addi TMP, r52, -(2 * REG_SIZE)
}
{
/* Restore ARGS, so we can fill it in with the return regs r0-r9. */
LW RETURN_REG_ADDR, TMP
/* Prepare to restore original r52. */
addi TMP, r52, -REG_SIZE
}
{
/* Pop stack frame. */
move sp, r52
/* Restore original r52. */
LW r52, TMP
}
/* STORE_REG stores REG to the address in PTR and then advances PTR by
one register slot. */
#define STORE_REG(REG, PTR) \
{ \
SW PTR, REG ; \
addi PTR, PTR, REG_SIZE \
}
/* Return all register values by reference: reg_args[i] = r<i>. */
STORE_REG(r0, RETURN_REG_ADDR)
STORE_REG(r1, RETURN_REG_ADDR)
STORE_REG(r2, RETURN_REG_ADDR)
STORE_REG(r3, RETURN_REG_ADDR)
STORE_REG(r4, RETURN_REG_ADDR)
STORE_REG(r5, RETURN_REG_ADDR)
STORE_REG(r6, RETURN_REG_ADDR)
STORE_REG(r7, RETURN_REG_ADDR)
STORE_REG(r8, RETURN_REG_ADDR)
STORE_REG(r9, RETURN_REG_ADDR)
{
/* Return to our caller through the restored lr. */
jrp lr
}
.cfi_endproc
.size ffi_call_tile, .-ffi_call_tile
/* ffi_closure_tile(...)
Common entry point reached from a closure trampoline (see
ffi_template_tramp_tile at the end of this file).
On entry for TILEPro, lr points to the closure plus 8 bytes, and r10
contains the actual return address.
On entry for TILE-Gx, r11 contains the pointer to the closure and lr
still holds the actual return address.
In both cases r0-r9 hold the register arguments of the call being
intercepted.
This function simply dumps all register parameters into a stack array
and passes the closure, the registers array, and the stack arguments
to C code that does all of the actual closure processing. Afterwards
r0-r9 are reloaded from a second array in the same frame and control
returns to the original caller.
Uses the LOAD_REG and STORE_REG macros defined earlier in this file. */
.section .text.ffi_closure_tile, "ax", @progbits
.align 8
.globl ffi_closure_tile
FFI_HIDDEN(ffi_closure_tile)
.cfi_startproc
/* Frame linkage plus room for two arrays of NUM_ARG_REGS registers,
rounded up to a multiple of 8. The first array (at sp + LINKAGE_SIZE)
receives the incoming r0-r9; the second (immediately after it) is
where r0-r9 are reloaded from before returning. */
#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
ffi_closure_tile:
{
#ifdef __tilegx__
/* Save return address (still in lr on TILE-Gx). */
st sp, lr
.cfi_offset lr, 0
#else
/* Save return address (in r10 due to closure stub wrapper). The store
uses r10's value from before this bundle; r10 is reused below. */
SW sp, r10
.cfi_return_column r10
.cfi_offset r10, 0
#endif
/* Compute address for stack frame linkage: the second linkage slot of
the frame about to be pushed (new sp + REG_SIZE). */
addli r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
}
{
/* Save incoming stack pointer in linkage area. */
SW r10, sp
.cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
/* Push a new stack frame. */
addli sp, sp, -CLOSURE_FRAME_SIZE
.cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
}
{
/* Create pointer to where to start spilling registers. */
addi r10, sp, LINKAGE_SIZE
}
/* Spill all the incoming registers. r9 is handled in the next bundle,
where the pointer no longer needs advancing. */
STORE_REG(r0, r10)
STORE_REG(r1, r10)
STORE_REG(r2, r10)
STORE_REG(r3, r10)
STORE_REG(r4, r10)
STORE_REG(r5, r10)
STORE_REG(r6, r10)
STORE_REG(r7, r10)
STORE_REG(r8, r10)
{
/* Save r9. */
SW r10, r9
/* First argument for the C helper (r0): the closure. */
#ifdef __tilegx__
/* Pointer to closure is passed in r11. */
move r0, r11
#else
/* Compute pointer to the closure object. Because the closure
starts with a "jal ffi_closure_tile", we can just take the
value of lr (a phony return address pointing into the closure)
and subtract 8. */
addi r0, lr, -8
#endif
/* Second argument (r1): a pointer to the register arguments we just
spilled. */
addi r1, sp, LINKAGE_SIZE
}
{
/* Third argument (r2): a pointer to the extra stack arguments (if
any), which sit above the caller's linkage area at the incoming sp. */
addli r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
/* Call C code to deal with all of the grotty details. */
jal ffi_closure_tile_inner
}
{
/* Recompute the incoming sp, where the return address was saved. */
addli r10, sp, CLOSURE_FRAME_SIZE
}
{
/* Restore the return address. */
LW lr, r10
/* Compute pointer to the second registers array, which follows the
spilled incoming registers. */
addli r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
}
/* Return all the register values, which C code may have set. */
LOAD_REG(r0, r10)
LOAD_REG(r1, r10)
LOAD_REG(r2, r10)
LOAD_REG(r3, r10)
LOAD_REG(r4, r10)
LOAD_REG(r5, r10)
LOAD_REG(r6, r10)
LOAD_REG(r7, r10)
LOAD_REG(r8, r10)
LOAD_REG(r9, r10)
{
/* Pop the frame and return to the original caller. */
addli sp, sp, CLOSURE_FRAME_SIZE
jrp lr
}
.cfi_endproc
.size ffi_closure_tile, . - ffi_closure_tile
/* What follows are code template instructions that get copied to the
closure trampoline by ffi_prep_closure_loc. The zeroed operands
get replaced by their proper values at runtime.
The trampoline's job is to enter ffi_closure_tile with the state that
function expects: on TILE-Gx, the closure address in r11 and lr left
untouched; on TILEPro, the real return address in r10 and lr pointing
just past the trampoline's first bundle. */
.section .text.ffi_template_tramp_tile, "ax", @progbits
.align 8
.globl ffi_template_tramp_tile
FFI_HIDDEN(ffi_template_tramp_tile)
ffi_template_tramp_tile:
#ifdef __tilegx__
/* TILE-Gx: build two addresses piecewise from 16-bit immediates, then
jump. */
{
moveli r11, 0 /* backpatched to address of containing closure. */
moveli r10, 0 /* backpatched to ffi_closure_tile. */
}
/* Note: the following bundle gets generated multiple times
depending on the pointer value (esp. useful for -m32 mode). */
{ shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
/* Plain jump (not a call), so lr keeps the original caller's return
address. */
{ info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
#else
/* TILEPro: stash the real return address in r10, then "call"
ffi_closure_tile so that lr identifies the closure.
'jal .' yields a PC-relative offset of zero so we can OR in the
right offset at runtime. */
{ move r10, lr ; jal . /* ffi_closure_tile */ }
#endif
.size ffi_template_tramp_tile, . - ffi_template_tramp_tile
|