Skip to content

Commit 5008972

Browse files
peterhinch authored and dpgeorge committed
py/inlinethumb: Support for core floating point instructions.
Adds support for the following Thumb2 VFP instructions, via the option MICROPY_EMIT_INLINE_THUMB_FLOAT: vcmp, vsqrt, vneg, vcvt_f32_s32 (int to float), vcvt_s32_f32 (float to int), vmrs, vmov, vldr, vstr, vadd, vsub, vmul, vdiv.
1 parent d8cbbca commit 5008972

12 files changed

Lines changed: 219 additions & 0 deletions

py/emitinlinethumb.c

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,35 @@ STATIC mp_uint_t get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_n
196196
return 0;
197197
}
198198

199+
#if MICROPY_EMIT_INLINE_THUMB_FLOAT
200+
// Parse a VFP single-precision register argument of the form "s<N>" and
// return its number N, which must be in the range 0..31.
//
//   emit: assembler state that receives the error on failure
//   op:   mnemonic being assembled; used only in error messages
//   pn:   parse node holding the register name
//
// On failure a SyntaxError is recorded via emit_inline_thumb_error_exc and 0
// is returned:
//   - "expects an FPU register" if the name is not 's' followed by one or
//     more decimal digits and nothing else;
//   - "expects at most s31" if the register number is out of range.
STATIC mp_uint_t get_arg_vfpreg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
    const char *reg_str = get_arg_str(pn);
    if (reg_str[0] == 's' && reg_str[1] != '\0') {
        mp_uint_t regno = 0;
        for (++reg_str; *reg_str; ++reg_str) {
            mp_uint_t v = *reg_str;
            if (!('0' <= v && v <= '9')) {
                goto malformed;
            }
            // Stop accumulating once the value is already out of range, so a
            // long digit string cannot wrap around mp_uint_t and alias a
            // valid register number.  The remaining characters are still
            // scanned so that trailing junk is reported as malformed.
            if (regno <= 31) {
                regno = 10 * regno + v - '0';
            }
        }
        if (regno > 31) {
            // VFP registers are named s0..s31, so report the limit with an
            // 's' prefix (not 'r', which denotes the ARM core registers).
            emit_inline_thumb_error_exc(emit,
                mp_obj_new_exception_msg_varg(&mp_type_SyntaxError,
                    "'%s' expects at most s%d", op, 31));
            return 0;
        }
        return regno;
    }
malformed:
    emit_inline_thumb_error_exc(emit,
        mp_obj_new_exception_msg_varg(&mp_type_SyntaxError,
            "'%s' expects an FPU register", op));
    return 0;
}
226+
#endif
227+
199228
STATIC mp_uint_t get_arg_reglist(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
200229
// a register list looks like {r0, r1, r2} and is parsed as a Python set
201230

@@ -352,6 +381,17 @@ STATIC const format_9_10_op_t format_9_10_op_table[] = {
352381
};
353382
#undef X
354383

384+
#if MICROPY_EMIT_INLINE_THUMB_FLOAT
385+
// Table of the three-operand VFP arithmetic instructions, looked up by
// emit_inline_thumb_op when an op starting with 'v' is given 3 arguments.
// Each packed `op` byte is expanded into the two halfwords of the 32-bit
// instruction as:
//   first halfword:  0xee00 | (op & 0xf0)
//   second halfword: 0x0a00 | ((op & 0x0f) << 4)
// (the register fields are OR-ed in afterwards).
// `name` is the 3 characters following the leading 'v' of the mnemonic; the
// array is exactly 3 bytes, so it carries no NUL terminator and must be
// compared with a length-limited function such as strncmp(..., 3).
386+
typedef struct _format_vfp_op_t { byte op; char name[3]; } format_vfp_op_t;
387+
STATIC const format_vfp_op_t format_vfp_op_table[] = {
388+
{ 0x30, "add" },
389+
{ 0x34, "sub" },
390+
{ 0x20, "mul" },
391+
{ 0x80, "div" },
392+
};
393+
#endif
394+
355395
STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_args, mp_parse_node_t *pn_args) {
356396
// TODO perhaps make two tables:
357397
// one_args =
@@ -366,6 +406,102 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_a
366406
mp_uint_t op_len;
367407
const char *op_str = (const char*)qstr_data(op, &op_len);
368408

409+
#if MICROPY_EMIT_INLINE_THUMB_FLOAT
410+
if (op_str[0] == 'v') {
411+
// floating point operations
412+
if (n_args == 2) {
413+
mp_uint_t op_code = 0x0ac0, op_code_hi;
414+
if (strcmp(op_str, "vcmp") == 0) {
415+
op_code_hi = 0xeeb4;
416+
op_vfp_twoargs:;
417+
mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
418+
mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
419+
asm_thumb_op32(emit->as,
420+
op_code_hi | ((vd & 1) << 6),
421+
op_code | ((vd & 0x1e) << 11) | ((vm & 1) << 5) | (vm & 0x1e) >> 1);
422+
} else if (strcmp(op_str, "vsqrt") == 0) {
423+
op_code_hi = 0xeeb1;
424+
goto op_vfp_twoargs;
425+
} else if (strcmp(op_str, "vneg") == 0) {
426+
op_code_hi = 0xeeb1;
427+
op_code = 0x0a40;
428+
goto op_vfp_twoargs;
429+
} else if (strcmp(op_str, "vcvt_f32_s32") == 0) {
430+
op_code_hi = 0xeeb8; // int to float
431+
goto op_vfp_twoargs;
432+
} else if (strcmp(op_str, "vcvt_s32_f32") == 0) {
433+
op_code_hi = 0xeebd; // float to int
434+
goto op_vfp_twoargs;
435+
} else if (strcmp(op_str, "vmrs") == 0) {
436+
mp_uint_t reg_dest;
437+
const char *reg_str0 = get_arg_str(pn_args[0]);
438+
if (strcmp(reg_str0, "APSR_nzcv") == 0) {
439+
reg_dest = 15;
440+
} else {
441+
reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15);
442+
}
443+
const char *reg_str1 = get_arg_str(pn_args[1]);
444+
if (strcmp(reg_str1, "FPSCR") == 0) {
445+
// FP status to ARM reg
446+
asm_thumb_op32(emit->as, 0xeef1, 0x0a10 | (reg_dest << 12));
447+
} else {
448+
goto unknown_op;
449+
}
450+
} else if (strcmp(op_str, "vmov") == 0) {
451+
op_code_hi = 0xee00;
452+
mp_uint_t r_arm, vm;
453+
const char *reg_str = get_arg_str(pn_args[0]);
454+
if (reg_str[0] == 'r') {
455+
r_arm = get_arg_reg(emit, op_str, pn_args[0], 15);
456+
vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
457+
op_code_hi |= 0x10;
458+
} else {
459+
vm = get_arg_vfpreg(emit, op_str, pn_args[0]);
460+
r_arm = get_arg_reg(emit, op_str, pn_args[1], 15);
461+
}
462+
asm_thumb_op32(emit->as,
463+
op_code_hi | ((vm & 0x1e) >> 1),
464+
0x0a10 | (r_arm << 12) | ((vm & 1) << 7));
465+
} else if (strcmp(op_str, "vldr") == 0) {
466+
op_code_hi = 0xed90;
467+
op_vldr_vstr:;
468+
mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
469+
mp_parse_node_t pn_base, pn_offset;
470+
if (get_arg_addr(emit, op_str, pn_args[1], &pn_base, &pn_offset)) {
471+
mp_uint_t rlo_base = get_arg_reg(emit, op_str, pn_base, 7);
472+
mp_uint_t i8;
473+
i8 = get_arg_i(emit, op_str, pn_offset, 0xff);
474+
asm_thumb_op32(emit->as,
475+
op_code_hi | rlo_base | ((vd & 1) << 6),
476+
0x0a00 | ((vd & 0x1e) << 11) | i8);
477+
}
478+
} else if (strcmp(op_str, "vstr") == 0) {
479+
op_code_hi = 0xed80;
480+
goto op_vldr_vstr;
481+
} else {
482+
goto unknown_op;
483+
}
484+
} else if (n_args == 3) {
485+
// search table for arith ops
486+
for (mp_uint_t i = 0; i < MP_ARRAY_SIZE(format_vfp_op_table); i++) {
487+
if (strncmp(op_str + 1, format_vfp_op_table[i].name, 3) == 0 && op_str[4] == '\0') {
488+
mp_uint_t op_code_hi = 0xee00 | (format_vfp_op_table[i].op & 0xf0);
489+
mp_uint_t op_code = 0x0a00 | ((format_vfp_op_table[i].op & 0x0f) << 4);
490+
mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
491+
mp_uint_t vn = get_arg_vfpreg(emit, op_str, pn_args[1]);
492+
mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[2]);
493+
asm_thumb_op32(emit->as,
494+
op_code_hi | ((vd & 1) << 6) | (vn >> 1),
495+
op_code | (vm >> 1) | ((vm & 1) << 5) | ((vd & 0x1e) << 11) | ((vn & 1) << 7));
496+
return;
497+
}
498+
}
499+
goto unknown_op;
500+
} else {
501+
goto unknown_op;
502+
}
503+
} else
504+
#endif
369505
if (n_args == 0) {
370506
if (strcmp(op_str, "nop") == 0) {
371507
asm_thumb_op16(emit->as, ASM_THUMB_OP_NOP);

py/mpconfig.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,11 @@
186186
#define MICROPY_EMIT_INLINE_THUMB (0)
187187
#endif
188188

189+
// Whether to enable float support in the Thumb2 inline assembler
190+
#ifndef MICROPY_EMIT_INLINE_THUMB_FLOAT
191+
#define MICROPY_EMIT_INLINE_THUMB_FLOAT (1)
192+
#endif
193+
189194
// Whether to emit ARM native code
190195
#ifndef MICROPY_EMIT_ARM
191196
#define MICROPY_EMIT_ARM (0)

tests/inlineasm/asmfpaddsub.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
@micropython.asm_thumb # r0 = r0+r1-r2
2+
def add_sub(r0, r1, r2):
3+
vmov(s0, r0)
4+
vcvt_f32_s32(s0, s0)
5+
vmov(s1, r1)
6+
vcvt_f32_s32(s1, s1)
7+
vmov(s2, r2)
8+
vcvt_f32_s32(s2, s2)
9+
vadd(s0, s0, s1)
10+
vsub(s0, s0, s2)
11+
vcvt_s32_f32(s31, s0)
12+
vmov(r0, s31)
13+
14+
print(add_sub(100, 20, 30))
15+

tests/inlineasm/asmfpaddsub.py.exp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
90

tests/inlineasm/asmfpcmp.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
@micropython.asm_thumb # test vcmp, vmrs
2+
def f(r0, r1):
3+
vmov(s0, r0)
4+
vcvt_f32_s32(s0, s0)
5+
vmov(s1, r1)
6+
vcvt_f32_s32(s1, s1)
7+
vcmp(s1, s0)
8+
vmrs(r0, FPSCR)
9+
mov(r1, 28)
10+
lsr(r0, r1)
11+
12+
print(f(0,1))
13+
print(f(1,1))
14+
print(f(1,0))

tests/inlineasm/asmfpcmp.py.exp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2
2+
6
3+
8

tests/inlineasm/asmfpldrstr.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import array
2+
@micropython.asm_thumb # test vldr, vstr
3+
def arrayadd(r0):
4+
vldr(s0, [r0, 0])
5+
vldr(s1, [r0, 1])
6+
vadd(s2, s0, s1)
7+
vstr(s2, [r0, 2])
8+
9+
z = array.array("f", [2, 4, 10])
10+
arrayadd(z)
11+
print(z[2])
12+

tests/inlineasm/asmfpldrstr.py.exp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
6.0

tests/inlineasm/asmfpmuldiv.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
@micropython.asm_thumb # r0 = (int)(r0*r1/r2)
2+
def muldiv(r0, r1, r2):
3+
vmov(s0, r0)
4+
vcvt_f32_s32(s0, s0)
5+
vmov(s1, r1)
6+
vcvt_f32_s32(s1, s1)
7+
vmov(s2, r2)
8+
vcvt_f32_s32(s2, s2)
9+
vmul(s7, s0, s1)
10+
vdiv(s8, s7, s2)
11+
vcvt_s32_f32(s31, s8)
12+
vmov(r0, s31)
13+
14+
print(muldiv(100, 10, 50))
15+

tests/inlineasm/asmfpmuldiv.py.exp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
20

0 commit comments

Comments
 (0)