Skip to content

Commit 36db6bc

Browse files
committed
py, compiler: Improve passes; add an extra pass for native emitter.
1 parent ca25c15 commit 36db6bc

12 files changed

Lines changed: 216 additions & 201 deletions

File tree

py/asmthumb.c

Lines changed: 27 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@
4242
#define SIGNED_FIT12(x) (((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800)
4343

4444
struct _asm_thumb_t {
45-
int pass;
45+
uint pass;
4646
uint code_offset;
4747
uint code_size;
4848
byte *code_base;
@@ -58,14 +58,9 @@ struct _asm_thumb_t {
5858
asm_thumb_t *asm_thumb_new(uint max_num_labels) {
5959
asm_thumb_t *as;
6060

61-
as = m_new(asm_thumb_t, 1);
62-
as->pass = 0;
63-
as->code_offset = 0;
64-
as->code_size = 0;
65-
as->code_base = NULL;
61+
as = m_new0(asm_thumb_t, 1);
6662
as->max_num_labels = max_num_labels;
6763
as->label_offsets = m_new(int, max_num_labels);
68-
as->num_locals = 0;
6964

7065
return as;
7166
}
@@ -89,16 +84,16 @@ void asm_thumb_free(asm_thumb_t *as, bool free_code) {
8984
m_del_obj(asm_thumb_t, as);
9085
}
9186

92-
void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
87+
void asm_thumb_start_pass(asm_thumb_t *as, uint pass) {
9388
as->pass = pass;
9489
as->code_offset = 0;
95-
if (pass == ASM_THUMB_PASS_2) {
90+
if (pass == ASM_THUMB_PASS_COMPUTE) {
9691
memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
9792
}
9893
}
9994

10095
void asm_thumb_end_pass(asm_thumb_t *as) {
101-
if (as->pass == ASM_THUMB_PASS_2) {
96+
if (as->pass == ASM_THUMB_PASS_COMPUTE) {
10297
// calculate size of code in bytes
10398
as->code_size = as->code_offset;
10499
as->code_base = m_new(byte, as->code_size);
@@ -120,7 +115,7 @@ void asm_thumb_end_pass(asm_thumb_t *as) {
120115
// all functions must go through this one to emit bytes
121116
STATIC byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
122117
//printf("emit %d\n", num_bytes_to_write);
123-
if (as->pass < ASM_THUMB_PASS_3) {
118+
if (as->pass < ASM_THUMB_PASS_EMIT) {
124119
as->code_offset += num_bytes_to_write;
125120
return as->dummy_data;
126121
} else {
@@ -224,12 +219,12 @@ void asm_thumb_exit(asm_thumb_t *as) {
224219

225220
void asm_thumb_label_assign(asm_thumb_t *as, uint label) {
226221
assert(label < as->max_num_labels);
227-
if (as->pass == ASM_THUMB_PASS_2) {
222+
if (as->pass < ASM_THUMB_PASS_EMIT) {
228223
// assign label offset
229224
assert(as->label_offsets[label] == -1);
230225
as->label_offsets[label] = as->code_offset;
231-
} else if (as->pass == ASM_THUMB_PASS_3) {
232-
// ensure label offset has not changed from PASS_2 to PASS_3
226+
} else {
227+
// ensure label offset has not changed from PASS_COMPUTE to PASS_EMIT
233228
//printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset);
234229
assert(as->label_offsets[label] == as->code_offset);
235230
}
@@ -383,20 +378,35 @@ void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
383378
}
384379
}
385380

381+
// i32 is stored as a full word in the code, and aligned to machine-word boundary
382+
// TODO this is very inefficient, improve it!
383+
void asm_thumb_mov_reg_i32_aligned(asm_thumb_t *as, uint reg_dest, int i32) {
384+
// align on machine-word + 2
385+
if ((as->code_offset & 3) == 0) {
386+
asm_thumb_op16(as, ASM_THUMB_OP_NOP);
387+
}
388+
// jump over the i32 value (instruction prefetch adds 4 to PC)
389+
asm_thumb_op16(as, OP_B_N(0));
390+
// store i32 on machine-word aligned boundary
391+
asm_thumb_data(as, 4, i32);
392+
// do the actual load of the i32 value
393+
asm_thumb_mov_reg_i32_optimised(as, reg_dest, i32);
394+
}
395+
386396
#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
387397
#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
388398

389399
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
390400
assert(rlo_src < REG_R8);
391401
int word_offset = as->num_locals - local_num - 1;
392-
assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
402+
assert(as->pass < ASM_THUMB_PASS_EMIT || word_offset >= 0);
393403
asm_thumb_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset));
394404
}
395405

396406
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
397407
assert(rlo_dest < REG_R8);
398408
int word_offset = as->num_locals - local_num - 1;
399-
assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
409+
assert(as->pass < ASM_THUMB_PASS_EMIT || word_offset >= 0);
400410
asm_thumb_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset));
401411
}
402412

@@ -405,7 +415,7 @@ void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
405415
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num) {
406416
assert(rlo_dest < REG_R8);
407417
int word_offset = as->num_locals - local_num - 1;
408-
assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
418+
assert(as->pass < ASM_THUMB_PASS_EMIT || word_offset >= 0);
409419
asm_thumb_op16(as, OP_ADD_REG_SP_OFFSET(rlo_dest, word_offset));
410420
}
411421

py/asmthumb.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,8 @@
2424
* THE SOFTWARE.
2525
*/
2626

27-
#define ASM_THUMB_PASS_1 (1)
28-
#define ASM_THUMB_PASS_2 (2)
29-
#define ASM_THUMB_PASS_3 (3)
27+
#define ASM_THUMB_PASS_COMPUTE (1)
28+
#define ASM_THUMB_PASS_EMIT (2)
3029

3130
#define REG_R0 (0)
3231
#define REG_R1 (1)
@@ -71,7 +70,7 @@ typedef struct _asm_thumb_t asm_thumb_t;
7170

7271
asm_thumb_t *asm_thumb_new(uint max_num_labels);
7372
void asm_thumb_free(asm_thumb_t *as, bool free_code);
74-
void asm_thumb_start_pass(asm_thumb_t *as, int pass);
73+
void asm_thumb_start_pass(asm_thumb_t *as, uint pass);
7574
void asm_thumb_end_pass(asm_thumb_t *as);
7675
uint asm_thumb_get_code_size(asm_thumb_t *as);
7776
void *asm_thumb_get_code(asm_thumb_t *as);
@@ -188,6 +187,7 @@ void asm_thumb_bcc_n(asm_thumb_t *as, int cond, uint label);
188187

189188
void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src); // convenience
190189
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src); // convenience
190+
void asm_thumb_mov_reg_i32_aligned(asm_thumb_t *as, uint reg_dest, int i32); // convenience
191191
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src); // convenience
192192
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); // convenience
193193
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num); // convenience

py/asmx64.c

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@
112112
#define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80)
113113

114114
struct _asm_x64_t {
115-
int pass;
115+
uint pass;
116116
uint code_offset;
117117
uint code_size;
118118
byte *code_base;
@@ -138,14 +138,9 @@ void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) {
138138
asm_x64_t *asm_x64_new(uint max_num_labels) {
139139
asm_x64_t *as;
140140

141-
as = m_new(asm_x64_t, 1);
142-
as->pass = 0;
143-
as->code_offset = 0;
144-
as->code_size = 0;
145-
as->code_base = NULL;
141+
as = m_new0(asm_x64_t, 1);
146142
as->max_num_labels = max_num_labels;
147143
as->label_offsets = m_new(int, max_num_labels);
148-
as->num_locals = 0;
149144

150145
return as;
151146
}
@@ -170,17 +165,17 @@ void asm_x64_free(asm_x64_t *as, bool free_code) {
170165
m_del_obj(asm_x64_t, as);
171166
}
172167

173-
void asm_x64_start_pass(asm_x64_t *as, int pass) {
168+
void asm_x64_start_pass(asm_x64_t *as, uint pass) {
174169
as->pass = pass;
175170
as->code_offset = 0;
176-
if (pass == ASM_X64_PASS_2) {
171+
if (pass == ASM_X64_PASS_COMPUTE) {
177172
// reset all labels
178173
memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
179174
}
180175
}
181176

182177
void asm_x64_end_pass(asm_x64_t *as) {
183-
if (as->pass == ASM_X64_PASS_2) {
178+
if (as->pass == ASM_X64_PASS_COMPUTE) {
184179
// calculate size of code in bytes
185180
as->code_size = as->code_offset;
186181
//as->code_base = m_new(byte, as->code_size); need to allocale executable memory
@@ -204,7 +199,7 @@ void asm_x64_end_pass(asm_x64_t *as) {
204199
// all functions must go through this one to emit bytes
205200
STATIC byte *asm_x64_get_cur_to_write_bytes(asm_x64_t *as, int num_bytes_to_write) {
206201
//printf("emit %d\n", num_bytes_to_write);
207-
if (as->pass < ASM_X64_PASS_3) {
202+
if (as->pass < ASM_X64_PASS_EMIT) {
208203
as->code_offset += num_bytes_to_write;
209204
return as->dummy_data;
210205
} else {
@@ -367,6 +362,15 @@ void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r
367362
}
368363
}
369364

365+
// src_i64 is stored as a full word in the code, and aligned to machine-word boundary
366+
void asm_x64_mov_i64_to_r64_aligned(asm_x64_t *as, int64_t src_i64, int dest_r64) {
367+
// mov instruction uses 2 bytes for the instruction, before the i64
368+
while (((as->code_offset + 2) & (WORD_SIZE - 1)) != 0) {
369+
asm_x64_nop(as);
370+
}
371+
asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
372+
}
373+
370374
void asm_x64_mov_i32_to_disp(asm_x64_t *as, int src_i32, int dest_r32, int dest_disp)
371375
{
372376
assert(0);
@@ -487,12 +491,12 @@ void asm_x64_setcc_r8(asm_x64_t *as, int jcc_type, int dest_r8) {
487491

488492
void asm_x64_label_assign(asm_x64_t *as, int label) {
489493
assert(label < as->max_num_labels);
490-
if (as->pass == ASM_X64_PASS_2) {
494+
if (as->pass < ASM_X64_PASS_EMIT) {
491495
// assign label offset
492496
assert(as->label_offsets[label] == -1);
493497
as->label_offsets[label] = as->code_offset;
494-
} else if (as->pass == ASM_X64_PASS_3) {
495-
// ensure label offset has not changed from PASS_2 to PASS_3
498+
} else {
499+
// ensure label offset has not changed from PASS_COMPUTE to PASS_EMIT
496500
//printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset);
497501
assert(as->label_offsets[label] == as->code_offset);
498502
}

py/asmx64.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,8 @@
2424
* THE SOFTWARE.
2525
*/
2626

27-
#define ASM_X64_PASS_1 (1)
28-
#define ASM_X64_PASS_2 (2)
29-
#define ASM_X64_PASS_3 (3)
27+
#define ASM_X64_PASS_COMPUTE (1)
28+
#define ASM_X64_PASS_EMIT (2)
3029

3130
#define REG_RAX (0)
3231
#define REG_RCX (1)
@@ -54,7 +53,7 @@ typedef struct _asm_x64_t asm_x64_t;
5453

5554
asm_x64_t* asm_x64_new(uint max_num_labels);
5655
void asm_x64_free(asm_x64_t* as, bool free_code);
57-
void asm_x64_start_pass(asm_x64_t *as, int pass);
56+
void asm_x64_start_pass(asm_x64_t *as, uint pass);
5857
void asm_x64_end_pass(asm_x64_t *as);
5958
uint asm_x64_get_code_size(asm_x64_t* as);
6059
void* asm_x64_get_code(asm_x64_t* as);
@@ -71,6 +70,7 @@ void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64);
7170
void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
7271
void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp);
7372
void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
73+
void asm_x64_mov_i64_to_r64_aligned(asm_x64_t *as, int64_t src_i64, int dest_r64);
7474
void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
7575
void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
7676
void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32);

0 commit comments

Comments
 (0)