Skip to content

Commit 5bb8a7a

Browse files
committed
Improve iMX RT performance
* Enable dcache for OCRAM where the VM heap lives. * Add CIRCUITPY_SWO_TRACE for pushing program counters out over the SWO pin via the ITM module in the CPU. Exempt some functions from instrumentation to reduce traffic and allow inlining. * Place more functions in ITCM so that errors can be handled using RAM-only code and to speed up CP. * Use the SET and CLEAR registers for digitalio; the SDK does a read, mask and write instead. * Reserve 2MiB for CircuitPython code, up from 1MiB. * Run USB interrupts during flash erase and write. * Allow storage writes from CP if the USB drive is disabled. * Get perf bench tests running on CircuitPython and increase timeouts so they work when instrumentation is active.
1 parent 9083ae0 commit 5bb8a7a

50 files changed

Lines changed: 809 additions & 301 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

ports/atmel-samd/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ all: $(BUILD)/firmware.bin $(BUILD)/firmware.uf2
366366
$(BUILD)/firmware.elf: $(OBJ) $(GENERATED_LD_FILE)
367367
$(STEPECHO) "LINK $@"
368368
$(Q)echo $(OBJ) > $(BUILD)/firmware.objs
369-
$(Q)$(CC) -o $@ $(LDFLAGS) @$(BUILD)/firmware.objs -Wl,--start-group $(LIBS) -Wl,--end-group
369+
$(Q)$(CC) -o $@ $(LDFLAGS) @$(BUILD)/firmware.objs -Wl,--print-memory-usage -Wl,--start-group $(LIBS) -Wl,--end-group
370370
$(Q)$(SIZE) $@ | $(PYTHON) $(TOP)/tools/build_memory_info.py $(GENERATED_LD_FILE) $(BUILD)
371371

372372
$(BUILD)/firmware.bin: $(BUILD)/firmware.elf

ports/mimxrt10xx/Makefile

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ INC += \
4949
CFLAGS += -ftree-vrp -DNDEBUG
5050

5151
# TinyUSB defines
52-
CFLAGS += -DCFG_TUSB_MCU=OPT_MCU_MIMXRT10XX -DCFG_TUD_MIDI_RX_BUFSIZE=512 -DCFG_TUD_CDC_RX_BUFSIZE=512 -DCFG_TUD_MIDI_TX_BUFSIZE=512 -DCFG_TUD_CDC_TX_BUFSIZE=512 -DCFG_TUD_MSC_BUFSIZE=1024
52+
CFLAGS += -DCFG_TUSB_MCU=OPT_MCU_MIMXRT10XX -DCFG_TUD_CDC_RX_BUFSIZE=512 -DCFG_TUD_CDC_TX_BUFSIZE=512
53+
ifeq ($(CHIP_FAMILY), MIMXRT1011)
54+
CFLAGS += -DCFG_TUD_MIDI_RX_BUFSIZE=64 -DCFG_TUD_MIDI_TX_BUFSIZE=64 -DCFG_TUD_MSC_BUFSIZE=512
55+
else
56+
CFLAGS += -DCFG_TUD_MIDI_RX_BUFSIZE=512 -DCFG_TUD_MIDI_TX_BUFSIZE=512 -DCFG_TUD_MSC_BUFSIZE=1024
57+
endif
5358

5459
#Debugging/Optimization
5560
# Never set -fno-inline because we use inline to move small functions into routines that must be
@@ -76,11 +81,15 @@ CFLAGS += \
7681
-g3 -Wno-unused-parameter \
7782
-ffunction-sections -fdata-sections -fstack-usage
7883

79-
OPTIMIZATION_FLAGS ?= -O2 -fno-inline-functions
84+
OPTIMIZATION_FLAGS ?= -O2
8085

8186
# option to override compiler optimization level, set in boards/$(BOARD)/mpconfigboard.mk
8287
CFLAGS += $(OPTIMIZATION_FLAGS)
8388

89+
ifeq ($(CIRCUITPY_SWO_TRACE), 1)
90+
CFLAGS += -finstrument-functions -finstrument-functions-exclude-file-list=tinyusb -finstrument-functions-exclude-function-list='USB_OTG1_IRQHandler,usb_irq_handler,nlr_push,CLOCK_EnableClock,CLOCK_SetDiv,CLOCK_SetMux,__DMB,__ISB,__DSB,SCB_EnableICache,SCB_EnableDCache,ARM_MPU_Disable,ARM_MPU_Enable,SCB_DisableDCache,SCB_DisableICache,__enable_irq,__disable_irq,__set_MSP,port_get_raw_ticks,supervisor_ticks_ms64'
91+
endif
92+
8493
LD_FILES = $(wildcard boards/$(BOARD)/*.ld) $(addprefix linking/, flash/$(FLASH).ld chip_family/$(CHIP_FAMILY).ld common.ld)
8594

8695
LD_SCRIPT_FLAG := -Wl,-T,
@@ -171,7 +180,7 @@ all: $(BUILD)/firmware.bin $(BUILD)/firmware.uf2 $(BUILD)/firmware.hex
171180

172181
$(BUILD)/firmware.elf: $(OBJ) $(LD_FILES)
173182
$(STEPECHO) "LINK $@"
174-
$(Q)$(CC) -o $@ $(LDFLAGS) $(filter-out %.ld, $^) -Wl,--start-group $(LIBS) -Wl,--end-group
183+
$(Q)$(CC) -o $@ $(LDFLAGS) $(filter-out %.ld, $^) -Wl,--print-memory-usage -Wl,--start-group $(LIBS) -Wl,--end-group
175184

176185
$(BUILD)/firmware.bin: $(BUILD)/firmware.elf
177186
$(STEPECHO) "Create $@"

ports/mimxrt10xx/background.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@
2727

2828
#include "supervisor/port.h"
2929

30-
void port_background_task(void) {
30+
#include "supervisor/linker.h"
31+
32+
#include "fsl_common.h"
33+
34+
void PLACE_IN_ITCM(port_background_task)(void) {
3135
}
3236

3337
void port_background_tick(void) {
@@ -38,5 +42,6 @@ void port_background_tick(void) {
3842

3943
void port_start_background_task(void) {
4044
}
45+
4146
void port_finish_background_task(void) {
4247
}

ports/mimxrt10xx/boards/imxrt1010_evk/mpconfigboard.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,8 @@
1616

1717
#define DEFAULT_UART_BUS_RX (&pin_GPIO_09)
1818
#define DEFAULT_UART_BUS_TX (&pin_GPIO_10)
19+
20+
#define CIRCUITPY_CONSOLE_UART_RX (&pin_GPIO_09)
21+
#define CIRCUITPY_CONSOLE_UART_TX (&pin_GPIO_10)
22+
23+
#define MICROPY_HW_LED_STATUS (&pin_GPIO_11)

ports/mimxrt10xx/boards/metro_m7_1011/board.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,18 @@ const mcu_pin_obj_t *mimxrt10xx_reset_forbidden_pins[] = {
4747
};
4848

4949
// Use the MP_WEAK supervisor/shared/board.c versions of routines not defined here.
50+
51+
bool mimxrt10xx_board_reset_pin_number(const mcu_pin_obj_t *pin) { // Board-specific pin reset hook: returns true when the pin was configured here, false otherwise.
52+
#if CIRCUITPY_SWO_TRACE
53+
if (pin == &pin_GPIO_AD_09) { // Only GPIO_AD_09 is special-cased, and only when SWO tracing is compiled in.
54+
IOMUXC_SetPinMux( /* Select the ARM trace SWO function on GPIO_AD_09. */
55+
IOMUXC_GPIO_AD_09_ARM_TRACE_SWO,
56+
0U);
57+
IOMUXC_SetPinConfig( /* Apply pad configuration value 0x00F9 to the SWO pin. */
58+
IOMUXC_GPIO_AD_09_ARM_TRACE_SWO,
59+
0x00F9U);
60+
return true;
61+
}
62+
#endif
63+
return false;
64+
}

ports/mimxrt10xx/boards/metro_m7_1011/flash_config.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include "fsl_flexspi_nor_boot.h"
1111

1212

13-
__attribute__((section(".boot_hdr.ivt")))
13+
__attribute__((section(".boot_hdr.ivt"),used))
1414
/*************************************
1515
* IVT Data
1616
*************************************/
@@ -25,7 +25,7 @@ const ivt image_vector_table = {
2525
IVT_RSVD /* Reserved = 0 */
2626
};
2727

28-
__attribute__((section(".boot_hdr.boot_data")))
28+
__attribute__((section(".boot_hdr.boot_data"),used))
2929
/*************************************
3030
* Boot Data
3131
*************************************/

ports/mimxrt10xx/common-hal/digitalio/DigitalInOut.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,12 @@ digitalio_direction_t common_hal_digitalio_digitalinout_get_direction(
118118

119119
void common_hal_digitalio_digitalinout_set_value(
120120
digitalio_digitalinout_obj_t *self, bool value) {
121-
GPIO_PinWrite(self->pin->gpio, self->pin->number, value);
121+
GPIO_Type *gpio = self->pin->gpio; // Drive the pin through DR_SET/DR_CLEAR instead of the SDK's read, mask and write.
122+
if (value) {
123+
gpio->DR_SET = 1U << self->pin->number; // Unsigned literal: shifting a signed 1 into bit 31 is undefined behavior.
124+
} else {
125+
gpio->DR_CLEAR = 1U << self->pin->number; // Same unsigned mask, written to the clear register to drive the pin low.
126+
}
122127
}
123128

124129
bool common_hal_digitalio_digitalinout_get_value(

ports/mimxrt10xx/common-hal/microcontroller/__init__.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,22 @@
3636
#include "shared-bindings/microcontroller/__init__.h"
3737
#include "shared-bindings/microcontroller/Pin.h"
3838
#include "shared-bindings/microcontroller/Processor.h"
39+
#include "supervisor/linker.h"
3940
#include "supervisor/shared/safe_mode.h"
4041
#include "supervisor/shared/translate/translate.h"
4142

4243
void common_hal_mcu_delay_us(uint32_t delay) {
4344
mp_hal_delay_us(delay);
4445
}
4546

46-
volatile uint32_t nesting_count = 0;
47-
void common_hal_mcu_disable_interrupts(void) {
47+
volatile uint32_t PLACE_IN_DTCM_BSS(nesting_count) = 0;
48+
void PLACE_IN_ITCM(common_hal_mcu_disable_interrupts)(void) { // Disable interrupts and record one more level of nesting.
4849
__disable_irq();
5050
__DMB(); // Memory barrier issued after interrupts are masked and before the counter is updated.
5151
nesting_count++; // Counts outstanding disables; the enable routine treats a count of 0 as a mismatched enable.
5252
}
5253

53-
void common_hal_mcu_enable_interrupts(void) {
54+
void PLACE_IN_ITCM(common_hal_mcu_enable_interrupts)(void) {
5455
if (nesting_count == 0) {
5556
// This is very very bad because it means there was mismatched disable/enables
5657
reset_into_safe_mode(SAFE_MODE_INTERRUPT_ERROR);

ports/mimxrt10xx/common-hal/neopixel_write/__init__.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,7 @@ void PLACE_IN_ITCM(common_hal_neopixel_write)(const digitalio_digitalinout_obj_t
6464
const uint32_t pin = digitalinout->pin->number;
6565

6666
__disable_irq();
67-
// Enable DWT in debug core. Useable when interrupts disabled, as opposed to Systick->VAL
68-
CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
69-
DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
67+
// Use DWT in debug core. Useable when interrupts disabled, as opposed to Systick->VAL
7068
DWT->CYCCNT = 0;
7169

7270
for (;;) {
@@ -88,12 +86,12 @@ void PLACE_IN_ITCM(common_hal_neopixel_write)(const digitalio_digitalinout_obj_t
8886
mask = 0x80;
8987
}
9088
}
89+
// Enable interrupts again
90+
__enable_irq();
9191

9292
// Update the next start.
9393
next_start_raw_ticks = port_get_raw_ticks(NULL) + 4;
9494

95-
// Enable interrupts again
96-
__enable_irq();
9795
}
9896

9997
#pragma GCC pop_options

ports/mimxrt10xx/linking/common.ld

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Boards can setup reserved flash with _ld_reserved_flash_size in board.ld. */
66

77
ENTRY(Reset_Handler)
88

9-
code_size = 1M;
9+
code_size = 2M;
1010
_ld_default_stack_size = 20K;
1111

1212
/* Default reserved flash to nothing. */
@@ -22,9 +22,9 @@ MEMORY
2222
FLASH_IVT (rx) : ORIGIN = 0x60001000, LENGTH = 4K
2323
/* Place the ISRs 48k in to leave room for the bootloader when it is available. */
2424
FLASH_FIRMWARE (rx) : ORIGIN = 0x6000C000, LENGTH = code_size - 48K
25-
FLASH_FATFS (r) : ORIGIN = 0x60100000, LENGTH = _ld_flash_size - code_size - _ld_reserved_flash_size
25+
FLASH_FATFS (r) : ORIGIN = 0x60000000 + code_size, LENGTH = _ld_flash_size - code_size - _ld_reserved_flash_size
2626
/* Teensy uses the last bit of flash for recovery. */
27-
RESERVED_FLASH : ORIGIN = 0x60100000 + _ld_flash_size - _ld_reserved_flash_size, LENGTH = _ld_reserved_flash_size
27+
RESERVED_FLASH : ORIGIN = 0x60000000 + _ld_flash_size - _ld_reserved_flash_size, LENGTH = _ld_reserved_flash_size /* Starts exactly where FLASH_FATFS ends, i.e. the last _ld_reserved_flash_size bytes of flash; do not add code_size here. */
2828
OCRAM (rwx) : ORIGIN = 0x20200000, LENGTH = ram_size - 64K
2929
DTCM (x) : ORIGIN = 0x20000000, LENGTH = 32K
3030
ITCM (x) : ORIGIN = 0x00000000, LENGTH = 32K
@@ -55,21 +55,42 @@ SECTIONS
5555
.text :
5656
{
5757
. = ALIGN(4);
58-
__VECTOR_TABLE = .;
59-
__VECTOR_RAM = .;
60-
_ld_isr_table = .;
61-
62-
KEEP(*(.isr_vector)) /* Startup code */
6358
*(EXCLUDE_FILE(
6459
*fsl_flexspi.o
60+
*dcd_ci_hs.o
61+
*tusb_fifo.o
62+
*usbd.o
63+
*string0.o
64+
*py/nlr*.o
65+
*py/obj.o
66+
*py/gc.o
67+
*py/map.o
68+
*py/runtime.o
69+
*py/objboundmeth.o
70+
*py/objtype.o
6571
) .text*) /* .text* sections (code) */
66-
*(.rodata*) /* .rodata* sections (constants, strings, etc.) */
72+
73+
/* Keep USB processing functions out of RAM because we don't know which will be used.
74+
We try to only keep USB interrupt related functions. */
75+
*dcd_ci_hs.o(.text.process_*_request .text.dcd_edpt* .text.dcd_init .text.dcd_set_address)
76+
*usbd.o(.text.process_*_request .text.process_[gs]et* .text.tud_* .text.usbd_* .text.configuration_reset .text.invoke_*)
77+
78+
/* Anything marked cold/unlikely should be in flash. */
79+
*(.text.unlikely.*)
80+
81+
*(EXCLUDE_FILE(
82+
*dcd_ci_hs.o
83+
*py/objboundmeth.o
84+
*py/objtype.o
85+
) .rodata*) /* .rodata* sections (constants, strings, etc.) */
6786
. = ALIGN(4);
6887
} > FLASH_FIRMWARE
6988

7089
.ARM.exidx :
7190
{
91+
__exidx_start = .;
7292
*(.ARM.exidx*)
93+
__exidx_end = .;
7394
*(.gnu.linkonce.armexidx.*)
7495
_etext = .; /* define a global symbol at end of code */
7596
__etext = .; /* define a global symbol at end of code */
@@ -81,7 +102,6 @@ SECTIONS
81102
{
82103
. = ALIGN(4);
83104
*(.data*) /* .data* sections */
84-
*fsl_flexspi.o(.text*)
85105
. = ALIGN(4);
86106
} > OCRAM AT> FLASH_FIRMWARE
87107
_ld_ocram_data_destination = ADDR(.data);
@@ -93,7 +113,7 @@ SECTIONS
93113
{
94114
. = ALIGN(4);
95115

96-
*(.bss*)
116+
*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
97117
*(COMMON)
98118

99119
. = ALIGN(4);
@@ -103,22 +123,50 @@ SECTIONS
103123
_ld_heap_start = _ld_ocram_bss_start + _ld_ocram_bss_size;
104124
_ld_heap_end = ORIGIN(OCRAM) + LENGTH(OCRAM);
105125

106-
.itcm :
126+
127+
.itcm : ALIGN(4)
107128
{
108129
. = ALIGN(4);
109130
*(.itcm.*)
110-
131+
*fsl_flexspi.o(.text*)
132+
*dcd_ci_hs.o(.text*)
133+
*tusb_fifo.o(.text*)
134+
*py/objboundmeth.o(.text*)
135+
*py/objtype.o(.text*)
136+
*py/obj.o(.text*)
137+
*py/gc.o(.text*)
138+
*py/map.o(.text*)
139+
*py/nlr*.o(.text*)
140+
*py/runtime.o(.text*)
141+
*(.text.process_*_isr .text.dcd_event_* .text.osal_queue*)
142+
*string0.o(.text*)
111143
. = ALIGN(4);
112144
} > ITCM AT> FLASH_FIRMWARE
113145
_ld_itcm_destination = ADDR(.itcm);
114146
_ld_itcm_flash_copy = LOADADDR(.itcm);
115147
_ld_itcm_size = SIZEOF(.itcm);
116148

149+
/* Align for 256 ISR entries */
150+
.isr_vector : ALIGN(4 * 256)
151+
{
152+
. = ALIGN(4);
153+
KEEP(*(.isr_vector)) /* Startup code */
154+
. = ALIGN(4);
155+
} > ITCM AT> FLASH_FIRMWARE
156+
_ld_isr_destination = ADDR(.isr_vector);
157+
_ld_isr_flash_copy = LOADADDR(.isr_vector);
158+
_ld_isr_size = SIZEOF(.isr_vector);
159+
/* Used by the bootloader to start user code. */
160+
__VECTOR_TABLE = LOADADDR(.isr_vector);
161+
117162
.dtcm_data :
118163
{
119164
. = ALIGN(4);
120165

121166
*(.dtcm_data.*)
167+
*dcd_ci_hs.o(.rodata*)
168+
*py/objboundmeth.o(.rodata*)
169+
*py/objtype.o(.rodata*)
122170

123171
. = ALIGN(4);
124172
} > DTCM AT> FLASH_FIRMWARE
@@ -139,7 +187,7 @@ SECTIONS
139187
_ld_dtcm_bss_start = ADDR(.dtcm_bss);
140188
_ld_dtcm_bss_size = SIZEOF(.dtcm_bss);
141189

142-
.stack :
190+
.stack (NOLOAD) :
143191
{
144192
. = ALIGN(8);
145193
_ld_stack_bottom = .;

0 commit comments

Comments
 (0)