From 47e2043564fc2dcc59f201c089905d7842864d31 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Wed, 13 Jul 2022 13:45:02 +0100 Subject: [PATCH] probe: performance enhancements - Run out of SRAM, it goes quite a lot faster - Cache the DAP clock frequency - Read RDATA and write WDATA all in one go, and merge the turnaround into the ACK phase. Reduces the number of missed PIO SM cycles during an SWD transfer Also fix a compiler warning and demote the loglevel of some debug messages. Signed-off-by: Jonathan Bell --- CMakeLists.txt | 2 ++ src/picoprobe_config.h | 4 +-- src/probe.c | 15 ++++++----- src/probe.h | 4 +-- src/sw_dp_pio.c | 56 ++++++++++++++++++++++-------------------- 5 files changed, 43 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 52bd806..c8e0b1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,4 +47,6 @@ target_compile_definitions (picoprobe PRIVATE target_link_libraries(picoprobe PRIVATE pico_stdlib pico_unique_id tinyusb_device tinyusb_board hardware_pio) +pico_set_binary_type(picoprobe copy_to_ram) + pico_add_extra_outputs(picoprobe) diff --git a/src/picoprobe_config.h b/src/picoprobe_config.h index ebccd61..1552c36 100644 --- a/src/picoprobe_config.h +++ b/src/picoprobe_config.h @@ -49,8 +49,8 @@ // PIO config #define PROBE_SM 0 #define PROBE_PIN_OFFSET 2 -#define PROBE_PIN_SWCLK PROBE_PIN_OFFSET + 0 // 2 -#define PROBE_PIN_SWDIO PROBE_PIN_OFFSET + 1 // 3 +#define PROBE_PIN_SWCLK (PROBE_PIN_OFFSET + 0) // 2 +#define PROBE_PIN_SWDIO (PROBE_PIN_OFFSET + 1) // 3 // Target reset config #define PROBE_PIN_RESET 6 diff --git a/src/probe.c b/src/probe.c index 2fe9672..6bc6d0b 100644 --- a/src/probe.c +++ b/src/probe.c @@ -105,7 +105,7 @@ void probe_assert_reset(bool state) gpio_set_dir(PROBE_PIN_RESET, state); } -void probe_write_bits(uint bit_count, uint8_t data_byte) { +void probe_write_bits(uint bit_count, uint32_t data_byte) { DEBUG_PINS_SET(probe_timing, DBG_PIN_WRITE); pio_sm_put_blocking(pio0, PROBE_SM, bit_count - 1); pio_sm_put_blocking(pio0, PROBE_SM, data_byte); @@ -117,14 +117,13 @@ void probe_write_bits(uint bit_count, uint8_t data_byte) { DEBUG_PINS_CLR(probe_timing, DBG_PIN_WRITE); } -uint8_t probe_read_bits(uint bit_count) { +uint32_t probe_read_bits(uint bit_count) { DEBUG_PINS_SET(probe_timing, DBG_PIN_READ); pio_sm_put_blocking(pio0, PROBE_SM, bit_count - 1); uint32_t data = pio_sm_get_blocking(pio0, PROBE_SM); - uint8_t data_shifted = data >> 24; - - if (bit_count < 8) { - data_shifted = data_shifted >> 8-bit_count; + uint32_t data_shifted = data; + if (bit_count < 32) { + data_shifted = data >> (32 - bit_count); } picoprobe_dump("Read %d bits 0x%x (shifted 0x%x)\n", bit_count, data, data_shifted); @@ -207,7 +206,7 @@ void probe_handle_read(uint total_bits) { } else { chunk = bits; } - probe.tx_buf[probe.tx_len] = probe_read_bits(chunk); + probe.tx_buf[probe.tx_len] = (uint8_t)probe_read_bits(chunk); probe.tx_len++; // Decrement remaining bits bits -= chunk; @@ -230,7 +229,7 @@ void probe_handle_write(uint8_t *data, uint total_bits) { chunk = bits; } - probe_write_bits(chunk, *data++); + probe_write_bits(chunk, (uint32_t)*data++); bits -= chunk; } } diff --git a/src/probe.h b/src/probe.h index 69810db..6818f1f 100644 --- a/src/probe.h +++ b/src/probe.h @@ -27,8 +27,8 @@ #define PROBE_H_ void probe_set_swclk_freq(uint freq_khz); -void probe_write_bits(uint bit_count, uint8_t data_byte); -uint8_t probe_read_bits(uint bit_count); +void probe_write_bits(uint bit_count, uint32_t data_byte); +uint32_t probe_read_bits(uint bit_count); void probe_read_mode(void); void probe_write_mode(void); diff --git a/src/sw_dp_pio.c b/src/sw_dp_pio.c index 7a755cb..799d898 100755 --- a/src/sw_dp_pio.c +++ b/src/sw_dp_pio.c @@ -32,6 +32,7 @@ /* Slight hack - we're not bitbashing so we need to set baudrate off the DAP's delay cycles. * Ideally we don't want calls to udiv everywhere... */ #define MAKE_KHZ(x) (CPU_CLOCK / (2000 * ((x) + 1))) +static uint32_t cached_delay; // Generate SWJ Sequence // count: sequence bit count @@ -42,8 +43,11 @@ void SWJ_Sequence (uint32_t count, const uint8_t *data) { uint32_t bits; uint32_t n; - probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); - picoprobe_info("SWJ sequence count = %d FDB=0x%2x\n", count, data[0]); + if (DAP_Data.clock_delay != cached_delay) { + probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); + cached_delay = DAP_Data.clock_delay; + } + picoprobe_debug("SWJ sequence count = %d FDB=0x%2x\n", count, data[0]); n = count; while (n > 0) { if (n > 8) @@ -66,8 +70,11 @@ void SWD_Sequence (uint32_t info, const uint8_t *swdo, uint8_t *swdi) { uint32_t bits; uint32_t n; - probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); - picoprobe_info("SWD sequence\n"); + if (DAP_Data.clock_delay != cached_delay) { + probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); + cached_delay = DAP_Data.clock_delay; + } + picoprobe_debug("SWD sequence\n"); n = info & SWD_SEQUENCE_CLK; if (n == 0U) { n = 64U; @@ -108,8 +115,11 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) { uint32_t parity = 0; uint32_t n; - probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); - picoprobe_info("SWD_transfer\n"); + if (DAP_Data.clock_delay != cached_delay) { + probe_set_swclk_freq(MAKE_KHZ(DAP_Data.clock_delay)); + cached_delay = DAP_Data.clock_delay; + } + picoprobe_debug("SWD_transfer\n"); /* Generate the request packet */ prq |= (1 << 0); /* Start Bit */ for (n = 1; n < 5; n++) { @@ -124,28 +134,25 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) { /* Turnaround (ignore read bits) */ probe_read_mode(); - probe_read_bits(DAP_Data.swd_conf.turnaround); - ack = probe_read_bits(3); + ack = probe_read_bits(DAP_Data.swd_conf.turnaround + 3); + ack >>= DAP_Data.swd_conf.turnaround; if (ack == DAP_TRANSFER_OK) { /* Data transfer phase */ if (request & DAP_TRANSFER_RnW) { - parity = 0; /* Read RDATA[0:31] - note probe_read shifts to LSBs */ - for (n = 0; n < 32; n += 8) { - bit = probe_read_bits(8); - parity += __builtin_popcount(bit); - val |= (bit & 0xff) << n; - } + val = probe_read_bits(32); bit = probe_read_bits(1); + parity = __builtin_popcount(val); if ((parity ^ bit) & 1U) { /* Parity error */ ack = DAP_TRANSFER_ERROR; } if (data) *data = val; - + picoprobe_debug("Read %02x ack %02x 0x%08x parity %01x\n", + prq, ack, val, bit); /* Turnaround for line idle */ probe_read_bits(DAP_Data.swd_conf.turnaround); probe_write_mode(); @@ -156,14 +163,12 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) { /* Write WDATA[0:31] */ val = *data; - parity = 0; - for (n = 0; n < 32; n += 8) { - bit = (val >> n) & 0xff; - probe_write_bits(8, bit); - parity += __builtin_popcount(bit); - } + probe_write_bits(32, val); + parity = __builtin_popcount(val); /* Write Parity Bit */ probe_write_bits(1, parity & 0x1); + picoprobe_debug("write %02x ack %02x 0x%08x parity %01x\n", + prq, ack, val, parity); } /* Capture Timestamp */ if (request & DAP_TRANSFER_TIMESTAMP) { @@ -173,9 +178,9 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) { /* Idle cycles - drive 0 for N clocks */ if (DAP_Data.transfer.idle_cycles) { for (n = DAP_Data.transfer.idle_cycles; n; ) { - if (n > 8) { - probe_write_bits(8, 0); - n -= 8; + if (n > 32) { + probe_write_bits(32, 0); + n -= 32; } else { probe_write_bits(n, 0); n -= n; @@ -194,8 +199,7 @@ uint8_t SWD_Transfer (uint32_t request, uint32_t *data) { probe_write_mode(); if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) == 0U)) { /* Dummy Write WDATA[0:31] + Parity */ - for (n = 0; n < 32; n += 8) - probe_write_bits(8, 0); + probe_write_bits(32, 0); probe_write_bits(1, 0); } return ((uint8_t)ack);