SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

odelot
Posts: 5
Joined: Sat Jun 22, 2024 5:51 pm

SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby odelot » Sat Jun 22, 2024 6:34 pm

Can someone give me a hint and help me understand why SPI + MCP23S17 + ESP32-S3 is slow? I am about to give up on this project :-(

I'm trying to read the NES BUS with the ESP32 and avoid using an FPGA.

The NES has a clock of just over 1MHz, and the data (8 bits) and address (16 bits) are valid on the clock's falling edge.

Polling the NES clock pin in an infinite loop, I achieve just over 7MHz. Adding logic, I start missing clock changes. I expected the MCP23S17 with SPI at 10MHz to improve GPIO read speed, bypassing the GPIO mux. I used this library that uses native ESP-IDF SPI (HOST 2) on high-speed pins using IO Mux.

However, reading one bit per loop using the MCP23S17, I get a polling speed of ~30kHz, much slower than 7MHz (or 10MHz, the MCP23S17's maximum SPI frequency).

In both examples I dedicated CORE1 to BUS reading, but unlike my GPIO test, I couldn't suspend tasks or interrupts because the SPI library uses an event queue. Could this be the issue?

I lack experience with parallel BUS reading and using SPI and MCP23S17, so I might be missing something.

Any info is appreciated. Thank you in advance.

Here is my code if you want to take a look. You can choose between polling and SPI + MCP23S17 using the POOLING_ENABLE constant/define.

Code: Select all

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include <mcp23x17.h>
#include "driver/gpio.h"
#include "esp_system.h"
#include "nvs_flash.h"
#include "soc/gpio_reg.h"
#include "soc/gpio_num.h"
#include "esp_timer.h"
#include "rom/ets_sys.h"

#define POOLING_ENABLE 0 // 1: poll the pin directly through the GPIO input register; 0: read it through the MCP23S17 over SPI

#define MCP23S17_ADDRESS 0x20 // device address handed to mcp23x17_init_desc_spi()

// ESP32 pins used for the SPI bus and the MCP23S17 chip select
#define SPI_CS_GPIO GPIO_NUM_10
#define SPI_MOSI_GPIO GPIO_NUM_11
#define SPI_SCLK_GPIO GPIO_NUM_12
#define SPI_MISO_GPIO GPIO_NUM_13

#define NES_CLOCK_M2 GPIO_NUM_2 // NES clock input; the same number is used as the ESP32 GPIO (polling mode) and as the MCP23S17 pin index (SPI mode)

#define HOST SPI2_HOST           // SPI host passed to spi_bus_initialize() and mcp23x17_init_desc_spi()
#define DMA_CHAN SPI_DMA_CH_AUTO // DMA channel argument passed to spi_bus_initialize()

static mcp23x17_t dev = {0}; // MCP23S17 device descriptor, initialised in app_main() and read by mcp23s17TaskCode()

// Fast GPIO access helpers: they read/write the GPIO registers directly with
// REG_READ/REG_WRITE instead of going through the gpio driver API.
//
// - Every macro argument is parenthesised, so an expression such as
//   `cond ? 2 : 0` or `a | b` can safely be passed as the pin number.
// - Bit masks are built from an unsigned 1, so selecting bit 31 does not
//   shift into the sign bit of a signed int.
// - The GPIO_* forms use the first register bank; the GPIO_*1_* forms use the
//   second bank and rebase the pin number by subtracting 32.
#define GPIO_OUT_HIGH(x) REG_WRITE(GPIO_OUT_W1TS_REG, 1U << (x))
#define GPIO_OUT_LOW(x) REG_WRITE(GPIO_OUT_W1TC_REG, 1U << (x))
#define GPIO_IN_Read(x) ((REG_READ(GPIO_IN_REG) >> (x)) & 0x1)
#define GPIO_IN_ReadAll() REG_READ(GPIO_IN_REG)
#define GPIO_IN1_Read(x) ((REG_READ(GPIO_IN1_REG) >> ((x) - 32)) & 0x1)
#define GPIO_IN1_ReadAll() REG_READ(GPIO_IN1_REG)
#define GPIO_OUT1_HIGH(x) REG_WRITE(GPIO_OUT1_W1TS_REG, 1U << ((x) - 32))
#define GPIO_OUT1_LOW(x) REG_WRITE(GPIO_OUT1_W1TC_REG, 1U << ((x) - 32))

// Counters written by the reader task and printed/cleared by printerCode().
volatile uint32_t freq = 0;      // number of reads performed since printerCode() last reset it
volatile uint32_t lastValue = 0; // most recent value read from the NES clock pin

// Task handles filled in by xTaskCreatePinnedToCore() in app_main().
TaskHandle_t mcp23s17;
TaskHandle_t pooling;
TaskHandle_t printer;

/**
 * Reporting task: prints the read counter (freq) and the last sampled pin
 * value (lastValue), clears both, then sleeps for one second, forever.
 * The printed counter is therefore the number of reads achieved in roughly
 * the preceding second.
 *
 * @param pvParameters unused.
 */
void IRAM_ATTR printerCode(void *pvParameters)
{
    (void)pvParameters;
    printf("\n printerCode is running on core %d\n", xPortGetCoreID());
    while (1)
    {
        // freq and lastValue are uint32_t, so use the matching PRIu32 conversion
        // instead of the signed %ld.
        printf("%" PRIu32 ", v: %" PRIu32 "\n", freq, lastValue);
        freq = 0;
        lastValue = 0;
        // vTaskDelay() already blocks this task, so no extra yield is needed.
        vTaskDelay(pdMS_TO_TICKS(1000));
    }
}

/**
 * Polling reader task: samples the NES clock pin straight from the GPIO input
 * register in a tight loop that never returns.
 *
 * Interrupts are disabled and the scheduler is suspended before the loop
 * starts. Each iteration stores the sampled bit in lastValue and bumps freq.
 *
 * @param pvParameters unused.
 */
void IRAM_ATTR poolingTaskCode(void *pvParameters)
{
    printf("\n poolingTaskCode is running on core %d\n", xPortGetCoreID());

    portDISABLE_INTERRUPTS();
    vTaskSuspendAll();

    for (;;)
    {
        // One register read, then isolate the bit for the clock pin.
        const uint32_t gpio_snapshot = GPIO_IN_ReadAll();
        lastValue = (gpio_snapshot >> NES_CLOCK_M2) & 0x1;
        freq = freq + 1;
    }
}

/**
 * MCP23S17 reader task: repeatedly reads the NES clock pin through the
 * expander driver and never returns.
 *
 * Only successful reads are published to lastValue and counted in freq, so a
 * failing bus shows up as a low or zero rate instead of reporting stale or
 * uninitialised data.
 *
 * NOTE(review): each mcp23x17_get_level() call presumably performs a complete
 * blocking SPI transaction inside the driver; if so, the per-call overhead and
 * not the SPI clock would bound the achievable rate. Confirm against the
 * driver source.
 *
 * @param pvParameters unused.
 */
void IRAM_ATTR mcp23s17TaskCode(void *pvParameters)
{
    (void)pvParameters;
    printf("\n mcp23s17TaskCode is running on core %d\n", xPortGetCoreID());
    uint32_t value = 0;
    while (1)
    {
        if (mcp23x17_get_level(&dev, NES_CLOCK_M2, &value) != ESP_OK)
        {
            // Failed read: do not publish or count it.
            continue;
        }
        lastValue = value;
        freq += 1;
    }
}

void app_main(void)
{
    ESP_ERROR_CHECK(nvs_flash_init());

    xTaskCreatePinnedToCore(
        printerCode,          /* Task function. */
        "printer",            /* name of task. */
        2048,                 /* Stack size of task */
        NULL,                 /* parameter of the task */
        tskIDLE_PRIORITY + 1, /* priority of the task */
        &printer,             /* Task handle to keep track of created task */
        0                     /* pin task to core 0 */
    );

    if (POOLING_ENABLE)
    {
        // config pooling
        gpio_config_t conf = {
            .mode = GPIO_MODE_INPUT,               /*!< GPIO mode: set input/output mode                     */
            .pull_up_en = GPIO_PULLUP_DISABLE,     /*!< GPIO pull-up                                         */
            .pull_down_en = GPIO_PULLDOWN_DISABLE, /*!< GPIO pull-down                                       */
            .intr_type = GPIO_INTR_DISABLE};

        conf.pin_bit_mask = (1ULL << NES_CLOCK_M2);
        gpio_config(&conf);

        xTaskCreatePinnedToCore(
            poolingTaskCode,              /* Task function. */
            "pooling",                    /* name of task. */
            configMINIMAL_STACK_SIZE * 6, /* Stack size of task */
            NULL,                         /* parameter of the task */
            configMAX_PRIORITIES - 1,     /* priority of the task */
            &pooling,                     /* Task handle to keep track of created task */
            1);                           /* pin task to core 1 */
    }
    else
    {
        // config SPI and MCP23S17

        spi_bus_config_t cfg = {
            .mosi_io_num = SPI_MOSI_GPIO,
            .miso_io_num = SPI_MISO_GPIO,
            .sclk_io_num = SPI_SCLK_GPIO,
            .quadwp_io_num = -1,
            .quadhd_io_num = -1,
            .max_transfer_sz = 0,
            .flags = 0,
            .isr_cpu_id = 1 // pin SPI2_HOST to core 0
        };
        ESP_ERROR_CHECK(spi_bus_initialize(HOST, &cfg, DMA_CHAN));
        ESP_ERROR_CHECK(mcp23x17_init_desc_spi(&dev, HOST, MCP23X17_MAX_SPI_FREQ, MCP23S17_ADDRESS, SPI_CS_GPIO));

        // Setup PORTA0 as input
        ESP_ERROR_CHECK(mcp23x17_set_mode(&dev, NES_CLOCK_M2, MCP23X17_GPIO_INPUT));
        // Enable pull-up
        ESP_ERROR_CHECK(mcp23x17_set_pullup(&dev, NES_CLOCK_M2, true));
        xTaskCreatePinnedToCore(
            mcp23s17TaskCode,             /* Task function. */
            "mcp23s17",                   /* name of task. */
            configMINIMAL_STACK_SIZE * 6, /* Stack size of task */
            NULL,                         /* parameter of the task */
            configMAX_PRIORITIES - 1,     /* priority of the task */
            &mcp23s17,                    /* Task handle to keep track of created task */
            1);                           /* pin task to core 1 */
    }
}
my sdkconfig diff from template project

Code: Select all

CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
# CONFIG_ESPTOOLPY_FLASHMODE_DIO is not set
# CONFIG_COMPILER_OPTIMIZATION_DEBUG is not set
CONFIG_COMPILER_OPTIMIZATION_PERF=y
# CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_160 is not set
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ=240
# CONFIG_ESP_INT_WDT_CHECK_CPU1 is not set
# CONFIG_ESP_TASK_WDT_CHECK_IDLE_TASK_CPU1 is not set
CONFIG_FREERTOS_HZ=1000
# CONFIG_FREERTOS_TASK_FUNCTION_WRAPPER=y
CONFIG_LOG_DEFAULT_LEVEL_WARN=y
# CONFIG_LOG_DEFAULT_LEVEL_INFO is not set
CONFIG_LOG_DEFAULT_LEVEL=2
CONFIG_LOG_MAXIMUM_LEVEL=2
# CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY is not set
CONFIG_LWIP_TCPIP_TASK_AFFINITY_CPU0=y
CONFIG_LWIP_TCPIP_TASK_AFFINITY=0x0
# CONFIG_PTHREAD_DEFAULT_CORE_NO_AFFINITY is not set
CONFIG_PTHREAD_DEFAULT_CORE_0=y
CONFIG_PTHREAD_TASK_CORE_DEFAULT=0
# CONFIG_MCP23X17_IFACE_I2C is not set
CONFIG_MCP23X17_IFACE_SPI=y
CONFIG_FLASHMODE_QIO=y
# CONFIG_FLASHMODE_DIO is not set
# CONFIG_OPTIMIZATION_LEVEL_DEBUG is not set
# CONFIG_COMPILER_OPTIMIZATION_LEVEL_DEBUG is not set
# CONFIG_COMPILER_OPTIMIZATION_DEFAULT is not set
# CONFIG_ESP32S3_DEFAULT_CPU_FREQ_160 is not set
CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y
CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ=240
# CONFIG_INT_WDT_CHECK_CPU1 is not set
# CONFIG_TASK_WDT_CHECK_IDLE_TASK_CPU1 is not set
# CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY is not set
CONFIG_TCPIP_TASK_AFFINITY_CPU0=y
CONFIG_TCPIP_TASK_AFFINITY=0x0
# CONFIG_ESP32_DEFAULT_PTHREAD_CORE_NO_AFFINITY is not set
CONFIG_ESP32_DEFAULT_PTHREAD_CORE_0=y
CONFIG_ESP32_PTHREAD_TASK_CORE_DEFAULT=0

ESP_Sprite
Posts: 9766
Joined: Thu Nov 26, 2015 4:08 am

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby ESP_Sprite » Thu Jun 27, 2024 8:26 am

Why are you using a MCP23S17? The logic in a NES controller is literally a shift register, you can read out the entire state using a single SPI transaction.

odelot
Posts: 5
Joined: Sat Jun 22, 2024 5:51 pm

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby odelot » Sat Jun 29, 2024 12:22 am

Thank you for your response @ESP_Sprite . I'm not trying to create a NES controller. My goal is to read the WRAM (working RAM) over the parallel bus, which involves 16-bit address, 8-bit data, clock, and R/W flag.

I want to inspect the NES RAM using the ESP32 by capturing every write operation on the working RAM (2KB RAM). Currently, I'm experimenting with reading the BUS using I2S, but I've noticed significant changes between the ESP32 and ESP32-S3, especially regarding the camera mode for parallel reading. I created another topic to gather ideas on the best way to perform fast parallel bus reading.

I also have an FPGA, a Cyclone IV, but my preference is to use the ESP32-S3, dedicating one core to handle the NES bus and the other core for Wi-Fi.

If using I2S or the camera mode turns out to be impractical, I'll consider alternatives such as the STM32 with ESP8266 for Wi-Fi or the PicoW with PIO.

I have a year of free time, and I've decided to delve into NES/SNES and ESP32 projects to deepen my understanding of embedded systems.

ESP_Sprite
Posts: 9766
Joined: Thu Nov 26, 2015 4:08 am

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby ESP_Sprite » Mon Jul 01, 2024 5:16 am

Ah, gotcha. Hm, not sure if I have any advice then: standard GPIO isn't too fast on the ESP32 series, 'fast' GPIO doesn't have enough pins, and I don't think the camera interface (which can be abused to read in parallel data) has enough I/O for that. Maybe if you cut a few bits from your address bus: if the RAM is only 2K, you don't need more than 11 address bits for that.

User avatar
ok-home
Posts: 78
Joined: Sun May 02, 2021 7:23 pm
Location: Russia Novosibirsk
Contact:

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby ok-home » Mon Jul 01, 2024 8:30 am

Hi
I don't really understand what you want to get
1. capture 16 bits of address + 8 bits of data + r/w flag - address and data are generated by another device ?
2. independently generate 16 bits of address and write/read 8 bits of data to verify memory operability ?

for s3 - it is possible to use a hack

in the first case you can try - capture the address with the CAM module (16 bits in DMA mode) and, in parallel, capture the data with the SPI module in Octal mode (8 bits in DMA mode)
in the second case you can try - generate the address with the LCD module (16 bits in DMA mode) and, in parallel, write or read the data with the SPI module in Octal mode (8 bits in DMA mode)

when using a common CLK - in batch mode this solution can work. It is clear that ESP-IDF drivers do not support it in any way and it will be necessary to switch to LL or HAL.

when using DMA modes it is possible to get significantly higher frequencies than 1 MHz

odelot
Posts: 5
Joined: Sat Jun 22, 2024 5:51 pm

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby odelot » Mon Jul 01, 2024 11:13 am

ESP_Sprite wrote:
Mon Jul 01, 2024 5:16 am
Ah, gotcha. Hm, not sure if I have any advice then: standard GPIO isn't too fast on the ESP32 series, 'fast' GPIO doesn't have enough pins, and I don't think the camera interface (which can be abused to read in parallel data) has enough I/O for that. Maybe if you cut a few bits from your address bus: if the RAM is only 2K, you don't need more than 11 address bits for that.
Thanks for the answer! Yes, the RAM is only 2KB, but it is mirrored 4 times, so I would need 13 bits to ensure it is within the WRAM range. Since there is no WRAM_SEL flag, only a ROM_SEL flag, I think I will need all 16 bits to make sure it is a WRAM address (zeros in the top 3 address bits, A13–A15). I can reduce those 3 bits to 1 bit using an AND gate (on the inverted bits, i.e. a 3-input NOR, which is high only when all three are zero).

After reading the Technical Reference more closely, I realize there is no way to use I2S in Camera mode to read 24 bits in parallel as we could on the ESP32-S2 or the original ESP32.

I will need to use the CAM peripheral to read at least 16 bits in parallel. I will try this approach this week.
ok-home wrote: Hi
I don't really understand what you want to get
1. capture 16 bits of address + 8 bits of data + r/w flag - address and data are generated by another device ?
2. independently generate 16 bits of address and write/read 8 bits of data to verify memory operability ?

for s3 - it is possible to use a hack

in the first case you can try - capture address by CAM module ( 16 bits in DMA mode, in parallel capture data by SPI Octal mode module ( 8 bits in DMA mode )
in the second case you can try - form address by LCD module ( 16 bits in DMA mode, in parallel write or read data by SPI Octal mode module ( 8 bits in DMA mode )

when using a common CLK - in batch mode this solution can work. It is clear that ESP-IDF drivers do not support it in any way and it will be necessary to switch to LL or HAL.

when using DMA modes it is possible to get significantly higher frequencies than 1 MHz
Thanks @ok-home. I want scenario 1: capturing 16 bits of address + 8 bits of data + r/w flag, where the address and data are generated by another device.

I will look into the SPI Octal mode as you suggested. Thanks for the guidance. I am already reading the Technical Reference document to use the registers instead of the ESP-IDF library to explore all the possibilities.

odelot
Posts: 5
Joined: Sat Jun 22, 2024 5:51 pm

Re: SPI + MCP23S17 + ESP32-S3 Slow for reading BUS. Am I missing something?

Postby odelot » Sun Jul 14, 2024 2:55 am

Thank you for your help!

Here is an example: https://github.com/odelot/esp32s3-fast-parallel-read, which uses 17 pins to read 16 pins in parallel on the ESP32-S3 by abusing the camera peripheral. This might be helpful for anyone looking to do something similar.

I managed to oversample the NES bus without losing addresses, at a read speed between 20 and 22 MHz. However, I haven't explored using octal SPI to capture the 8-bit data and synchronize it with the address readings.

I will probably abandon the idea of capturing NES memory using the ESP32, but the study was worthwhile.

Who is online

Users browsing this forum: alicia.huang and 127 guests