Page 1 of 1

GDMA MEM TRANS does not work

Posted: Sat Apr 06, 2024 12:55 am
by vuthai
I'm using GDMA to copy data from internal memory to PSRAM without going through the cache, but I can't get GDMA to work, even though my tests with SPI, I2S, etc. worked. I'm not sure what the correct register initialization order is; can anyone help me?

The error interrupt flags that get set are: GDMA_IN_DSCR_ERR_CH0_INT_RAW & GDMA_OUT_DSCR_ERR_CH0_INT_RAW

Code: Select all

/*
 * One-time setup of GDMA channel 0; GDMA_MemCopy() calls this lazily while
 * GDMA_Ready is still 0.
 *
 * Enables the GDMA peripheral module, writes GDMA_CLK_EN to the MISC_CONF
 * register, writes 2 to both priority registers of channel 0, programs the
 * two CONF1 registers, and finally sets GDMA_Ready so the setup is skipped
 * on later calls.
 */
void GDMA_MemInit(){
    periph_module_enable(PERIPH_GDMA_MODULE);
    REG_WRITE(GDMA_MISC_CONF_REG, GDMA_CLK_EN);
    REG_WRITE(GDMA_IN_PRI_CH0_REG, 2);
    REG_WRITE(GDMA_OUT_PRI_CH0_REG, 2);
    // Bit 13 is set in both CONF1 registers; the author's note says this selects a 32-byte block.
    // NOTE(review): the extra 64 OR-ed into the IN register lands in the low bits - presumably a
    // FIFO threshold field; confirm both field layouts against the TRM.
    REG_WRITE(GDMA_IN_CONF1_CH0_REG, (1UL << 13) | 64); //block 32B
    REG_WRITE(GDMA_OUT_CONF1_CH0_REG, (1UL << 13));
    // Interrupt handler registration is disabled; GDMA_MemCopy() polls the raw flag instead.
    //esp_intr_alloc(ETS_DMA_IN_CH0_INTR_SOURCE, ESP_INTR_FLAG_IRAM, CPY_Interrupt, NULL, NULL);
    GDMA_Ready = 1;
}

/*
 * Copy `len` bytes from `src` to `dst` using GDMA channel 0 in
 * memory-to-memory mode.
 *
 * The transfer is split into chunks of GDMA_SMAX bytes. Each chunk gets one
 * receive descriptor (CPY_Inlink, pointing at dst) and one transmit
 * descriptor (CPY_Outlink, pointing at src). The chaining loop links at most
 * 8 descriptors per direction, so one call can move at most 8 * GDMA_SMAX
 * bytes.
 *
 * dst  destination buffer
 * src  source buffer
 * len  number of bytes to copy; 0 is a no-op, and a value above
 *      8 * GDMA_SMAX is rejected with a debug message because it cannot be
 *      encoded in the descriptor chain (the final descriptor's 12-bit length
 *      field would overflow into the flag bits).
 *
 * The function waits for GDMA_CopyBusy to clear, starts the transfer, polls
 * the receive-channel SUC_EOF raw flag for up to 20 x 10 us, prints both raw
 * interrupt registers, and clears GDMA_CopyBusy. It does not report whether
 * the transfer actually completed.
 */
void GDMA_MemCopy(uint8_t *dst, uint8_t *src, uint32_t len){
    uint32_t i, info, s, j;
    #define GDMA_SMAX  3072UL
    // Capacity of the descriptor chain built below (8 descriptors of GDMA_SMAX bytes).
    const uint32_t max_len = 8UL * GDMA_SMAX;

    // Nothing to copy: do not start the DMA on whatever the descriptors held before.
    if(len == 0)return;
    // Too large for the chain: the last descriptor would get len > GDMA_SMAX and
    // (len << 12) would spill out of the length field.
    if(len > max_len){
        DEBUG_PRINTF("GDMA_MemCopy: len %lu exceeds limit %lu\r\n", (unsigned long)len, (unsigned long)max_len);
        return;
    }

    if(!GDMA_Ready)GDMA_MemInit();
    while(GDMA_CopyBusy)delay(1);
    GDMA_CopyBusy = 1;
    s = (len + (GDMA_SMAX-1)) / GDMA_SMAX; // number of chunks, rounded up
    for(i = 0; i < s; i++){
        j = i + 1;
        if(j < s && j < 8){
            // Full-size chunk followed by another descriptor.
            // info word: low 12 bits = buffer size, next 12 bits = byte count, bit 31 set.
            info = (0x80000000 | GDMA_SMAX) | (GDMA_SMAX << 12);
            CPY_Inlink[i].info = info;
            CPY_Inlink[i].buf = (uint32_t)dst;
            CPY_Inlink[i].next = (uint32_t)&CPY_Inlink[j];
            CPY_Outlink[i].info = info;
            CPY_Outlink[i].buf = (uint32_t)src;
            CPY_Outlink[i].next = (uint32_t)&CPY_Outlink[j];
            dst += GDMA_SMAX; src += GDMA_SMAX; len -= GDMA_SMAX;
        }else{
            // Final chunk: bits 31 and 30 set, byte count = remaining len, chain terminated.
            info = (0xC0000000 | GDMA_SMAX) | (len << 12);
            CPY_Inlink[i].info = info;
            CPY_Inlink[i].buf = (uint32_t)dst;
            CPY_Inlink[i].next = 0;
            CPY_Outlink[i].info = info;
            CPY_Outlink[i].buf = (uint32_t)src;
            CPY_Outlink[i].next = 0;
            break;
        }
    }
    //DMA
    // Reset both directions of channel 0 (set the reset bit, then clear it below).
    REG_WRITE(GDMA_OUT_CONF0_CH0_REG, GDMA_OUT_RST_CH0);
    REG_WRITE(GDMA_IN_CONF0_CH0_REG, GDMA_IN_RST_CH0);
    // Clear stale interrupt flags so the poll below only sees this transfer.
    REG_WRITE(GDMA_OUT_INT_CLR_CH0_REG, 0xff);
    REG_WRITE(GDMA_IN_INT_CLR_CH0_REG, 0xff);
    REG_WRITE(GDMA_OUT_CONF0_CH0_REG, 0); // | GDMA_OUT_DATA_BURST_EN_CH0
    REG_WRITE(GDMA_IN_CONF0_CH0_REG, 0); // | GDMA_IN_DATA_BURST_EN_CH0
    //REG_WRITE(GDMA_IN_INT_ENA_CH0_REG, GDMA_IN_SUC_EOF_CH0_INT_ENA);
    //REG_WRITE(GDMA_IN_PRI_CH0_REG, 2);
    //REG_WRITE(GDMA_OUT_PRI_CH0_REG, 2);
    //REG_WRITE(GDMA_IN_CONF1_CH0_REG, (1UL << 13) | 64);
    //REG_WRITE(GDMA_OUT_CONF1_CH0_REG, (1UL << 13));
    // Load the low 20 bits of each first-descriptor address into the link registers.
    REG_WRITE(GDMA_OUT_LINK_CH0_REG, (((uint32_t)&CPY_Outlink[0]) & 0x000FFFFF));
    REG_WRITE(GDMA_IN_LINK_CH0_REG, (((uint32_t)&CPY_Inlink[0]) & 0x000FFFFF));
    // Enable memory-to-memory mode, then start the transmit side followed by the receive side.
    REG_WRITE(GDMA_IN_CONF0_CH0_REG, GDMA_MEM_TRANS_EN_CH0);
    REG_WRITE(GDMA_OUT_LINK_CH0_REG, REG_READ(GDMA_OUT_LINK_CH0_REG) | GDMA_OUTLINK_START_CH0);
    REG_WRITE(GDMA_IN_LINK_CH0_REG, REG_READ(GDMA_IN_LINK_CH0_REG) | GDMA_INLINK_START_CH0);
    // Bounded poll for completion: at most 20 delays of 10 us each.
    i = 20;
    while(!(REG_READ(GDMA_IN_INT_RAW_CH0_REG) & GDMA_IN_SUC_EOF_CH0_INT_RAW) && i--){ //
        delayMicroseconds(10);
    }
    DEBUG_PRINTF("DMA Flag: %08X, %08X\r\n", REG_READ(GDMA_IN_INT_RAW_CH0_REG), REG_READ(GDMA_OUT_INT_RAW_CH0_REG) );
    GDMA_CopyBusy = 0;
}


Re: GDMA MEM TRANS does not work

Posted: Mon Apr 08, 2024 9:53 am
by MicroController
Hmm. Interestingly, the IDF provides the "Asynchronous Memory Copy" API - but apparently not for the ESP32. I guess there is a reason why it doesn't.

Btw,
copy data from internal memory to PSRAM without going through the cache
I don't think that's possible.

Re: GDMA MEM TRANS does not work

Posted: Mon Apr 08, 2024 12:49 pm
by ok-home
Hi
As far as I see it is esp32s3
Are you sure that
uint8_t *dst, uint8_t *src, uint32_t len

are burst aligned (32 bytes)?

Re: GDMA MEM TRANS does not work

Posted: Thu Apr 11, 2024 2:49 am
by vuthai
I use dynamic memory allocation with block alignment:
heap_caps_aligned_alloc(32, 2048+32, MALLOC_CAP_DMA);
heap_caps_aligned_alloc(64, 2048+64, MALLOC_CAP_SPIRAM);

Re: GDMA MEM TRANS does not work

Posted: Thu Apr 11, 2024 3:06 am
by vuthai
MicroController wrote:
Mon Apr 08, 2024 9:53 am
Hmm. Interestingly, the IDF provides the "Asynchronous Memory Copy" API - but apparently not for the ESP32. I guess there is a reason why it doesn't.

Btw,
copy data from internal memory to PSRAM without going through the cache
I don't think that's possible.
I'm using the ESP32-S3 and followed the TRM procedure quoted below, but I'm not sure whether any other configuration steps are required.

Code: Select all

3.6.3 Programming Procedures for Memory-to-Memory Transfer
To transfer data from one memory location to another, GDMA should be configured by software as follows:
1. Set GDMA_OUT_RST_CHn first to 1 and then to 0, to reset the state machine of GDMA’s transmit channel
and FIFO pointer;
2. Set GDMA_IN_RST_CHn first to 1 and then to 0, to reset the state machine of GDMA’s receive channel and
FIFO pointer;
3. Load an outlink, and configure GDMA_OUTLINK_ADDR_CHn with address of the first transmit descriptor;
4. Load an inlink, and configure GDMA_INLINK_ADDR_CHn with address of the first receive descriptor;
5. Set GDMA_MEM_TRANS_EN_CHn to enable memory-to-memory transfer;
6. Set GDMA_OUTLINK_START_CHn to enable GDMA’s transmit channel for data transfer;
7. Set GDMA_INLINK_START_CHn to enable GDMA’s receive channel for data transfer;
8. Wait for GDMA_IN_SUC_EOF_CHn_INT interrupt, which indicates that a data
transaction has been completed.

Re: GDMA MEM TRANS does not work

Posted: Thu Apr 11, 2024 7:58 am
by MicroController
I'm not sure either, but IDF's Async. Memcopy does it. If you don't want to use the IDF API but want to implement it yourself you could take a look at the IDF code to find what it does differently.

Re: GDMA MEM TRANS does not work

Posted: Sun Apr 14, 2024 4:54 am
by ok-home
I made and tested a simple m2m example

Code: Select all

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "soc/gdma_reg.h"
#include "soc/gdma_struct.h"
#include "soc/periph_defs.h"
#include "soc/system_reg.h"
#include "esp_rom_lldesc.h"
#include "string.h"
#include "rom/cache.h"
#include "esp_cache.h"
#include "esp_psram.h"

// example only
#define DMA_FRAME 2048
#define DMA_NUM 0

static lldesc_t *allocate_dma_descriptors(uint8_t *buffer, size_t len)
{
    uint32_t count = len / DMA_FRAME;     //  dma frames count
    uint32_t last_size = len % DMA_FRAME; // last frame bytes

    lldesc_t *dma = (lldesc_t *)heap_caps_malloc((count + 1) * sizeof(lldesc_t), MALLOC_CAP_DMA);
    if (dma == NULL)
    {
        return dma;
    }
    int x = 0;
    for (; x < count; x++)
    {
        dma[x].size = DMA_FRAME;
        dma[x].length = DMA_FRAME;
        dma[x].sosf = 0;
        dma[x].eof = 0;
        dma[x].owner = 1;
        dma[x].buf = buffer + DMA_FRAME * x;
        dma[x].empty = (uint32_t)&dma[(x + 1)];
    }

    dma[x].size = last_size;
    dma[x].length = last_size;
    dma[x].sosf = 0;
    dma[x].eof = 1;
    dma[x].owner = 1;
    dma[x].buf = buffer + DMA_FRAME * x;
    dma[x].empty = 0;

    return dma;
}

static lldesc_t *dma_out_link;  //src
static lldesc_t *dma_in_link;   //dst

/**
 * Copy `len` bytes from `src` to `dst` with GDMA channel DMA_NUM in
 * memory-to-memory mode (example code, blocking for one RTOS tick).
 *
 * Sequence: make sure the DMA clock is on, reset both directions of the
 * channel, clear and disable its interrupts, select the external-memory
 * block size, build one descriptor chain per direction, load the chain
 * addresses, enable mem_trans_en, and start the out link followed by the
 * in link.
 *
 * If either descriptor chain cannot be allocated the function frees whatever
 * was allocated and returns without starting a transfer; the void return
 * type gives the caller no way to see this.
 *
 * NOTE(review): completion is not checked. The code waits one tick and then
 * frees the descriptors; if a transfer could outlast that delay the hardware
 * would still be reading freed memory. Poll the in-channel EOF flag before
 * freeing in real code.
 *
 * @param dst  destination buffer
 * @param src  source buffer
 * @param len  number of bytes to copy
 */
void gdma_m2m_copy(uint8_t *dst, uint8_t *src, size_t len)
{
    // Turn the DMA clock on and pulse the DMA reset only if the clock was off.
    if (REG_GET_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN) == 0)
    {
        REG_CLR_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
        REG_SET_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
        REG_SET_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
        REG_CLR_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
    }

    // Reset the transmit (out) and receive (in) sides: set the bit, then clear it.
    GDMA.channel[DMA_NUM].out.conf0.out_rst = 1;
    GDMA.channel[DMA_NUM].out.conf0.out_rst = 0;
    GDMA.channel[DMA_NUM].in.conf0.in_rst = 1;
    GDMA.channel[DMA_NUM].in.conf0.in_rst = 0;

    // Clear pending interrupt flags and keep all channel interrupts disabled.
    GDMA.channel[DMA_NUM].in.int_clr.val = ~0;
    GDMA.channel[DMA_NUM].in.int_ena.val = 0;
    GDMA.channel[DMA_NUM].out.int_clr.val = ~0;
    GDMA.channel[DMA_NUM].out.int_ena.val = 0;

    GDMA.channel[DMA_NUM].out.conf1.out_ext_mem_bk_size = 1; // 0-> 16 byte burst transfer, 1->32 byte burst transfer
    GDMA.channel[DMA_NUM].in.conf1.in_ext_mem_bk_size = 1;   // 0-> 16 byte burst transfer, 1->32 byte burst transfer

    dma_out_link = allocate_dma_descriptors(src, len);
    dma_in_link = allocate_dma_descriptors(dst, len);
    if (dma_out_link == NULL || dma_in_link == NULL)
    {
        // Never program a NULL chain into the link registers or start the DMA on it.
        free(dma_out_link);
        free(dma_in_link);
        dma_out_link = NULL;
        dma_in_link = NULL;
        return;
    }

    // The link address field takes the low 20 bits of the first descriptor's address.
    GDMA.channel[DMA_NUM].out.link.addr = ((uint32_t) & (dma_out_link[0])) & 0xfffff;
    GDMA.channel[DMA_NUM].in.link.addr = ((uint32_t) & (dma_in_link[0])) & 0xfffff;

    GDMA.channel[DMA_NUM].in.peri_sel.sel = 0x0;
    GDMA.channel[DMA_NUM].out.peri_sel.sel = 0x0;
    GDMA.channel[DMA_NUM].in.conf0.mem_trans_en = 1;
    GDMA.channel[DMA_NUM].out.link.start = 1;
    GDMA.channel[DMA_NUM].in.link.start = 1;
    // example only - without eof int
    vTaskDelay(1);
    //
    free(dma_out_link);
    free(dma_in_link);
    // Do not leave the file-scope pointers referring to freed memory.
    dma_out_link = NULL;
    dma_in_link = NULL;
}

/**
 * Smoke test for gdma_m2m_copy(): copies 4096 bytes from an internal
 * DMA-capable buffer (filled with 0xaa) to a SPIRAM buffer (filled with
 * 0x55), then prints the channel's raw interrupt registers and the first 16
 * bytes of both buffers so the result can be checked by eye.
 *
 * Both buffers are 32-byte aligned. Cache write-back (C2M) is requested
 * before the copy and cache refresh (M2C) after it, so the CPU and the DMA
 * see the same data. The buffers are released before returning; if either
 * allocation fails the test prints a message and returns without copying.
 */
void test_gdma_m2m_copy()
{
    const size_t test_len = 2048 * 2;

    // ram->spiram
    uint8_t *dst = heap_caps_aligned_calloc(32, test_len, 1, MALLOC_CAP_SPIRAM);
    uint8_t *src = heap_caps_aligned_calloc(32, test_len, 1, MALLOC_CAP_DMA);
    if (dst == NULL || src == NULL)
    {
        printf("test_gdma_m2m_copy: buffer allocation failed\n");
        free(dst);
        free(src);
        return;
    }

    memset(src, 0xaa, test_len);
    memset(dst, 0x55, test_len);
    // Push the CPU's view of both buffers out to memory before the DMA reads/writes them.
    esp_cache_msync(dst, test_len, ESP_CACHE_MSYNC_FLAG_DIR_C2M);
    esp_cache_msync(src, test_len, ESP_CACHE_MSYNC_FLAG_DIR_C2M);

    gdma_m2m_copy(dst, src, test_len);

    // Refresh the cache from memory so the prints below show what the DMA wrote.
    esp_cache_msync(dst, test_len, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
    esp_cache_msync(src, test_len, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
    printf(" raw in = %lx  out %lx \n", GDMA.channel[DMA_NUM].in.int_raw.val, GDMA.channel[DMA_NUM].out.int_raw.val);
    for (int i = 0; i < 16; i++)
    {
        printf("i = %d src=%x dst=%x\n", i, src[i], dst[i]);
        vTaskDelay(1);
    }

    // The original leaked both test buffers.
    free(dst);
    free(src);
}