Quantcast
Channel: Raspberry Pi Forums
Viewing all articles
Browse latest Browse all 6814

SDK • APS6404L PSRAM on RP2350 – consistent nibble-shift errors

$
0
0
I’m building a board where the RP2350 uses the secondary XIP interface (CS1) to talk to an APS6404L-3SQR 8 MB QSPI PSRAM.

I have this init code that configures timing, dummy cycles, and XIP mapping, which I've mostly taken from the MicroPython backend example:

Code:

size_t __no_inline_not_in_flash_func(psram_detect)(){    int psram_size = 0;    // Try and read the PSRAM ID via direct_csr.    qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS;    // Need to poll for the cooldown on the last XIP transfer to expire    // (via direct-mode BUSY flag) before it is safe to perform the first    // direct-mode operation    while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {    }    // Exit out of QMI in case we've inited already    qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS;    // Transmit as quad.    qmi_hw->direct_tx = QMI_DIRECT_TX_OE_BITS | QMI_DIRECT_TX_IWIDTH_VALUE_Q << QMI_DIRECT_TX_IWIDTH_LSB | 0xf5;    while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {    }    (void)qmi_hw->direct_rx;    qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS);    // Read the id    qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS;    uint8_t kgd = 0;    uint8_t eid = 0;    for (size_t i = 0; i < 7; i++)    {        if (i == 0) {            qmi_hw->direct_tx = 0x9f;        } else {            qmi_hw->direct_tx = 0xff;        }        while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_TXEMPTY_BITS) == 0) {        }        while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {        }        if (i == 5) {            kgd = qmi_hw->direct_rx;        } else if (i == 6) {            eid = qmi_hw->direct_rx;        } else {            (void)qmi_hw->direct_rx;        }    }    // Disable direct csr.    
qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS | QMI_DIRECT_CSR_EN_BITS);    if (kgd == 0x5D)    {        psram_size = 1024 * 1024; // 1 MiB        uint8_t size_id = eid >> 5;        if (eid == 0x26 || size_id == 2) {            psram_size *= 8; // 8 MiB        } else if (size_id == 0) {            psram_size *= 2; // 2 MiB        } else if (size_id == 1) {            psram_size *= 4; // 4 MiB        }    }    return psram_size;}size_t __no_inline_not_in_flash_func(do_psram_init)(){    gpio_set_function(IO_PIN_SECONDARY_FLASH, GPIO_FUNC_XIP_CS1);    uint32_t intr_stash = save_and_disable_interrupts();    size_t psram_size = psram_detect();    if (!psram_size) {        return 0;    }    // Enable direct mode, PSRAM CS, clkdiv of 10.    qmi_hw->direct_csr = 10 << QMI_DIRECT_CSR_CLKDIV_LSB | \        QMI_DIRECT_CSR_EN_BITS | \        QMI_DIRECT_CSR_AUTO_CS1N_BITS;    while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) {        ;    }    // Enable QPI mode on the PSRAM    const uint CMD_QPI_EN = 0x35;    qmi_hw->direct_tx = QMI_DIRECT_TX_NOPUSH_BITS | CMD_QPI_EN;    while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) {        ;    }    // Set PSRAM timing for APS6404    //    // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz.    // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late),    // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz).    const int max_psram_freq = 133000000;    const int clock_hz = clock_get_hz(clk_sys);    int divisor = (clock_hz + max_psram_freq - 1) / max_psram_freq;    if (divisor == 1 && clock_hz > 100000000) {        divisor = 2;    }    int rxdelay = divisor;    if (clock_hz / divisor > 100000000) {        rxdelay += 1;    }    // - Max select must be <= 8us.  The value is given in multiples of 64 system clocks.    // - Min deselect must be >= 18ns.  
The value is given in system clock cycles - ceil(divisor / 2).    const int clock_period_fs = 1000000000000000ll / clock_hz;    const int max_select = (125 * 1000000) / clock_period_fs;  // 125 = 8000ns / 64    const int min_deselect = (18 * 1000000 + (clock_period_fs - 1)) / clock_period_fs - (divisor + 1) / 2;    qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB |        QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB |        max_select << QMI_M1_TIMING_MAX_SELECT_LSB |        min_deselect << QMI_M1_TIMING_MIN_DESELECT_LSB |        rxdelay << QMI_M1_TIMING_RXDELAY_LSB |        divisor << QMI_M1_TIMING_CLKDIV_LSB;    // Set PSRAM commands and formats    qmi_hw->m[1].rfmt =        QMI_M0_RFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_PREFIX_WIDTH_LSB | \            QMI_M0_RFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_RFMT_ADDR_WIDTH_LSB | \            QMI_M0_RFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_SUFFIX_WIDTH_LSB | \            QMI_M0_RFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_RFMT_DUMMY_WIDTH_LSB | \            QMI_M0_RFMT_DATA_WIDTH_VALUE_Q << QMI_M0_RFMT_DATA_WIDTH_LSB | \            QMI_M0_RFMT_PREFIX_LEN_VALUE_8 << QMI_M0_RFMT_PREFIX_LEN_LSB | \            6 << QMI_M0_RFMT_DUMMY_LEN_LSB;    qmi_hw->m[1].rcmd = 0xEB;    qmi_hw->m[1].wfmt =        QMI_M0_WFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_PREFIX_WIDTH_LSB | \            QMI_M0_WFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_WFMT_ADDR_WIDTH_LSB | \            QMI_M0_WFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_SUFFIX_WIDTH_LSB | \            QMI_M0_WFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_WFMT_DUMMY_WIDTH_LSB | \            QMI_M0_WFMT_DATA_WIDTH_VALUE_Q << QMI_M0_WFMT_DATA_WIDTH_LSB | \            QMI_M0_WFMT_PREFIX_LEN_VALUE_8 << QMI_M0_WFMT_PREFIX_LEN_LSB;    qmi_hw->m[1].wcmd = 0x38;    // Disable direct mode    qmi_hw->direct_csr = 0;    // Enable writes to PSRAM    hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_WRITABLE_M1_BITS);    restore_interrupts(intr_stash);    return psram_size;}
So far I’ve tried both SPI (opcode on 1 bit) and QPI (0x35 Enter QPI) modes, experimented with divisors and RX delays, varied dummy cycles from 6 to 8, and confirmed cache flush after writes. The PSRAM responds and transfers data, but verification shows consistent 4-bit (nibble) shifts at burst boundaries:

Code:

mismatch @+0x00000018 got=0x5a3a7add exp=0xa3a5acd7mismatch @+0x000000dc got=0x300ff330 exp=0x92a5f330mismatch @+0x000000ec got=0x5feaf944 exp=0x9ea5f944mismatch @+0x000000f0 got=0x5c9a19dd exp=0x99a5fbd1mismatch @+0x00000120 got=0x5ddadeff exp=0xeda5d4fdmismatch @+0x00000160 got=0x52da2f2d exp=0xfda52f2dmismatch @+0x00000164 got=0x52caefbb exp=0xfca529bemismatch @+0x00000168 got=0x52fabf0b exp=0xffa5280bmismatch @+0x000001ac got=0x50ea0dd4 exp=0xcea50dd4mismatch @+0x000001b0 got=0x939c0fa1 exp=0xc9a50fa1mismatch @+0x000001bc got=0x51aa0b18 exp=0xcaa50b18
Interestingly, writing and then immediately reading sequentially works perfectly, but if I write an entire buffer first and then read back, the verification fails roughly every 24 bytes, showing the same nibble-shift pattern.

This works:

Code:

uint32_t* ext_sram32 = (uint32_t*)0x1d000000; // also tried 0x11, 0x15    for (uint32_t i = 0; i < words; i += stride_words)    {        uint32_t pat = make_pat(i);        ext_sram32[i] = pat;        uint32_t v = ext_sram32[i];            if (v != pat)        {            debug_printf("mismatch @+0x%08x got=0x%08x exp=0x%08x\n", i * 4u, v, pat);            ....
While this fails:

Code:

    for (uint32_t i = 0; i < words; i += stride_words)    {        ext_sram32[i] = make_pat(i);    }    debug_printf("done, reading back....\n");    int errcnt = 0;    for (uint32_t i = 0; i < words; i += stride_words)    {        uint32_t pat = make_pat(i);        // ext_sram32[i] = pat;        uint32_t v = ext_sram32[i];        if (v != pat)        {
make_pat is just a deterministic 0xA5A5A5A5u ^ (i * 0x01000193u)

Timing changes only move or worsen the pattern, and entering QPI sometimes makes it fail completely—suggesting a mode mismatch.

Has anyone run APS6404L reliably on the RP2350’s CS1/XIP port?
Which exact combination of mode (SPI vs QPI), opcode widths, dummy/latency, and timing settings worked for you? Any working initialization snippet would be appreciated.

This is how I use it on the PCB:
photo_2025-10-19 20.21.13.jpeg
PSRAM_CS goes to pin 47.

Flash on its own works absolutely fine.

The behavior occurs both at stock speed and with a 260 MHz overclock. I've tried setting PICO_FLASH_SPI_CLKDIV to as low as 4, but it didn't help.

Any pointers?

Statistics: Posted by desertkun — Sun Oct 19, 2025 6:23 pm — Replies 1 — Views 77



Viewing all articles
Browse latest Browse all 6814

Trending Articles