Need a driver library for a 3.5-inch 480x320 ST7796 display with the RP2040

I tried to create a driver library for a 3.5-inch 480x320 ST7796 display connected to an RP2040. Before building the LVGL firmware, I changed the value of LV_COLOR_DEPTH to 16 in lib\lv_bindings. When I run main.py (below), I get this error:

Traceback (most recent call last):
  File "<stdin>", line 6, in <module>
  File "ili9xxx.py", line 323, in __init__
  File "ili9xxx.py", line 311, in __init__
MemoryError: memory allocation failed, allocating 536923960 bytes

main.py

import lvgl as lv
import ili9xxx
from machine import SPI, Pin

# Display wiring: SPI bus 1 plus the control pins handed to the driver.
spi = SPI(1, baudrate=27000000, sck=Pin(10), mosi=Pin(11), miso=Pin(8))
drv = ili9xxx.St7796(spi=spi, dc=23, cs=29, rst=28)

# Every style call below targets the main part in the default state.
selector = lv.PART.MAIN | lv.STATE.DEFAULT

# Screen with a single red button on a black background.
scr = lv.obj()
btn = lv.btn(scr)
btn.set_style_bg_color(lv.color_hex(0xFF0000), selector)
scr.set_style_bg_color(lv.color_hex(0x000000), selector)

label = lv.label(btn)
label.set_text("Hello World!")

btn.set_style_width(120, selector)
btn.set_style_height(35, selector)

# Top-left corner of the button: (240, 160) shifted by these offsets.
bh, bw = 18, 60
by, bx = 160 - bh, 240 - bw
btn.set_x(bx)
btn.set_y(by)

lv.scr_load(scr)

ili9xxx.py

from micropython import const
import lvgl as lv
import machine
import time
import struct

# MADCTL register and the two flags the driver ORs into it directly.
_MADCTL = const(0x36)
_MADCTL_BGR = const(1 << 3)  # colors are BGR (not RGB)
_MADCTL_RTL = const(1 << 2)  # refresh right to left

# Individual MADCTL flag bits.
MADCTL_MH = const(1 << 2)  # Refresh 0=Left to Right, 1=Right to Left
MADCTL_ML = const(1 << 4)  # Refresh 0=Top to Bottom, 1=Bottom to Top
MADCTL_MV = const(1 << 5)  # 0=Normal, 1=Row/column exchange
MADCTL_MX = const(1 << 6)  # 0=Left to Right, 1=Right to Left
MADCTL_MY = const(1 << 7)  # 0=Top to Bottom, 1=Bottom to Top

# Rotation indices; each one selects the matching ORIENTATION_TABLE entry.
ST7796_PORTRAIT = const(0)
ST7796_LANDSCAPE = const(1)
ST7796_INV_PORTRAIT = const(2)
ST7796_INV_LANDSCAPE = const(3)

# MADCTL flag combination for each rotation index above.
ORIENTATION_TABLE = (
    MADCTL_MX,
    MADCTL_MV,
    MADCTL_MY,
    MADCTL_MY | MADCTL_MX | MADCTL_MV,
)

DISPLAY_TYPE_ILI9488 = const(2)
DISPLAY_TYPE_ST7789 = const(4)
COLOR_MODE_RGB = const(0x00)

class St77xx_hw(object):
    '''SPI transport and panel housekeeping shared by the ST77xx-style drivers.

    This class only talks to the controller: reset, register writes, window
    addressing and pixel transfer.  Subclasses must implement ``config_hw()``
    with the controller specific initialisation sequence.
    '''

    # Frame-memory opcodes (MIPI DCS numbering).  They live on the class
    # because this module defines no ``ST77XX_*`` globals.
    _CASET = 0x2A  # column address set
    _RASET = 0x2B  # row address set
    _RAMWR = 0x2C  # memory write

    # Optional start offsets of the visible area inside controller RAM, keyed
    # by ``(res[0], res[1], model)`` with one ``(column, row)`` pair per
    # rotation.  Panels without an entry are addressed from (0, 0).
    _START_ROTMAP = {}
    _NO_OFFSET = ((0, 0),) * 4

    def __init__(self, *, cs, dc, spi, res, suppRes, bl=None, model=None, suppModel=None, rst=None, rot=ST7796_LANDSCAPE, bgr=False, rp2_dma=None):
        '''Store the wiring, validate the panel description and reset the panel.

        :param cs, dc, rst: pin number (wrapped into an output ``machine.Pin``)
            or a ready-made pin object; ``rst`` may be ``None``.
        :param spi: SPI bus object providing ``write()``.
        :param res: resolution tuple; must be one of ``suppRes``.
        :param suppRes: resolutions accepted by the concrete driver.
        :param bl: backlight as pin number, ``machine.Pin``, ``machine.PWM``
            or ``None`` (no backlight control).
        :param model: panel model; checked against ``suppModel`` when that is
            non-empty.
        :param suppModel: accepted models, or ``None``/empty to skip the check.
        :param rot: rotation index, used modulo 4.
        :param bgr: set the BGR flag in MADCTL when true.
        :param rp2_dma: DMA channel object used for pixel transfers, or ``None``.
        :raises ValueError: unsupported resolution or model.
        '''
        # Scratch buffers reused by write_register/set_window/clear.
        self.buf1 = bytearray(1)
        self.buf2 = bytearray(2)
        self.buf4 = bytearray(4)

        # Integers become output pins; anything else (None included) is kept.
        self.cs, self.dc, self.rst = [
            (machine.Pin(p, machine.Pin.OUT) if isinstance(p, int) else p)
            for p in (cs, dc, rst)
        ]
        self.bl = bl
        if isinstance(self.bl, int):
            self.bl = machine.PWM(machine.Pin(self.bl, machine.Pin.OUT))
        elif isinstance(self.bl, machine.Pin):
            self.bl = machine.PWM(self.bl)
        assert isinstance(self.bl, (machine.PWM, type(None)))
        self.set_backlight(10)  # set some backlight

        self.rot = rot
        self.bgr = bgr
        self.width, self.height = (0, 0)  # set later in hard_reset->config->apply_rotation

        if res not in suppRes:
            raise ValueError('Unsupported resolution %s; the driver supports: %s.' % (str(res), ', '.join(str(r) for r in suppRes)))
        if suppModel and model not in suppModel:
            raise ValueError('Unsupported model %s; the driver supports: %s.' % (str(model), ', '.join(str(r) for r in suppModel)))

        self.res = res
        self.model = model

        self.rp2_dma = rp2_dma
        self.spi = spi
        self.hard_reset()

    def off(self):
        '''Switch the backlight off.'''
        self.set_backlight(0)

    def hard_reset(self):
        '''Pulse the reset pin (when there is one) and re-run the configuration.'''
        if self.rst:
            for level in (1, 0, 1):
                self.rst.value(level)
                time.sleep(.2)
            time.sleep(.2)
        self.config()

    def config(self):
        '''Send the controller init sequence, then apply the stored rotation.'''
        self.config_hw()  # defined in child classes
        self.apply_rotation(self.rot)

    def set_backlight(self, percent):
        '''Set the backlight PWM duty from *percent*; no-op without a backlight.'''
        if self.bl is None:
            return
        self.bl.duty_u16(percent * 655)

    def set_window(self, x, y, w, h):
        '''Select the ``w`` x ``h`` rectangle at ``(x, y)`` for the next memory write.'''
        key = (self.res[0], self.res[1], self.model)
        c0, r0 = self._START_ROTMAP.get(key, self._NO_OFFSET)[self.rot % 4]
        # Start and end addresses are inclusive, hence the -1.
        struct.pack_into('>HH', self.buf4, 0, c0 + x, c0 + x + w - 1)
        self.write_register(self._CASET, self.buf4)
        struct.pack_into('>HH', self.buf4, 0, r0 + y, r0 + y + h - 1)
        self.write_register(self._RASET, self.buf4)

    def apply_rotation(self, rot):
        '''Remember *rot*, derive width/height from it and program MADCTL.'''
        self.rot = rot
        if (self.rot % 2) == 0:
            self.width, self.height = self.res
        else:
            self.height, self.width = self.res
        flags = (_MADCTL_BGR if self.bgr else 0) | ORIENTATION_TABLE[self.rot % 4]
        self.write_register(_MADCTL, bytes([flags]))

    def blit(self, x, y, w, h, buf, is_blocking=True):
        '''Write pixel data *buf* into the rectangle ``(x, y, w, h)``.

        With ``rp2_dma`` set the transfer runs through DMA and, when
        *is_blocking* is false, returns before it has finished.
        '''
        self.set_window(x, y, w, h)
        if self.rp2_dma:
            self._rp2_write_register_dma(self._RAMWR, buf, is_blocking)
        else:
            self.write_register(self._RAMWR, buf)

    def clear(self, color):
        '''Fill the whole screen with the 16-bit *color*.'''
        bs = 128  # write pixels in chunks; makes the fill much faster
        # Unsigned format: 16-bit colour values use the full 0..0xFFFF range.
        struct.pack_into('>H', self.buf2, 0, color)
        buf = bs * bytes(self.buf2)
        npx = self.width * self.height
        self.set_window(0, 0, self.width, self.height)
        self.write_register(self._RAMWR, None)
        self.cs.value(0)
        self.dc.value(1)
        for _ in range(npx // bs):
            self.spi.write(buf)
        for _ in range(npx % bs):
            self.spi.write(self.buf2)
        self.cs.value(1)

    def write_register(self, reg, buf=None):
        '''Send command byte *reg* (DC low), then optional data *buf* (DC high).'''
        struct.pack_into('B', self.buf1, 0, reg)
        self.cs.value(0)
        self.dc.value(0)
        self.spi.write(self.buf1)
        if buf is not None:
            self.dc.value(1)
            self.spi.write(buf)
        self.cs.value(1)

    def _rp2_write_register_dma(self, reg, buf, is_blocking=True):
        'If *is_blocking* is False, the caller should call rp2_wait_dma explicitly.'
        import uctypes  # local import: only needed on the DMA path

        SPI1_BASE = 0x40040000  # FIXME: will be different for another SPI bus?
        SSPDR = 0x008
        self.rp2_dma.config(
            src_addr=uctypes.addressof(buf),
            dst_addr=SPI1_BASE + SSPDR,
            count=len(buf),
            src_inc=True,
            dst_inc=False,
            trig_dreq=self.rp2_dma.DREQ_SPI1_TX
        )
        struct.pack_into('B', self.buf1, 0, reg)
        self.cs.value(0)

        self.dc.value(0)
        self.spi.write(self.buf1)

        self.dc.value(1)
        self.rp2_dma.enable()

        # CS stays low here; rp2_wait_dma() raises it once the transfer is done.
        if is_blocking:
            self.rp2_wait_dma()

    def rp2_wait_dma(self):
        '''
        Wait for rp2-port DMA transfer to finish; no-op unless self.rp2_dma is defined.
        Can be used as callback before accessing shared SPI bus e.g. with the xpt2046 driver.
        '''
        if self.rp2_dma is None:
            return
        while self.rp2_dma.is_busy():
            pass
        self.rp2_dma.disable()
        # wait to send last byte. It should take < 1uS @ 10MHz
        time.sleep_us(1)
        self.cs.value(1)

    def _run_seq(self, seq):
        '''
        Run sequence of (initialization) commands; those are given as list of tuples, which are either
        `(command,data)` or `(command,data,delay_ms)`
        '''
        for i, cmd in enumerate(seq):
            if len(cmd) == 2:
                (reg, data), delay = cmd, 0
            elif len(cmd) == 3:
                reg, data, delay = cmd
            else:
                raise ValueError('Command #%d has %d items (must be 2 or 3)' % (i, len(cmd)))
            self.write_register(reg, data)
            if delay > 0:
                time.sleep_ms(delay)

    def madctl(self, colormode, rotation, rotations):
        '''Combine a rotation value with *colormode* into a MADCTL byte.

        A non-negative *rotation* is used as is.  A negative one selects
        ``rotations[abs(rotation) - 1]``.

        :raises RuntimeError: the negative *rotation* points past *rotations*.
        '''
        if rotation >= 0:
            return rotation | colormode
        index = abs(rotation) - 1
        if index >= len(rotations):
            raise RuntimeError('Invalid display rot value specified during init.')
        return rotations[index] | colormode



class St7796_hw(St77xx_hw):
    def __init__(self, **kw):
        """ST7796 TFT Display Driver.

        Requires ``LV_COLOR_DEPTH=16`` when building lv_micropython to function.

        All keyword arguments are forwarded to ``St77xx_hw.__init__``; the
        resolution, model and ``bgr`` values are fixed here.
        """
        super().__init__(
            res=(480, 320),
            suppRes=[
                (480, 320),
            ],
            model=None,
            suppModel=None,
            bgr=False,
            **kw,
        )

    def display_config(self):
        """Return the pixel-format byte for register 0x3A.

        The value follows the size of ``lv.color_t``: 0x06 for 4-byte colours,
        0x05 for 2-byte colours.

        :raises RuntimeError: ``lv.color_t`` has any other size.
        """
        color_size = lv.color_t.__SIZE__
        if color_size == 4:
            return 0x06  # 262K-Colors
        if color_size == 2:
            return 0x05  # 65K-Colors  55??
        raise RuntimeError('ST7796 micropython driver requires defining LV_COLOR_DEPTH=32 or LV_COLOR_DEPTH=16')

    def config_hw(self):
        """Send the ST7796 power-up sequence (register names per the datasheet)."""
        self._run_seq(
            [
                (0x01, None, 120),  # software reset
                (0x11, None, 120),  # sleep out
                (0xF0, b"\xc3"),  # command set control: enable part I
                (0xF0, b"\x96"),  # command set control: enable part II
                # Provisional MADCTL value; config() calls apply_rotation()
                # right after this sequence and rewrites the register.
                (0x36, bytes([self.madctl(COLOR_MODE_RGB, ST7796_LANDSCAPE, ORIENTATION_TABLE)])),
                (0x3A, bytes([self.display_config()])),  # pixel format
                (0xB4, b"\x01"),  # display inversion control
                (0xB6, b"\x80\x02\x3B"),  # display function control
                (0xE8, b"\x40\x8A\x00\x00\x29\x19\xA5\x33"),  # display output ctrl adjust
                (0xC1, b"\x06"),  # power control 2
                (0xC2, b"\xA7"),  # power control 3
                (0xC5, b"\x18", 120),  # VCOM control
                (0xE0, b"\xF0\x09\x0B\x06\x04\x15\x2F\x54\x42\x3C\x17\x14\x18\x1B"),  # positive gamma
                (0xE1, b"\xE0\x09\x0B\x06\x04\x03\x2B\x43\x42\x3B\x16\x14\x17\x1B", 120),  # negative gamma
                (0xF0, b"\x3C"),  # command set control: disable part I
                (0xF0, b"\x69", 120),  # command set control: disable part II
                (0x29, b""),  # display on
            ]
        )

    def apply_rotation(self, rot):
        """Remember *rot*, derive width/height from it and program MADCTL.

        The odd ``ORIENTATION_TABLE`` entries set ``MADCTL_MV`` (row/column
        exchange), so those are the rotations where the long side of the panel
        is the width.  The BGR flag is always set here, regardless of
        ``self.bgr``.
        """
        self.rot = rot
        long_side, short_side = max(self.res), min(self.res)
        if self.rot % 2:
            self.width, self.height = long_side, short_side
        else:
            self.width, self.height = short_side, long_side
        self.write_register(_MADCTL, bytes([_MADCTL_BGR | ORIENTATION_TABLE[self.rot % 4]]))

    def madctl(self, colormode, rotation, rotations):
        """Combine a rotation value with *colormode* into a MADCTL byte.

        :raises RuntimeError: a negative *rotation* points past *rotations*.
        """
        # if rotation is 0 or positive use the value as is.
        if rotation >= 0:
            return rotation | colormode

        # otherwise use abs(rotation)-1 as index to retrieve value from rotations set
        index = abs(rotation) - 1
        if index >= len(rotations):
            raise RuntimeError('Invalid display rot value specified during init.')

        return rotations[index] | colormode

class St77xx_lvgl(object):
    '''LVGL wrapper for St77xx, not to be instantiated directly.

    * creates and registers LVGL display driver;
    * allocates buffers (double-buffered by default);
    * sets the driver callback to the disp_drv_flush_cb method.

    The class it is mixed with must provide ``width``, ``height``, ``bgr``,
    ``blit()`` and ``rp2_wait_dma()``.
    '''
    def disp_drv_flush_cb(self, disp_drv, area, color):
        '''LVGL flush callback: send the rendered *area* to the panel.'''
        # print(f"({area.x1},{area.y1}..{area.x2},{area.y2})")
        self.rp2_wait_dma()  # wait if not yet done and DMA is being used
        # area bounds are inclusive
        w = area.x2 - area.x1 + 1
        h = area.y2 - area.y1 + 1
        # blit in background; 2 bytes per pixel (LV_COLOR_DEPTH=16 is enforced in __init__)
        self.blit(area.x1, area.y1, w, h, color.__dereference__(2 * w * h), is_blocking=False)
        self.disp_drv.flush_ready()

    def __init__(self, doublebuffer=True, factor=4):
        '''Create the LVGL display and hook it up to this driver.

        :param doublebuffer: allocate a second draw buffer when true.
        :param factor: each draw buffer holds ``1/factor`` of a full frame.
        :raises RuntimeError: LVGL was not built with ``LV_COLOR_DEPTH=16``.
        '''
        import lvgl as lv
        import lv_utils

        if lv.COLOR_DEPTH != 16:
            raise RuntimeError(f'LVGL *must* be compiled with LV_COLOR_DEPTH=16 (currently LV_COLOR_DEPTH={lv.COLOR_DEPTH}.')

        bufSize = (self.width * self.height * lv.color_t.__SIZE__) // factor

        if not lv.is_initialized():
            lv.init()
        # create event loop if not yet present
        if not lv_utils.event_loop.is_running():
            self.event_loop = lv_utils.event_loop()

        # attach all to self to avoid objects' refcount dropping to zero when the scope is exited
        self.draw_buf1 = bytearray(bufSize)
        self.draw_buf2 = bytearray(bufSize) if doublebuffer else None
        self.disp_drv = lv.disp_create(self.width, self.height)  # "line 311" in the reported traceback
        self.disp_drv.set_flush_cb(self.disp_drv_flush_cb)
        self.disp_drv.set_draw_buffers(self.draw_buf1, self.draw_buf2, bufSize, lv.DISP_RENDER_MODE.PARTIAL)
        self.disp_drv.set_color_format(lv.COLOR_FORMAT.NATIVE if self.bgr else lv.COLOR_FORMAT.NATIVE_REVERSED)


class St7796(St7796_hw, St77xx_lvgl):
    """ST7796 panel registered with LVGL: hardware layer plus LVGL glue."""

    def __init__(self, doublebuffer=True, factor=4, **kw):
        """Bring up the panel, then register it with LVGL.

        ``doublebuffer`` and ``factor`` go to ``St77xx_lvgl.__init__``; every
        other keyword argument goes to ``St7796_hw.__init__``.
        """
        import lvgl as lv

        # Hardware first: the LVGL side reads self.width/self.height,
        # which only exist once the panel has been configured.
        hw_options = kw
        St7796_hw.__init__(self, **hw_options)
        St77xx_lvgl.__init__(self, doublebuffer=doublebuffer, factor=factor)

@ kdschlosser, could you have a look at this code? I used the library from this issue.