' First draft of Propeller VM for Bellard's fbcc C compiler

CON

_clkmode        = xtal1 + pll16x
_xinfreq        = 5_000_000

' These are the SPIN byte codes for mul and div
SPIN_MUL_OP     = $F4  '(multiply, return lower 32 bits)
SPIN_DIV_OP     = $F6  '(divide, return quotient 32 bits)
SPIN_REM_OP     = $F7  '(divide, return remainder 32 bits)

OBJ
  dbg : "PASDebug"

VAR
  long cog

PUB start (params) | okay
  okay := cog := cognew(@debug, params) + 1   'Start emulator in a new COG
  dbg.start(31,30, @debug)

PUB stop
'FIXME we need this

PUB getdispatch_table
  return @dispatch_table

DAT
                        org     0

debug                   long $34FC1202,$6CE81201,$83C120B,$8BC0E0A,$E87C0E03,$8BC0E0A
                        long $EC7C0E05,$A0BC1207,$5C7C0003,$5C7C0003,$7FFC,$7FF8

enter                   jmp     #init

ld_b                    mov     memp, tos               ' fetch byte and sign extend
                        add     memp, LMM_base
                        rdbyte  tos, memp
                        shl     tos, #24
                        sar     tos, #24
                        jmp     #next_inst

ld_ub                   mov     memp, tos               ' fetch byte
                        add     memp, LMM_base
                        rdbyte  tos, memp
                        jmp     #next_inst

ld_w                    mov     memp, tos               ' fetch word and sign extend
                        add     memp, LMM_base
                        rdword  tos, memp
                        shl     tos, #16
                        sar     tos, #16
                        jmp     #next_inst

ld_uw                   mov     memp, tos               ' fetch word
                        add     memp, LMM_base
                        rdlong  tos, memp
                        jmp     #next_inst

ld_i                    mov     memp, tos               ' fetch long
                        add     memp, LMM_base
                        rdlong  tos, memp
                        jmp     #next_inst

st_b                    call    #pop                    ' store byte
                        mov     memp, tos
                        add     memp, LMM_base
                        wrbyte  data, memp
                        mov     tos, data
                        jmp     #next_inst

st_w                    call    #pop                    ' store word
                        mov     memp, tos
                        add     memp, LMM_base
                        wrword  data, memp
                        mov     tos, data
                        jmp     #next_inst

st_i                    call    #pop                    ' store long
                        mov     memp, tos
                        add     memp, LMM_base
                        wrlong  data, memp
                        mov     tos, data
                        jmp     #next_inst

add_i                   call    #pop                    ' nos + tos -> tos
                        add     tos, data
                        jmp     #next_inst

sub_i                   call    #pop                    ' nos - tos -> tos
                        sub     data, tos
                        mov     tos, data
                        jmp     #next_inst

mul_i                   call    #pop                    ' nos * tos -> tos
                        mov     x, data
                        mov     y, tos
                        mov     a, #SPIN_MUL_OP
                        jmp     #math_F4
div_i
div_ui                  call    #pop                    ' nos / tos -> tos
                        mov     y, data
                        mov     x, tos
                        mov     a, #SPIN_DIV_OP
                        jmp     #math_F4
mod_i
mod_ui                  call    #pop                    ' nos % tos -> tos
                        mov     y, data
                        mov     x, tos
                        mov     a, #SPIN_REM_OP
                        jmp     #math_F4

neg_i                   neg     tos, tos                ' -tos -> tos
                        jmp     #next_inst

cmplt_i                 call    #pop                    ' (nos<tos) ? 1 : 0
                        cmps    tos, data wz, wc
                        mov     tos, #0
              if_b      mov     tos, #1
                        jmp     #next_inst

cmple_i                 call    #pop                    ' (nos<=tos) ? 1 : 0
                        cmps    tos, data wz, wc
                        mov     tos, #0
              if_be     mov     tos, #1
                        jmp     #next_inst

cmpge_i                 call    #pop                    ' (nos>=tos) ? 1 : 0
                        cmps    tos, data wz, wc
                        mov     tos, #0
              if_ae     mov     tos, #1
                        jmp     #next_inst

cmpgt_i                 call    #pop                    ' (nos>tos) ? 1 : 0
                        cmps    tos, data wz, wc
                        mov     tos, #0
              if_a      mov     tos, #1
                        jmp     #next_inst

cmpeq_i                 call    #pop                    ' (nos==tos) ? 1 : 0
                        cmps    tos, data wz
              if_nz     mov     tos, #0
              if_z      mov     tos, #1
                        jmp     #next_inst

cmpne_i                 call    #pop                    ' (nos!=tos) ? 1 : 0
                        cmps    tos, data wz
              if_z      mov     tos, #0
              if_nz     mov     tos, #1
                        jmp     #next_inst

cmplt_ui                call    #pop                    ' (nos<tos) ? 1 : 0
                        cmp     tos, data wz, wc
                        mov     tos, #0
              if_ne     mov     tos, #1
                        jmp     #next_inst

cmple_ui                call    #pop                    ' (nos<=tos) ? 1 : 0
                        cmp     tos, data wz, wc
                        mov     tos, #0
              if_ne     mov     tos, #1
                        jmp     #next_inst

cmpge_ui                call    #pop                    ' (nos>=tos) ? 1 : 0
                        cmp     tos, data wz, wc
                        mov     tos, #0
              if_ne     mov     tos, #1
                        jmp     #next_inst

cmpgt_ui                call    #pop                    ' (nos>tos) ? 1 : 0
                        cmp     tos, data wz, wc
                        mov     tos, #0
              if_ne     mov     tos, #1
                        jmp     #next_inst

and_i                   call    #pop                    ' nos & tos -> tos
                        and     tos, data
                        jmp     #next_inst

or_i                    call    #pop                    ' nos | tos -> tos
                        or      tos, data
                        jmp     #next_inst

xor_i                   call    #pop                    ' nos ^ tos -> tos
                        xor     tos, data
                        jmp     #next_inst

not_i                   xor     tos, minus_one          ' ~tos -> tos
                        jmp     #next_inst

shl_i                   shl     tos, #1                 ' tos << 1 -> tos
                        jmp     #next_inst

shr_i                   sar     tos, #1                 ' tos >> 1 -> tos (signed)
                        jmp     #next_inst

shr_ui                  shr     tos, #1                 ' tos >> 1 -> tos (unsigned)
                        jmp     #next_inst

cvt_i_ub                and     tos, #$7f               ' cut high order sign bits
                        jmp     #next_inst

cvt_i_uw                and     tos, L0xffff            ' cut high order sign bits
                        jmp     #next_inst

noop
cvt_i_b
cvt_i_w
cvt_b_i
cvt_w_i                 jmp     #next_inst              ' all no-ops

li_i                    call    #push                   ' immediate -> tos
                        call    #mget_i
                        mov     tos, data
                        jmp     #next_inst

libp_i                  call    #push                   ' &bp[i] -> tos
                        call    #mget_i
                        neg     tos, data               ' our stack grows down, not up
                        add     tos, bp
                        jmp     #next_inst

jeq_i                   call    #mget_i                 ' if(!tos) i -> pc
                        cmp     tos, #0     wz
              if_e      mov     pc, data
                        call    #pop
                        mov     tos, data
                        jmp     #next_inst

jne_i                   call    #mget_i                 ' if(tos) i -> pc
                        cmp     tos, #0     wz
              if_ne     mov     pc, data
                        call    #pop
                        mov     tos, data
                        jmp     #next_inst

switch_i                call    #pop                    ' process switch jump table (list of longs):
                        add     tos, LMM_base
                        mov     memp, tos               '    [n+1]
                        rdlong  tmp, memp               '    [case 1] ... [case n]
                        shl     tmp, #2                 '    [default addr]
                        add     memp, tmp               '    [case 1 addr] ... [case n addr]
:l1                     add     tos, #4                 ' addr of table is in tos, switch expr is in nos
                        cmp     memp, tos   wc,wz
              if_e      jmp     #:l2
                        rdlong  tmp2, tos
                        cmp     data, tmp2  wc,wz
              if_ne     jmp     #:l1
                        add     tos, tmp
:l2                     rdlong  pc, tos
                        call    #pop                    ' update tos
                        mov     tos,data
                        jmp     #next_inst

jump                    call    #mget_i                 ' i -> pc
                        mov     pc, data
                        jmp     #next_inst

jsr                     call    #mget_i                 ' tos -> pc; push pc; push bp; push argc
                        mov     tmp, pc
                        mov     pc, tos
                        mov     tos, tmp
                        call    #push
                        mov     tos, bp
                        call    #push
                        mov     tos, data
                        mov     bp, sp
                        jmp     #next_inst

rts                     mov     tmp, tos                ' tos (= retval) -> tmp, pop argc; pop bp; pop pc; sp += argc * 4; tmp -> tos
                        mov     sp, bp                  ' drop local variables
                        sub     sp, #4                  ' compensate for argc left in tos on call
                        call    #pop
                        mov     tos, data
                        call    #pop
                        mov     bp, data
                        call    #pop
                        mov     pc, data
                        add     sp, tos
                        mov     tos, tmp
                        jmp     #next_inst

dup                     call    #push                   ' tos -> *sp--
                        jmp     #next_inst

drop                    call    #pop                    ' *++sp -> tos
                        mov     tos, data
                        jmp     #next_inst

addsp                   call    #mget_i                 ' sp - i -> sp (our stack grows down, not up)
                        call    #push
                        sub     sp, data
                        call    #pop
                        jmp     #next_inst

libcall                 call    #push
                        ' wrlong  sp, sys_sp
                        wrlong  bp, sys_bp
                        call    #mget_i                 ' Get syscall ID
                        add     data, #1                ' Add one to avoid zero
                        wrlong  data, sys_op
:wait                   rdlong  tmp, sys_op wz          ' Wait for command completion
              if_nz     jmp     #:wait
                        rdlong  tos, sys_sp
                        jmp     #next_inst

'------------------------------------------------------------------------------
                        fit $FF                         'Opcode handlers must fit in 256 LONGS
'------------------------------------------------------------------------------
init                    mov     tmp, par
                        rdlong  LMM_base, tmp
                        add     tmp, #4
                        rdlong  pc, tmp
                        add     tmp, #4
                        mov     bp, sp
                        rdlong  sp, tmp
                        add     tmp, #4
                        rdlong  dispatch_tab, tmp
                        add     tmp, #4
                        mov     sys_op, tmp
                        add     tmp, #4
                        mov     sys_sp, tmp
                        add     tmp, #4
                        mov     sys_bp, tmp

' Main CVM fetch and execute loop
next_inst               mov     memp, pc
                        add     pc, #1
                        add     memp, LMM_base
                        rdbyte  data, memp
                        add     data, dispatch_tab
                        rdbyte  tmp, data
                        jmp     tmp                    'No # here we are jumping through tmp.
'------------------------------------------------------------------------------
'CVM memory space access routines

'Push a LONG onto the stack from "tos"
push                    mov     memp, sp
                        add     memp, LMM_base
                        wrlong  tos, memp
                        sub     sp, #4
push_ret                ret

'Pop a LONG from the stack into "data", set Z according to data.
pop                     add     sp, #4
                        mov     memp, sp
                        add     memp, LMM_base
                        rdlong  data, memp wz           'Must set Z for caller
pop_ret                 ret

'Read a 4 byte unaligned long from pc into data, increment pc by 4
mget_i                  mov     memp, pc
                        add     memp, LMM_base
                        rdbyte  data, memp
                        add     memp, #1
                        rdbyte  tmp, memp
                        shl     tmp, #8
                        or      data, tmp
                        add     memp, #1
                        rdbyte  tmp, memp
                        shl     tmp, #16
                        or      data, tmp
                        add     memp, #1
                        rdbyte  tmp, memp
                        shl     tmp, #24
                        or      data, tmp
                        add     pc, #4
mget_i_ret              ret

'CVM registers and working variables
pc                      long 0      'CVM Program Counter
sp                      long 0      'CVM Stack Pointer
bp                      long 0      'CVM Base Pointer
tos                     long 0      'Top Of Stack
data                    long 0      'Data parameter for read, write etc
memp                    long 0      'Temporary pointer into ZPU memory space
tmp                     long 0      'For temp operands etc
tmp2                    long 0      'For temp in switch_i
tab                     long 0      'For temp in switch_i

'IO interface area. Do not change the order of these.
sys_op                  long 0
sys_sp                  long 0
sys_bp                  long 0

LMM_base                long 0      'HUB address of the CVM memory area
dispatch_tab            long 0      'HUB address of instruction dispatch table

minus_one               long $FFFFFFFF
L0xffff                 long $0000FFFF

'------------------------------------------------------------------------------
'Maths routines borrowed from the Cluso Spin interpreter.
'In turn borrowed from the Parallax Spin interpreter.
'$F4..F7 = MUL/DIV lower/upper result
'
' "a" holds the opcode (lower 5 bits as shown in the first block of code... e.g. MPY=$F4=%10100)
' "x" and "y" are the two numbers to be multiplied (or divided) x * y or x / y, result is in "x" (pushed).
'
' So, if you use this routine with a set to...
'  a = $F4 (multiply, return lower 32 bits)
'  a = $F5 (multiply, return upper 32 bits)
'  a = $F6 (divide, return quotient 32 bits)
'  a = $F7 (divide, return remainder 32 bits)

math_F4                 and     a,#%11111               '<== and mask (need in a)
                        mov     t1,#0
                        mov     t2,#32                  'multiply/divide
                        abs     x,x             wc
                        muxc    a,#%01100
                        abs     y,y             wc,wz
        if_c            xor     a,#%00100
                        test    a,#%00010       wc      'set c if divide (DIV/MOD)
        if_c_and_nz     jmp     #mdiv                   'if divide and y=0, do multiply so result=0
                        shr     x,#1            wc      'multiply
mmul    if_c            add     t1,y            wc
                        rcr     t1,#1           wc
                        rcr     x,#1            wc
                        djnz    t2,#mmul
                        test    a,#%00100       wz
        if_nz           neg     t1,t1
        if_nz           neg     x,x             wz
        if_nz           sub     t1,#1
                        test    a,#%00001       wz
        if_nz           mov     x,t1
                        mov     tos, x
                        jmp     #next_inst

mdiv                    shr     y,#1            wc,wz   'divide
                        rcr     t1,#1
        if_nz           djnz    t2,#mdiv
mdiv2                   cmpsub  x,t1            wc
                        rcl     y,#1
                        shr     t1,#1
                        djnz    t2,#mdiv2
                        test    a,#%01000       wc
                        negc    x,x
                        test    a,#%00100       wc
                        test    a,#%00001       wz
        if_z            negc    x,y
                        mov     tos, x
                        jmp     #next_inst

a                       long    0
x                       long    0
y                       long    0
t1                      long    0
t2                      long    0
'------------------------------------------------------------------------------
                        fit     $1F0
'------------------------------------------------------------------------------



'------------------------------------------------------------------------------
' The instruction dispatch look up table (in HUB)
dispatch_table
{00}    byte  noop
{01}    byte  ld_b
{02}    byte  ld_ub
{03}    byte  ld_w
{04}    byte  ld_uw
{05}    byte  ld_i
{06}    byte  st_b
{07}    byte  st_w
{08}    byte  st_i
{09}    byte  add_i
{0A}    byte  sub_i
{0B}    byte  mul_i
{0C}    byte  div_i
{0D}    byte  div_ui
{0E}    byte  mod_i
{0F}    byte  mod_ui

{10}    byte  neg_i
{11}    byte  cmplt_i
{12}    byte  cmple_i
{13}    byte  cmpgt_i
{14}    byte  cmpge_i
{15}    byte  cmpeq_i
{16}    byte  cmpne_i
{17}    byte  cmplt_ui
{18}    byte  cmple_ui
{19}    byte  cmpge_ui
{1A}    byte  cmpgt_ui
{1B}    byte  and_i
{1C}    byte  or_i
{1D}    byte  xor_i
{1E}    byte  not_i
{1F}    byte  shl_i

{20}    byte  shr_i
{21}    byte  shr_ui
{22}    byte  cvt_i_b
{23}    byte  cvt_i_ub
{24}    byte  cvt_i_w
{25}    byte  cvt_i_uw
{26}    byte  cvt_b_i
{27}    byte  cvt_w_i
{28}    byte  li_i
{29}    byte  libp_i
{2A}    byte  jeq_i
{2B}    byte  jne_i
{2C}    byte  switch_i
{2D}    byte  jump
{2E}    byte  jsr
{2F}    byte  rts

{30}    byte  dup
{31}    byte  drop
{32}    byte  addsp
{33}    byte  libcall