home .. forth .. colorforth mail list archive ..

[colorforth] AHA compiler prototype


Hi everyone,

Sorry for the big message, but here's a little project
I'm working on, in assembly source.
Maybe not truly in the spirit of forth, but anyway:
It's a 32 bit, byte oriented AHA style compiler for x86
implementing dictionary unification (I just read your comments
about it) and 5 token types.

-Compile a relative call using index of dictionary (7 bits index)
-Jump to absolute address written in dictionary (6 bits index)
-Copy following bytes (5 bits count)
-Definition (4 bits count)
-Comment (4 bits count)

The dictionary array is 128 addresses long. The first 64
entries are executable at compile time.
Dictionary searches are resolved in the editor, the AHA way of
doing things. Tokens are shifted one bit at a time (Huffman-style
encoding; the code below shifts right) and a branch on carry selects
the token type.

A typical program starts with definitions and bincopy tokens
(no macro/forth words and definition conflicts) and then has
compileword tokens, along with comments spread anywhere.
Literals are implemented in a special macro, resolved at edit time;
see the code below.

So there's at least one mandatory macro in every program, "end".
The others are the one needed by the program only.
Or maybe in a complete system they could be written
in a static dictionary.

It's all there; it's far from finished but it's on its way.
I got ?dup and ?lit working, that's the trickiest part.
Look carefully at the register usage before reading
and look at the "Cbinary" label, it's almost straight from
the colorforth source taken at merlintec.

I have yet to write the editor, but before I want a top notch compiler
so feel free to comment.
Adam
=============================
;Written in FASM v1.46 GUI
;
;Register usage @ Compile time
;EAX: Main scratch register
;EBX: Most recent optimizable instruction
;ECX: scratch for "rep movsb"
;EDX: Second optimizable instruction
;ESI: next source code byte pointer
;EDI: next compiled byte
;EBP: Next dictionary entry pointer
;ESP: Top of return stack pointer

use32
macro align value { rb (value-1)-($+value-1) mod value }

;CompileCall
;Emit a 5-byte "call rel32" to the word whose dictionary index is in EAX.
;In:  EAX = dictionary index, EDI = compile pointer,
;     EBX = most recent optimizable instruction
;Out: EDI advanced by 5, EBX -> the call just emitted, EDX = previous EBX,
;     EAX = the rel32 displacement that was stored
CompileCall:
       mov EDX, EBX                 ;keep the older candidate (push2/jmp1 case)
       mov EBX, EDI                 ;this call is the newest candidate for ";"
       mov byte [EDI], 0E8h         ;opcode: call rel32
       mov EAX, [Dictionary+EAX*4]  ;absolute address of the target word
       add EDI, 5                   ;EDI = address right after the call
       sub EAX, EDI                 ;rel32 is relative to the next instruction
       mov [EDI-4], EAX             ;fill in the displacement
       ret

;ExecuteWord
;Run, at compile time, the word stored at dictionary index EAX.
;This is a jump, not a call: Detoken reaches this label with "jc", so the
;word's own "ret" returns straight to whoever called Detoken.
ExecuteWord:
       jmp dword [Dictionary+EAX*4]

;BinaryRecord
;Copy a run of raw bytes from the source stream to the compiled code.
;In:  EAX = count field of the token; EAX+1 bytes are copied
;     ESI = source pointer, EDI = compile pointer
;Out: ESI and EDI advanced past the copied bytes, ECX = 0, EAX incremented
;Assumes the direction flag is clear (the original note says "cld").
BinaryRecord:
       lea ECX, [EAX+1]   ;byte count = encoded count + 1
       inc EAX
       rep movsb
       ret

;Detoken
;Decode one token byte (in AL) and dispatch on its type. The type is
;encoded in the low bits: each right shift moves one bit into the carry
;flag, and the first set bit selects the handler. Whatever remains in EAX
;is that handler's index or count.
;  bit0 = 1 -> CompileCall   (7-bit dictionary index)
;  bit1 = 1 -> ExecuteWord   (6-bit dictionary index)
;  bit2 = 1 -> BinaryRecord  (5-bit count)
;  bit3 = 1 -> SkipComment   (4-bit count)
;  else     -> WriteDefinition (4-bit count), which then skips like a comment
Detoken:
       movzx EAX, AL   ;tokens are 1 byte wide
       shr EAX, 1
       jc CompileCall
       shr EAX, 1
       jc ExecuteWord
       shr EAX, 1
       jc BinaryRecord
       shr EAX, 1
       jc SkipComment

;Record the current compile address in the next free dictionary slot.
WriteDefinition:
       mov EBX, ESP    ;a value EBX never matches against compiled code, so
                       ;the lookback optimizations cannot fire across a
                       ;definition boundary
       mov [EBP], EDI  ;new dictionary entry = address of the code to come
       add EBP, 4      ;next free slot

;Step over the EAX+1 source bytes that follow the token byte.
SkipComment:
       add ESI, 1
       add ESI, EAX
       ret

;Compiler
;Compile the token stream whose address is on top of the data stack.
;In:  EAX = start of source code (top of data stack), ESI = data stack
;     pointer, EDI = compile pointer
;The loop has no exit test of its own. It ends when the source executes
;the "end" macro (Stopcompiler), which bumps the return address of
;"call Detoken" past the "jmp @b" below, so control resumes at "pop ESI".
;
;Fix: the token is now fetched with a byte load. The old dword load
;touched up to 3 bytes past the final one-byte token of the source.
Compiler:
       push ESI                ;ESI becomes the source pointer while compiling
       mov ESI, EAX            ;start of source code was in top of stack
       mov EBP, Dictionary     ;definitions fill the dictionary from slot 0
@@:     movzx EAX, byte [ESI]   ;fetch exactly one token byte
       inc ESI
       call Detoken
       jmp @b                  ;Stopcompiler skips this jump (assumed to be
                               ;assembled in its 2-byte short form)
       pop ESI                 ;re-enable the data stack
       lodsd                   ;runtime drop of the source address
       ret

;Number of dword slots in the dictionary. CompileCall indexes it with a
;7-bit value (0..127), ExecuteWord with a 6-bit value (0..63).
DICT_ENTRIES = 128

align 4
;Dictionary: one absolute code address per defined word, zero until defined.
Dictionary:
       times DICT_ENTRIES dd 0
;The compiler code ends here. The rest is macros
;To end compilation, the end word is mandatory.
;==========
; end
;End of source code, stop the compiler.
 ;Stopcompiler ("end" macro)
 ;Reached through ExecuteWord (a jump), so [ESP] holds the return address
 ;of Compiler's "call Detoken", i.e. the address of its "jmp @b".
 ;Adding 2 steps over that jump so the compile loop terminates.
 ;Assumes the jmp is assembled in its 2-byte short form.
 ;
 ;Fix: adjust the whole 32-bit return address. The previous
 ;"add byte [ESP], 2" patched only the low byte and lost the carry
 ;whenever that byte was 0FEh or 0FFh, returning to a wrong address.
 Stopcompiler:
       add dword [ESP], 2
       ret

;==========
; dup
; ?dup
;Optimization used inside primitive
;definitions, get rid of "drop dup"
;in compiled code.
;* lodsd could be replaced with its longer
;  version to gain speed over size
 ;Qdup (?dup)
 ;Compile a dup unless the code just emitted is a drop (lodsd, 0ADh) that
 ;EBX still points at; in that case delete the drop instead, so that
 ;"drop dup" compiles to nothing.
 ;Out: EAX clobbered, EDI moved back 1 (drop removed) or forward 5 (dup).
 Qdup:
       lea EAX, [EDI-1]         ;where a 1-byte drop would sit
       cmp EAX, EBX             ;is it the last optimizable instruction?
       jne Cdup
       cmp byte [EAX], 0ADh     ;and is it really lodsd?
       jne Cdup
       mov EDI, EAX             ;un-compile the drop
       ret

 ;Cdup (dup)
 ;Emit "lea ESI, [ESI-4]" / "mov [ESI], EAX" (8D 76 FC 89 06).
 Cdup:
       mov byte [EDI+4], 06h
       mov dword [EDI], 89FC768Dh
       add EDI, 5
       ret

; ;  (semicolon)
; ret
; nop
;Compiles a ret, or if preceded by a call
;transforms "call ret" into "jump" and
;"call1 jmp2" into "push2 jmp1".
;nop is used to prevent lookback optimization
;and for alignment purposes.
;Semicolon (";")
;End a definition.
;  * If the last thing compiled is a call that EBX points at, turn it into
;    a jmp (tail call: E8 -> E9) and emit no ret.
;  * If, in addition, EDX points at another call directly in front of it
;    ("call A / jmp B"), rewrite the pair as "push B / jmp A".
;  * Otherwise fall back to Cret and emit a plain ret.
;In:  EDI = compile pointer, EBX / EDX = last two optimizable instructions
;Out: EAX clobbered
;
;Fixes:
;  1. The "no second call" exits used "@f", which resolved to the "@@"
;     label inside Qlit. They now target a label local to this routine.
;  2. The pair rewrite used to swap the two rel32 fields verbatim. A push
;     needs an ABSOLUTE address, and the first displacement has to be
;     rebased because the jmp sits 5 bytes later than the call it came
;     from. With p = address of the first call (EDI = p+10):
;        push imm32 = rel2 + (p+10)   (absolute address of B)
;        jmp  rel32 = rel1 - 5        (A, now relative to p+10)
Semicolon:
       lea EAX, [EDI-5]
       cmp EBX, EAX            ;is the last instruction the tracked one?
       jnz Cret
       cmp byte [EAX], 0E8h    ;and is it a call?
       jnz Cret
       inc byte [EAX]          ;E8 (call) -> E9 (jmp)
       lea EAX, [EAX-5]
       cmp EDX, EAX            ;is another tracked instruction just before?
       jnz @f
       cmp byte [EAX], 0E8h    ;also a call?
       jnz @f
       mov byte [EAX], 68h     ;first instruction becomes push imm32
       inc EAX                 ;EAX -> operand field of the first instruction
       push dword [EAX]        ;rel1 (target A, relative to p+5)
       push dword [EAX+5]      ;rel2 (target B, relative to p+10)
       pop dword [EAX]         ;operand of push = rel2 ...
       add [EAX], EDI          ;... + (p+10) = absolute address of B
       pop dword [EAX+5]       ;operand of jmp = rel1 ...
       sub dword [EAX+5], 5    ;... rebased from p+5 to p+10
@@:     ret

;Cret (ret)
;Append a one-byte "ret" (0C3h) to the compiled code.
Cret:
       inc EDI
       mov byte [EDI-1], 0C3h
       ret
;Cnop (nop)
;Append a one-byte "nop" (90h) to the compiled code.
Cnop:
       inc EDI
       mov byte [EDI-1], 90h
       ret
; lit
; ?lit
;?lit is used by primitives (as ?dup is). It returns with the zero flag SET
;if there is no literal value right behind the primitive being compiled, and
;with the zero flag CLEAR (literal value in EAX) if there is one.
;The code handles the ?dup optimization.
;Clit (lit)
;Compile a literal: a dup (via Qdup, which may instead cancel a preceding
;drop) followed by "mov EAX, imm32". The 32-bit value is taken from the
;source stream at ESI.
;Out: EBX -> the mov opcode just emitted, EDX = previous EBX,
;     ESI advanced by 4, EDI advanced past the instruction
Clit:
       call Qdup
       mov byte [EDI], 0B8h   ;opcode: mov EAX, imm32
       mov EDX, EBX           ;shift the lookback history
       mov EBX, EDI           ;the literal is now the newest candidate
       inc EDI
       movsd                  ;copy the immediate from source to code
       ret

;Qlit (?lit)
;Check whether the instruction just compiled is a literal
;("mov EAX, imm32", B8 imm32, at EDI-5 with EBX pointing at it).
;  No literal: returns ZF = 1 and EAX = 0; nothing is changed.
;  Literal:    returns ZF = 0 and EAX = the immediate value. The literal
;              is un-compiled so the caller can fold it into its own
;              instruction:
;                - if a dup (8D 76 FC 89 06) sits right in front of it,
;                  both are removed (EDI -= 10);
;                - otherwise the literal had replaced the top of stack
;                  (its dup cancelled an earlier drop), so the literal is
;                  removed and a drop is compiled in its place.
;
;Fixes:
;  1. The immediate was read from [EDI+1], which is memory not yet
;     written; it lives at [EDI-4], right after the B8 opcode.
;  2. The dup test looked at [EDI-5], which is the literal itself (it
;     starts with B8h) and could never match. The dup, when present,
;     starts at EDI-10.
Qlit:
       lea EAX, [EDI-5]
       cmp EBX, EAX                    ;tracked instruction directly behind us?
       jnz @f
       cmp byte [EAX], 0B8h            ;and is it mov EAX, imm32?
       jnz @f
       mov EBX, EDX                    ;pop the lookback history
       mov EAX, [EAX+1]                ;EAX = the literal's imm32
       cmp dword [EDI-10], 89FC768Dh   ;dup in front of the literal?
       jz nodup
       add EDI, -5                     ;remove the literal only

;Cdrop (drop)
;Emit "lodsd" (0ADh) and mark it as the newest optimizable instruction.
Cdrop:
       mov byte [EDI], 0ADh ;lodsd
       mov EBX, EDI         ;lets Qdup cancel a following dup
       inc EDI              ;leaves the zero flag clear
       ret

nodup:
       add EDI, -10         ;remove dup + literal; zero flag clear
       ret

;No literal found. This anonymous label must stay anonymous: it is the
;next "@@" after Semicolon's "@f" references as well.
@@:     xor EAX, EAX         ;zero flag set
       ret

; push
; pop
; swap
; over
;The other missing stack manipulation word definitions.
;Cpush (push)
;Move the top of the data stack to the return stack.
;  Literal just compiled (Qlit returns ZF = 0): emit "push imm32".
;  Otherwise: emit "push EAX" followed by a drop.
Cpush:
       call Qlit
       jnz @f
       mov byte [EDI], 50h  ;push EAX
       inc EDI
       jmp Cdrop            ;the value has left the data stack
@@:     mov byte [EDI], 68h  ;push imm32
       inc EDI
       stosd                ;the literal value returned by Qlit
       ret
;Cpop (pop)
;Move the top of the return stack to the data stack:
;a dup (or a cancelled drop, see Qdup) followed by "pop EAX".
Cpop:
       call Qdup
       inc EDI
       mov byte [EDI-1], 58h ;pop EAX
       ret
;Cswap (swap)
;Emit 6 bytes that exchange EAX (top of stack) with [ESI] (second item):
;  50      push EAX
;  FF 36   push dword [ESI]
;  58      pop EAX
;  8F 06   pop dword [ESI]
;
;Fix: after the first four bytes the COMPILE pointer (EDI) must advance.
;The original advanced ESI, the source pointer, so the final two bytes
;overwrote the first two and the source stream lost four bytes.
Cswap:
       mov dword [EDI], 5836FF50h ;push EAX, push [ESI], pop EAX
       add EDI, 4
       mov word [EDI], 068Fh      ;pop [ESI]
       add EDI, 2
       ret
;Cover (over)
;A dup (or a cancelled drop, see Qdup) followed by
;"mov EAX, [ESI+4]" (8B 46 04). Four bytes are stored, as in the single
;dword store this replaces, but only three are kept.
Cover:
       call Qdup
       mov word [EDI], 468Bh     ;8B 46
       mov word [EDI+2], 0004h   ;04, plus a spare 00 the next word overwrites
       add EDI, 3
       ret
; +
; binary
; nip
; and
; xor
; -
; 2*
; 2/
; *
; /mod
; /
; mod
; */
;Those are the main words for binary arithmetic.

;Cadd (+)
;Stage the opcode for "add EAX, [ESI]" (03 06) and share Cbinary's tail.
Cadd:
       mov word [EDI], 603h ;add EAX, [ESI]

;Cbinary (binary)
;Finish a two-operand ALU word whose "op EAX, [ESI]" bytes the caller has
;just written at [EDI] (EDI not yet advanced).
;  Literal just compiled: emit "op EAX, imm32" instead. Its opcode is the
;  register/memory opcode + 2 (03->05, 23->25, 33->35).
;  Otherwise: keep the two staged bytes and append a nip.
;Clobbers EAX and ECX.
;
;Fix: Qlit moves EDI back when it removes a literal, so the staged opcode
;is no longer at [EDI] afterwards. The original patched whatever byte was
;at the new EDI. The opcode is now saved before Qlit and re-emitted.
Cbinary:
       push dword [EDI]     ;save the staged opcode bytes
       call Qlit
       pop ECX              ;CL = opcode (pop leaves Qlit's flags intact)
       jz @f
       add CL, 2            ;op EAX, [ESI] -> op EAX, imm32
       mov [EDI], CL
       inc EDI
       stosd                ;the literal value returned by Qlit
       ret
@@:
       inc EDI ;\keep the written code
       inc EDI ;/and add a nip

;Cnip (nip)
;Emit "lea ESI, [ESI+4]" (8D 76 04): discard the second stack item.
Cnip:
       mov dword [EDI], 04768Dh ;lea ESI, [ESI+4]
       add EDI, 3
       ret

;Cand (and)
;Stage "and EAX, [ESI]" (23 06) at [EDI] and let Cbinary finish the word.
Cand:
       mov byte [EDI], 23h
       mov byte [EDI+1], 06h
       jmp Cbinary

;Cxor (xor)
;Stage "xor EAX, [ESI]" (33 06) at [EDI] and let Cbinary finish the word.
Cxor:
       mov byte [EDI], 33h
       mov byte [EDI+1], 06h
       jmp Cbinary
;Cnot
;Emit "not EAX" (F7 D0): one's complement of the top of stack.
Cnot:
       mov byte [EDI], 0F7h
       inc EDI
       mov byte [EDI], 0D0h
       inc EDI
       ret
;C2star (2*)
;Emit D1 E0. These bytes encode "shl EAX, 1" (D1 /4), a left shift that
;doubles the top of stack.
C2star:
       mov byte [EDI], 0D1h
       inc EDI
       mov byte [EDI], 0E0h
       inc EDI
       ret
;C2slash (2/)
;Emit "sar EAX, 1" (D1 F8): halve the top of stack, keeping its sign.
;
;Fix: the original emitted D1 E8, which is "shr EAX, 1" (a logical
;shift, despite its "shl" comment). The other arithmetic words here are
;signed (imul, cdq/idiv), and Forth's 2/ leaves the most significant bit
;unchanged, so a negative value must stay negative: use the arithmetic
;shift.
C2slash:
       mov word [EDI], 0F8D1h ;sar EAX, 1
       inc EDI
       inc EDI
       ret
;Cstar (*)
;Emit "imul EAX, [ESI]" (0F AF 06) followed by a nip.
;
;Fix: the multiply consumes the second stack item, but the original left
;it on the data stack. Like Cbinary's non-literal path, the word now ends
;with a nip.
Cstar:  mov dword [EDI], 06AF0Fh ;imul EAX, [ESI]
       add EDI, 3
       jmp Cnip                 ;drop the operand that was multiplied in

;Cdivmod (/mod)  n d - r q
;Emit: swap, then
;  99      cdq                  sign-extend the dividend into EDX:EAX
;  F7 3E   idiv dword [ESI]     EAX = quotient, EDX = remainder
;  89 16   mov [ESI], EDX       remainder becomes the second stack item
;The generated code clobbers EDX at run time.
;
;Fix: after the 4-byte store the COMPILE pointer (EDI) must advance. The
;original advanced ESI, the source pointer, so the idiv/mov bytes were
;left to be overwritten by the next word and the source stream lost
;four bytes.
Cdivmod: ;nd - rq
       call Cswap
       mov byte [EDI], 99h ;cdq
       inc EDI
       mov dword [EDI], 16893EF7h ;idiv [ESI] | mov [ESI], EDX
       add EDI, 4
       ret
;Cslash (/)
;/mod, then nip: keeps the quotient, discards the remainder.
Cslash: call Cdivmod
       jmp Cnip    ;tail call; Cnip's ret returns to our caller

;Cmod (mod)
;/mod, then drop: discards the quotient, leaving the remainder.
Cmod:   call Cdivmod
       jmp Cdrop   ;tail call; Cdrop's ret returns to our caller
;CStarSlash (*/)  a b c - a*b/c
;Emit:
;  8B C8   mov ECX, EAX         ECX = divisor c
;  AD      lodsd                drop: EAX = b, a becomes the second item
;  F7 2E   imul dword [ESI]     EDX:EAX = a*b (64-bit intermediate)
;  F7 F9   idiv ECX             EAX = a*b/c
;  8D 76 04 lea ESI, [ESI+4]    nip: discard a
;The generated code clobbers ECX and EDX at run time.
;
;Fix: this label had no body and fell straight through into Cfetch, so
;using "*/" would have compiled a fetch instead.
CStarSlash:
       mov word [EDI], 0C88Bh      ;mov ECX, EAX
       inc EDI
       inc EDI
       call Cdrop
       mov dword [EDI], 0F9F72EF7h ;imul dword [ESI] | idiv ECX
       add EDI, 4
       jmp Cnip


; @
; a@
; a!
; !
; !+
; @+
;Those words use dword addressing.
;Store uses the EDI register as the address register.
;! uses ?lit for the two operands.
;Cfetch (@)
;Fetch a dword using dword addressing (address = index * 4).
;  Literal index just compiled: emit a dup (or cancel a drop, see Qdup)
;  and "mov EAX, [imm32]" (A1 moffs32), with the index already scaled at
;  compile time.
;  Otherwise: emit "shl EAX, 1" twice, then "mov EAX, [EAX]" (8B 00).
;
;Fix: Qdup uses EAX as scratch, which destroyed the literal returned by
;Qlit before it was scaled and stored. EAX is now saved across the call.
Cfetch:
       call Qlit
       jz @f
       push EAX             ;Qdup clobbers EAX
       call Qdup
       pop EAX
       mov byte [EDI], 0A1h ;mov EAX, [imm32]
       inc EDI
       shl EAX, 2           ;dword addressing, scaled at compile time
       stosd
       ret
@@:
       mov dword [EDI],0E0D1E0D1h ;2x"shl EAX, 1"
       add EDI, 4
       mov word [EDI], 008Bh ;mov EAX, [EAX]
       add EDI, 2
       ret
;CAfetch (a@)
;Push the address register: a dup (or a cancelled drop, see Qdup)
;followed by "mov EAX, EDI" (89 F8).
;
;Fix: the COMPILE pointer (EDI) must advance past the two emitted bytes.
;The original advanced ESI, the source pointer, so the instruction was
;left to be overwritten and two source bytes were skipped.
CAfetch:
       call Qdup
       mov word [EDI], 0F889h ;mov EAX, EDI
       add EDI, 2
       ret
;CAstore (a!)
;Load the address register (EDI at run time).
;  Literal just compiled: emit "mov EDI, imm32" (BF imm32).
;  Otherwise: emit "mov EDI, EAX" (89 C7) followed by a drop.
;
;Fix: in the non-literal path the COMPILE pointer (EDI) must advance past
;the two emitted bytes. The original advanced ESI, the source pointer, so
;the drop overwrote the first byte of the mov and two source bytes were
;skipped.
CAstore:
       call Qlit
       jz @f
       mov byte [EDI], 0BFh ;mov EDI, imm32
       inc EDI
       stosd
       ret
@@:
       mov word [EDI], 0C789h ;mov EDI, EAX
       add EDI, 2
       jmp Cdrop

;Cstore (!)  n a -
;Store a dword using dword addressing (address = index * 4).
;  No literal address:         handled by Cstorenolit.
;  Literal address and value:  "mov dword [a*4], n"  (C7 05 disp32 imm32)
;  Literal address only:       "mov [a*4], EAX"      (89 05 disp32), drop
;
;Fixes (literal address, run-time value):
;  1. The address was stored unscaled. The other two paths, and Cfetch,
;     multiply the index by 4.
;  2. The stored value was never dropped, leaving the data stack one item
;     too deep. Cstorenolit already ends with a drop.
Cstore:
       call Qlit
       jz Cstorenolit
       push EAX             ;literal address (index)
       call Qlit
       jz @f
       mov word [EDI], 05C7h ;mov [imm32], imm32
       inc EDI
       inc EDI
       pop dword [EDI]      ;address field
       shl dword [EDI], 2   ;dword addressing
       add EDI, 4
       stosd                ;value field
       ret
@@:
       pop EAX              ;literal address (index)
       mov word [EDI], 0589h ;mov [imm32], eax
       inc EDI
       inc EDI
       shl EAX, 2           ;dword addressing
       stosd
       jmp Cdrop            ;the stored value leaves the stack

;Cstorenolit
;"!" with the address on the stack at run time. Emits:
;  a!                        (CAstore: the address goes to EDI)
;  89 04 BD 00000000         mov [EDI*4+0], EAX
;  drop
;
;Fixes:
;  1. The COMPILE pointer (EDI) must advance past the three opcode bytes.
;     The original advanced ESI, the source pointer, so the disp32 was
;     written over the opcode and three source bytes were skipped.
;  2. The zero disp32 relied on EAX still being 0 from an earlier failed
;     Qlit. It is now cleared explicitly.
Cstorenolit:
       call CAstore
       mov dword [EDI], 0BD0489h ;mov [EDI*4+disp32], EAX
       add EDI, 3
       xor EAX, EAX
       stosd ;disp32 = 0
       jmp Cdrop









---------------------------------------------------------------------
To unsubscribe, e-mail: colorforth-unsubscribe@xxxxxxxxxxxxxxxxxx
For additional commands, e-mail: colorforth-help@xxxxxxxxxxxxxxxxxx
Main web page - http://www.colorforth.com