forked from smtlaissezfaire/bcompiler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bcc.bc
1071 lines (962 loc) · 16.6 KB
/
bcc.bc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#
# BCC: a toy compiler
#
# Copyright (C) 2001, Edmund GRIMLEY EVANS <[email protected]>
#
###
### Binary image
###
# Pointer to the growable buffer that holds the generated image (see emit).
var binary
# Number of bytes currently allocated for the buffer (updated by emit).
var binary_size
# Offset in the image at which the next piece of code or data is generated.
var pc
# Terminate the compiler with exit status 123.
# Called by emit when realloc returns a null pointer.
def out_of_memory
{
123 exit
}
# Store the byte `data` at offset `addr` of the output image.
# When `addr` is not below the current allocation the buffer is first
# reallocated; a failed realloc ends the program through out_of_memory.
def addr data emit
{
addr binary_size < not if
# New size is (addr + 1) * 2, so the buffer grows geometrically.
addr 1 + 1 << binary_size=
binary binary_size realloc binary=
binary not if
out_of_memory
fi
fi
data binary addr c[]=
return0
}
# Write every generated byte, offsets 0 up to but excluding pc, with putchar.
def dump
{
    var offset
    0 offset=
    {
        # Leave the loop as soon as the whole image has been written.
        offset pc != while
        binary offset c[] putchar
        offset 1 + offset=
        continue
    }
    return0
}
# Store the word `n` in the output image, at the address of byte offset `pos`.
# Used to thread chains of not-yet-resolved jumps/calls through the image.
def pos n store_int
{
n binary pos c[]& =
return0
}
# Return the word stored in the output image at the address of byte offset
# `pos` (the counterpart of store_int).
def pos fetch_int
{
binary pos c[]& @
return1
}
###
### Lexical analysis
###
# Spellings of the reserved tokens.  token_eof is the empty string, which is
# what get_next_token leaves in next_token at end of input.
# NOTE(review): token__def is not referenced by is_symbol or by any other
# code in the part of the file reviewed here - confirm it is used further down.
string token_eof ""
string token__def "_def"
string token_break "break"
string token_continue "continue"
string token_def "def"
string token_else "else"
string token_fi "fi"
string token_if "if"
string token_return0 "return0"
string token_return1 "return1"
string token_string "string"
string token_until "until"
string token_var "var"
string token_while "while"
string token_ob "{"
string token_cb "}"
# Return 1 when the string `s` may be used as a symbol name, 0 when it equals
# one of the reserved tokens listed below (including the empty end-of-input
# token).  No other property of `s` is checked here.
def s is_symbol
{
{
# strcmp yields 0 on a match; `while` then leaves the loop, reaching the
# `0 return1` after it.
s token_eof strcmp while
s token_break strcmp while
s token_continue strcmp while
s token_def strcmp while
s token_else strcmp while
s token_fi strcmp while
s token_if strcmp while
s token_return0 strcmp while
s token_return1 strcmp while
s token_string strcmp while
s token_until strcmp while
s token_var strcmp while
s token_while strcmp while
s token_ob strcmp while
s token_cb strcmp while
# No reserved token matched.
1 return1
}
0 return1
}
# Text of the current lookahead token, filled in by get_next_token.
string next_token ""
# A silly fixed-length buffer: 4 x 16 = 64 zero bytes placed directly after
# the string above, which get_next_token writes into without a length check.
# NOTE(review): assumes each '00 token emits exactly one zero byte - confirm
# against the top-level handling of hex bytes later in the file.
'00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00
'00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00
'00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00
'00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00 '00
# Count of newline characters read so far (incremented by getchar1);
# used by syntax_error to report a line number.
var line
# comment = /#[^\n]*\n?/
# space = /\s/
# string_literal = /"([^"\\]|\\.)*"/
# token = /\S+/
# Read one character with `0 getc` and return it, bumping `line` whenever the
# character is a newline (code 10).  Callers treat a result of -1 as end of
# input.
def getchar1
{
0 getc
dup 10 == if
line 1 + line=
fi
return1
}
# Read the next token from the input (through getchar1) into next_token.
#
# Characters with code <= 32 are skipped as whitespace and a '#' starts a
# comment that runs to the end of the line.  A token is either a string
# literal (opening quote up to and including the closing quote) or a run of
# characters with code > 32.  At end of input next_token becomes the empty
# string.
#
# Inside a string literal a backslash is dropped and the character after it
# is stored verbatim, so the stored token no longer contains the escaping
# backslashes.
#
# next_token has room for 64 characters plus the terminating zero byte
# (assuming each '00 in its definition reserves one byte).  Longer tokens
# used to be written past the end of that buffer; they now stop the compiler
# with exit status 95.
#
# Exit statuses: 95 = token too long, 96 = unterminated string literal.
def get_next_token
{
    var c
    var p
    getchar1 c=
    c -1 == if
        # End of input: publish the empty token.
        0 next_token 0 c[]=
        return0
    fi
    c 32 <= if
        # Whitespace: start over with the next character.
        continue
    fi
    c 35 == if
        # Comment: discard everything up to the newline, then start over.
        {
            getchar1 c=
            c 10 == until
            c -1 == if
                0 next_token 0 c[]=
                return0
            fi
            continue
        }
        continue
    fi
    c 34 == if
        # String literal.
        next_token p=
        {
            p next_token - 64 < not if
                95 exit # token too long
            fi
            c p 0 c[]=
            p 1 + p=
            getchar1 c=
            c 34 == if
                p next_token - 64 < not if
                    95 exit # token too long
                fi
                c p 0 c[]=
                p 1 + p=
                break
            fi
            c 92 == if
                # Escape: replace the backslash by the character after it.
                getchar1 c=
            fi
            c -1 == if
                96 exit # unterminated string
            fi
            continue
        }
    else
        # Ordinary token: everything up to the next whitespace or end of input.
        next_token p=
        {
            p next_token - 64 < not if
                95 exit # token too long
            fi
            c p 0 c[]=
            p 1 + p=
            getchar1 c=
            c 32 <= until
            continue
        }
    fi
    # Terminate the token; p is at most next_token + 64 here.
    0 p 0 c[]=
    return0
}
# Return true when the current token is empty, i.e. the input is exhausted.
def eof
{
next_token 0 c[] 0 == return1
}
# Accept the token `s`: when next_token equals `s`, advance to the following
# token and return 1; otherwise leave the input untouched and return 0.
def s token
{
next_token s strcmp if 0 return1 fi
get_next_token
1 return1
}
# Message fragments printed by syntax_error.
string syntax_error_str1 "Syntax error at line "
string syntax_error_str2 " near "
string syntax_error_str3 "Syntax error at end of input"
# Report a syntax error and terminate with exit status 1.
# With a non-empty current token the message names the line and the token;
# at end of input a fixed message is printed instead.
# NOTE(review): the operand 2 passed to prints/printd/putc is presumably the
# file descriptor (standard error); header_lines is defined outside the part
# of the file reviewed here.
def syntax_error
{
next_token 0 c[] if
syntax_error_str1 2 prints
# Reported line number is line - header_lines + 1.
line header_lines - 1 + 2 printd
syntax_error_str2 2 prints
next_token 2 prints
else
syntax_error_str3 2 prints
fi
# Finish the message with a newline.
10 2 putc
1 exit
}
# Assert that `x` is true; a false value is reported through syntax_error,
# which does not return.
def x require
{
x not if syntax_error fi
return0
}
###
### Symbol tables
###
# Head of the linked list of global symbols (0 when empty).
var globals
# Head of the linked list of symbols local to the procedure being compiled.
var locals
# Number of arguments of the procedure being compiled (set by
# compile_args_name, read by generate_arg_load and compile_return).
var local_args_num
# A symbol record consists of four words: name, type, value, next.
# Size of one record in bytes.
def symbol.sizeof { wsize 4 * return1 }
# Addresses of the individual fields of the record at `sym`.
def sym symbol.name& { sym return1 }
def sym symbol.type& { sym wsize + return1 }
def sym symbol.value& { sym wsize 2 * + return1 }
def sym symbol.next& { sym wsize 3 * + return1 }
# Field readers.
def sym symbol.name { sym symbol.name& @ return1 }
def sym symbol.type { sym symbol.type& @ return1 }
def sym symbol.value { sym symbol.value& @ return1 }
# Field writers: the new value comes first, the record second.
def type sym symbol.type= { type sym symbol.type& = return0 }
def value sym symbol.value= { value sym symbol.value& = return0 }
# Look up the name `s` in the symbol list whose head pointer is stored at
# address `table`.  Returns the matching record, or 0 when no record carries
# that name.  The list is never modified.
def table s symbol_found
{
    var link
    table link=
    {
        # `link` is the address of the pointer to the record under test;
        # a null pointer marks the end of the list.
        link @ while
        link @ symbol.name s strcmp not if
            link @ return1
        fi
        link @ symbol.next& link=
        continue
    }
    0 return1
}
# Look up the name `s` in the symbol list whose head pointer is stored at
# address `table`, creating the symbol when it does not exist yet.
#
# Returns the record found, or a freshly allocated record appended to the end
# of the list with a private copy of the name and with type, value and next
# all set to 0.
#
# Allocation failures (malloc or strdup returning 0) end the program through
# out_of_memory, in the same way emit treats a failed realloc; previously the
# null pointer was written through.
def table s symbol_find
{
    var t
    table t=
    {
        t @ not if
            # End of list reached: append a new record here.
            symbol.sizeof malloc
            dup not if
                out_of_memory
            fi
            dup t =
            dup symbol.name& s strdup
            dup not if
                out_of_memory
            fi
            swap =
            dup symbol.type& 0 swap =
            dup symbol.value& 0 swap =
            dup symbol.next& 0 swap =
            return1
        fi
        t @ symbol.name s strcmp not if
            t @ return1
        fi
        # Step to the address of the next-pointer of the current record.
        t @ symbol.next& t=
        continue
    }
}
# Empty the symbol list whose head pointer is stored at address `table`.
#
# The explicit return0 was missing: the procedure ended through the plain
# leave/ret epilogue (generate_proc_end), which, unlike generate_return0,
# does not remove the argument from the stack, so `table` was left behind
# on the caller's stack.
def table symbol_table_clear
{
    # FIXME: leaks (the records and their name strings are not freed)
    0 table =
    return0
}
###
### Code generation
###
# Emit the low 32 bits of `n` as four bytes at offsets a .. a+3.
def a n generate_int
{
# little-endian
a n 255 & emit
a 1 + n 8 >> 255 & emit
a 2 + n 16 >> 255 & emit
a 3 + n 24 >> 255 & emit
return0
}
# Emit at offset `a` a 5-byte relative call to offset `n`.
# The displacement is relative to the end of the instruction (a + 5).
# Returns the offset following the instruction.
def a n generate_call
{
a 232 emit # 0xe8 = call N
a 1 + n a - 5 - generate_int
a 5 + return1
}
# Emit at offset `a` a 5-byte relative jump to offset `n`.
# The displacement is relative to the end of the instruction (a + 5).
# Returns the offset following the instruction.
def a n generate_jump
{
a 233 emit # 0xe9 = jmp N
a 1 + n a - 5 - generate_int
a 5 + return1
}
# Emit at offset `a` a 9-byte sequence that pops the top of the stack and
# jumps to offset `n` when the popped value is zero.
# Returns the offset following the sequence.
def a n generate_branch_false
{
a 88 emit # 0x58 = pop %eax
a 1 + 133 emit
a 2 + 192 emit # 0x85 0xc0 = test %eax,%eax
a 3 + 15 emit
a 4 + 132 emit # 0x0f 0x84 = je N
# Displacement is relative to the end of the sequence (a + 9).
a 5 + n a - 9 - generate_int
a 9 + return1
}
# Emit at offset `a` a 9-byte sequence that pops the top of the stack and
# jumps to offset `n` when the popped value is non-zero.
# Returns the offset following the sequence.
def a n generate_branch_true # unused
{
a 88 emit # 0x58 = pop %eax
a 1 + 133 emit
a 2 + 192 emit # 0x85 0xc0 = test %eax,%eax
a 3 + 15 emit
a 4 + 133 emit # 0x0f 0x85 = jne N
# Displacement is relative to the end of the sequence (a + 9).
a 5 + n a - 9 - generate_int
a 9 + return1
}
# Emit at offset `a` a 5-byte instruction pushing the constant `n`.
# Returns the offset following the instruction.
def a n generate_constant
{
a 104 emit # 0x68 = push N
a 1 + n generate_int
a 5 + return1
}
# Emit the 3-byte procedure prologue at offset `a`: save the caller's frame
# pointer and make the current stack pointer the new frame pointer.
# Returns the offset following the prologue.
def a generate_proc_start
{
a 85 emit # 0x55 = push %ebp
a 1 + 137 emit
a 2 + 229 emit # 0x89 0xe5 = mov %esp,%ebp
a 3 + return1
}
# Emit the 2-byte fall-through epilogue at offset `a`.  Unlike
# generate_return0/generate_return1 it does not remove the procedure's
# arguments from the stack.  Returns the offset following the epilogue.
def a generate_proc_end
{
a 201 emit # 0xc9 = leave
a 1 + 195 emit # 0xc3 = ret
a 2 + return1
}
# Emit at offset `a` a 7-byte sequence pushing the value of argument number
# `n` (arguments are numbered from 1 by compile_args_name).
# Returns the offset following the sequence.
def a n generate_arg_load
{
a 139 emit
a 1 + 133 emit # 0x8b 0x85 = mov N(%ebp),%eax
# Frame offset is (2 + local_args_num - n) * 4 bytes above %ebp.
a 2 + 2 local_args_num + n - 4 * generate_int
a 6 + 80 emit # 0x50 = push %eax
a 7 + return1
}
# Emit at offset `a` a 6-byte instruction reserving n * 4 bytes of stack for
# local variables.  Nothing is emitted when `n` is 0.
# Returns the offset following whatever was emitted.
def a n generate_stack_frame
{
n not if
a return1
fi
a 129 emit
a 1 + 236 emit # 0x81 0xec = sub $N,%esp
a 2 + n 4 * generate_int
a 6 + return1
}
# Emit at offset `a` a 7-byte sequence pushing the value of local variable
# number `n` (numbered from 1 by compile_vars).
# Returns the offset following the sequence.
def a n generate_var_load
{
a 139 emit
a 1 + 133 emit # 0x8b 0x85 = mov N(%ebp),%eax
# Frame offset is -(n * 4): locals live below %ebp.
a 2 + 0 n - 4 * generate_int
a 6 + 80 emit # 0x50 = push %eax
a 7 + return1
}
# Emit at offset `a` a 7-byte sequence popping the top of the stack into
# local variable number `n`.  Returns the offset following the sequence.
def a n generate_var_store
{
a 88 emit # 0x58 = pop %eax
a 1 + 137 emit
a 2 + 133 emit # 0x89 0x85 = mov %eax,N(%ebp)
# Frame offset is -(n * 4).
a 3 + 0 n - 4 * generate_int
a 7 + return1
}
# Emit at offset `a` a 7-byte sequence pushing the address of local variable
# number `n`.  Returns the offset following the sequence.
def a n generate_var_addr
{
a 141 emit
a 1 + 133 emit # 0x8d 0x85 = lea N(%ebp),%eax
# Frame offset is -(n * 4).
a 2 + 0 n - 4 * generate_int
a 6 + 80 emit # 0x50 = push %eax
a 7 + return1
}
# Reserve the storage of a global variable: four zero bytes at offset `a`.
# Returns the offset following them.
def a generate_global
{
a 0 generate_int
a 4 + return1
}
# Emit at offset `a` a 13-byte sequence pushing the value of the global
# stored at image offset `n`.  Returns the offset following the sequence.
#
# The call with displacement 0 pushes the run-time address of offset a + 5,
# which pop %eax retrieves; the global is then addressed relative to it, so
# the displacement used is n - (a + 5).
def a n generate_global_load
{
a 232 emit # 0xe8 = call N
a 1 + 0 generate_int
a 5 + 88 emit # 0x58 = pop %eax
a 6 + 139 emit
a 7 + 128 emit # 0x8b 0x80 = mov N(%eax),%eax
a 8 + n a - 5 - generate_int
a 12 + 80 emit # 0x50 = push %eax
a 13 + return1
}
# Emit at offset `a` a 13-byte sequence popping the top of the stack into the
# global stored at image offset `n`.  Returns the offset following the
# sequence.
#
# As in generate_global_load, call +0 / pop %eax obtains the run-time address
# of offset a + 5 and the global is addressed relative to it.
def a n generate_global_store
{
a 232 emit # 0xe8 = call N
a 1 + 0 generate_int
a 5 + 88 emit # 0x58 = pop %eax
a 6 + 91 emit # 0x5b = pop %ebx
a 7 + 137 emit
a 8 + 152 emit # 0x89 0x98 = mov %ebx,N(%eax)
a 9 + n a - 5 - generate_int
a 13 + return1
}
# Emit at offset `a` a 13-byte sequence pushing the run-time address of image
# offset `n` (used for `name&` on globals and for string symbols).
# Returns the offset following the sequence.
#
# As in generate_global_load, call +0 / pop %eax obtains the run-time address
# of offset a + 5 and the target is computed relative to it.
def a n generate_global_addr
{
a 232 emit # 0xe8 = call N
a 1 + 0 generate_int
a 5 + 88 emit # 0x58 = pop %eax
a 6 + 141 emit
a 7 + 128 emit # 0x8d 0x80 = lea N(%eax),%eax
a 8 + n a - 5 - generate_int
a 12 + 80 emit # 0x50 = push %eax
a 13 + return1
}
# Emit at offset `a` a 5-byte sequence that pops the top of the stack and,
# when the value is zero, skips the next 5 bytes (the size of the jump
# emitted by generate_jump).  Returns the offset following the sequence.
def a generate_skip_jump_if_false
{
a 88 emit # 0x58 = pop %eax
a 1 + 133 emit
a 2 + 192 emit # 0x85 0xc0 = test %eax,%eax
a 3 + 116 emit
a 4 + 5 emit # 0x74 0x05 = je +5
a 5 + return1
}
# Emit at offset `a` a 5-byte sequence that pops the top of the stack and,
# when the value is non-zero, skips the next 5 bytes (the size of the jump
# emitted by generate_jump).  Returns the offset following the sequence.
def a generate_skip_jump_if_true
{
a 88 emit # 0x58 = pop %eax
a 1 + 133 emit
a 2 + 192 emit # 0x85 0xc0 = test %eax,%eax
a 3 + 117 emit
a 4 + 5 emit # 0x75 0x05 = jne +5
a 5 + return1
}
# Emit at offset `a` the 10-byte code for `return0`: tear down the frame,
# remove the procedure's `n` arguments (n * 4 bytes) from beneath the return
# address and return without a result.
# Returns the offset following the sequence.
def a n generate_return0
{
a 201 emit # 0xc9 = leave
a 1 + 91 emit # 0x5b = pop %ebx
a 2 + 129 emit
a 3 + 196 emit # 0x81 0xc4 = add $N,%esp
a 4 + n 4 * generate_int
a 8 + 83 emit # 0x53 = push %ebx
a 9 + 195 emit # 0xc3 = ret
a 10 + return1
}
# Emit at offset `a` the 12-byte code for `return1`: take the result from the
# top of the stack, tear down the frame, remove the procedure's `n` arguments
# (n * 4 bytes), push the result in their place and return.
# Returns the offset following the sequence.
def a n generate_return1
{
a 88 emit # 0x58 = pop %eax
a 1 + 201 emit # 0xc9 = leave
a 2 + 91 emit # 0x5b = pop %ebx
a 3 + 129 emit
a 4 + 196 emit # 0x81 0xc4 = add $N,%esp
a 5 + n 4 * generate_int
a 9 + 80 emit # 0x50 = push %eax
a 10 + 83 emit # 0x53 = push %ebx
a 11 + 195 emit # 0xc3 = ret
a 12 + return1
}
###
### Parser
###
# Terminate the compiler with exit status 77; used for symbol kinds and
# constructs the compiler does not handle.
def unimplemented
{
77 exit
}
# Resolve a chain of pending forward jumps so that they all target the
# current pc.
#
# `end` is the image offset of the most recently recorded pending jump, or -1
# for an empty chain.  The first word at each pending offset holds the offset
# of the previous pending jump (stored there by compile_break, compile_until
# and compile_while).
def end label_define
{
end
{
# The offset under examination stays on the stack; -1 ends the chain.
dup -1 == if return0 fi
# Read the link to the next entry before the jump overwrites it.
dup fetch_int
swap pc generate_jump drop
continue
}
}
# Try to compile the current token as a decimal integer constant.
#
# Accepts an optional leading '-' followed by one or more digits.  On success
# code pushing the value is generated, the token is consumed and 1 is
# returned; otherwise nothing is generated or consumed and 0 is returned.
# The value is accumulated without any overflow check.
def compile_number
{
var n
var p
var c
var neg
next_token p=
# 45 = '-'
p 0 c[] 45 == if
1 neg=
p 1 + p=
else
0 neg=
fi
# The first character after the optional sign must be a digit ('0' = 48,
# '9' = 57).
p 0 c[] c=
c 48 < if
0 return1
fi
c 58 < not if
0 return1
fi
c 48 - n=
{
p 1 + p=
p 0 c[] c=
# End of the token: the number is complete.
c not if
break
fi
# Any non-digit means the token is not a number.
c 48 < if
0 return1
fi
c 58 < not if
0 return1
fi
n 10 * c + 48 - n=
continue
}
neg if
0 n - n=
fi
pc n generate_constant pc=
get_next_token
1 return1
}
# Try to compile the word `s` as a reference to a local symbol of the
# procedure being compiled.  Returns 1 when code was generated, 0 when `s`
# is not a local reference.  The token is not consumed here.
#
# Local symbol types: 0 = no local meaning (e.g. the procedure's own name),
# 1 = argument, 2 = local variable; the symbol value is its 1-based index.
#
# Forms handled:
#   name    push the value of an argument or variable
#   name=   pop the top of the stack into a variable
#   name&   push the address of a variable
# Applying `=` or `&` to an argument ends the program with exit status 101.
def s compile_local_symbol
{
var sym
var c
locals& s symbol_found sym=
sym if
sym symbol.type
dup 0 == if
0 return1
fi
dup 1 == if
pc sym symbol.value generate_arg_load pc=
1 return1
fi
dup 2 == if
pc sym symbol.value generate_var_load pc=
1 return1
fi
97 exit # internal error
fi
# Not found as written: inspect the last character for a '=' or '&' suffix.
s s strlen 1 - c[] c=
c 61 != if # '='
c 38 != if # '&'
0 return1
fi
fi
# Chop and restore the last char - what a hack
0 s s strlen 1 - c[]=
locals& s symbol_found sym=
c s s strlen c[]=
sym if
sym symbol.type
dup 1 == if
101 exit # arg_addr or arg_store
fi
dup 2 == if
c 61 == if
pc sym symbol.value generate_var_store pc=
else
pc sym symbol.value generate_var_addr pc=
fi
1 return1
fi
fi
0 return1
}
# FIXME: The following function should be merged with the previous one.
# Compile the word `s` as a reference to a global symbol.  Always returns 1
# (or ends the program through unimplemented); the token is not consumed.
#
# Global symbol types:
#   0  created but not yet given a meaning
#   1  procedure referenced before its definition; the value is the image
#      offset of the latest call site in a chain of sites to patch (-1 ends
#      the chain)
#   2  defined procedure; the value is its image offset
#   3  global variable; the value is the offset of its storage
#   4  string; the value is the offset of its bytes
#
# A name that is unknown, both as written and with a trailing '=' or '&'
# removed, is entered as a type-1 forward procedure reference.
def s compile_global_symbol
{
var sym
var c
globals& s symbol_found sym=
sym if
sym symbol.type 1 == if
# Emit a placeholder call and link it into the chain of pending call
# sites: its first word receives the previous chain head, and the symbol
# value becomes the offset of this call.
pc
pc 0 generate_call pc=
dup sym symbol.value store_int
sym symbol.value=
1 return1
fi
sym symbol.type 2 == if
pc sym symbol.value generate_call pc=
1 return1
fi
sym symbol.type 3 == if
pc sym symbol.value generate_global_load pc=
1 return1
fi
sym symbol.type 4 == if
pc sym symbol.value generate_global_addr pc=
1 return1
fi
unimplemented
fi
s s strlen 1 - c[] c=
c 61 != if # '='
c 38 != if # '&'
# No suffix: treat the unknown name as a forward procedure reference and
# compile the call through the type-1 branch above.
globals& s symbol_find sym=
-1 sym symbol.value=
1 sym symbol.type=
s compile_global_symbol
1 return1
fi
fi
# Chop and restore the last char - what a hack
0 s s strlen 1 - c[]=
globals& s symbol_found sym=
c s s strlen c[]=
sym if
sym symbol.type
dup 3 == if
c 61 == if
pc sym symbol.value generate_global_store pc=
else
pc sym symbol.value generate_global_addr pc=
fi
1 return1
fi
# `=` or `&` applied to anything but a global variable.
unimplemented
fi
# The suffixed name is itself an unknown procedure name (e.g. one that
# ends in '=' or '&'): enter it as a forward reference.
globals& s symbol_find sym=
-1 sym symbol.value=
1 sym symbol.type=
s compile_global_symbol
1 return1
}
# Try to compile the current token as a symbol reference.  Reserved tokens
# are rejected (return 0).  Local symbols take precedence over globals.  On
# success the token is consumed and 1 is returned.
def compile_word
{
next_token is_symbol not if
0 return1
fi
next_token compile_local_symbol if
get_next_token
1 return1
fi
next_token compile_global_symbol if
get_next_token
1 return1
fi
0 return1
}
# Try to compile a `{ ... }` block.  Returns 0 without consuming anything
# when the current token is not `{`, otherwise 1.
#
# The offset at the start of the block is what `continue` jumps back to; the
# local `end` heads the chain of pending exits (break/until/while), which are
# resolved to the offset just after the block.
def compile_loop
{
var end
token_ob token not if
0 return1
fi
-1 end=
# NOTE(review): the result of compile_body is left on the stack here, whereas
# compile_procedure passes it to require.
pc end& compile_body
end label_define
token_cb token require
1 return1
}
# Try to compile `if ... fi` or `if ... else ... fi`.
#
# `begin` and `end` describe the enclosing block and are only handed on to
# compile_body so that continue/break/until/while inside the branches keep
# working.  Returns 0 without consuming anything when the current token is
# not `if`, otherwise 1.
def begin end compile_if
{
    var branch_at
    var jump_at
    token_if token not if
        0 return1
    fi
    # Placeholder conditional branch; its target is filled in below.
    pc dup branch_at=
    0 generate_branch_false pc=
    begin end compile_body require
    token_fi token if
        # No else part: a false condition continues right here.
        branch_at pc generate_branch_false
        1 return1
    fi
    token_else token require
    # The true branch ends with a placeholder jump over the else part.
    pc dup jump_at=
    0 generate_jump pc=
    # A false condition enters the else part, which starts here.
    branch_at pc generate_branch_false
    begin end compile_body require
    token_fi token require
    jump_at pc generate_jump
    1 return1
}
# Try to compile `break`.  `end` is the address of the variable heading the
# enclosing block's chain of pending exits.  Returns 0 when the current
# token is not `break`, otherwise 1.
def end compile_break
{
token_break token not if
0 return1
fi
# Emit a placeholder jump; the advanced offset stays on the stack.
pc 0 generate_jump
# Overwrite the placeholder's first word with the previous chain head and
# make this jump the new head (label_define re-emits the jump later).
pc end @ store_int
pc end =
# Pop the advanced offset into pc.
pc=
1 return1
}
# Try to compile `continue` as a jump back to `begin`, the start offset of
# the enclosing block.  Returns 0 when the current token is not `continue`,
# otherwise 1.
def begin compile_continue
{
token_continue token not if
0 return1
fi
pc begin generate_jump pc=
1 return1
}
# Try to compile `until`: pop a value and leave the enclosing block when it
# is true.  `end` is the address of the variable heading the block's chain of
# pending exits.  Returns 0 when the current token is not `until`, else 1.
def end compile_until
{
token_until token not if
0 return1
fi
# A false value skips the exit jump emitted next.
pc generate_skip_jump_if_false pc=
# Placeholder exit jump, linked into the chain as in compile_break.
pc 0 generate_jump
pc end @ store_int
pc end =
pc=
1 return1
}
# Try to compile `while`: pop a value and leave the enclosing block when it
# is false.  `end` is the address of the variable heading the block's chain
# of pending exits.  Returns 0 when the current token is not `while`, else 1.
def end compile_while
{
token_while token not if
0 return1
fi
# A true value skips the exit jump emitted next.
pc generate_skip_jump_if_true pc=
# Placeholder exit jump, linked into the chain as in compile_break.
pc 0 generate_jump
pc end @ store_int
pc end =
pc=
1 return1
}
# Try each of break, continue, until and while in turn.  Returns 1 as soon as
# one of them compiled, 0 when none applies.
def begin end compile_jump
{
{
# `until` leaves this block at the first alternative that succeeded.
end compile_break until
begin compile_continue until
end compile_until until
end compile_while until
0 return1
}
1 return1
}
# Try to compile `return0` or `return1` for the procedure being compiled,
# whose argument count is local_args_num.  Returns 1 when one of them was
# compiled, 0 otherwise.
def compile_return
{
token_return0 token if
pc local_args_num generate_return0 pc=
1 return1
fi
token_return1 token if
pc local_args_num generate_return1 pc=
1 return1
fi
0 return1
}
# Compile a sequence of statements until a token is reached that none of the
# statement compilers accepts; then return 1.
#
# `begin` is the offset `continue` jumps to and `end` the address of the
# variable heading the chain of pending exits of the enclosing block.
# Each `continue` below restarts this procedure body for the next statement.
def begin end compile_body
{
compile_number if continue fi
compile_word if continue fi
compile_loop if continue fi
begin end compile_if if continue fi
begin end compile_jump if continue fi
compile_return if continue fi
1 return1
}
# Compile the `var name` declarations at the start of a procedure body.
#
# Every declared name is entered into the local symbol list as a variable
# (type 2) numbered from 1 upwards; afterwards the stack space for all of
# them is reserved.  A name that already has a local meaning ends the
# program with exit status 54.  Always returns 1.
def compile_vars
{
    var nvars
    var entry
    0 nvars=
    {
        token_var token while
        next_token is_symbol require
        locals& next_token symbol_find entry=
        entry symbol.type if
            54 exit # variable redefined
        fi
        2 entry symbol.type=
        # The new count doubles as the index of this variable.
        nvars 1 + dup nvars=
        entry symbol.value=
        get_next_token
        continue
    }
    pc nvars generate_stack_frame pc=
    1 return1
}
# Define the procedure symbol `sym` at the current pc and resolve every call
# site that referenced it before its definition.
#
# The symbol value heads a chain of pending call sites (-1 ends the chain);
# the first word at each site holds the offset of the next one.  Each site is
# rewritten as a call to pc, after which the symbol becomes type 2 with value
# pc.
def sym compile_define_proc_1
{
sym
dup symbol.value
{
# Stack: symbol, current call-site offset.
dup -1 == if
drop dup 2 swap symbol.type=
pc swap symbol.value=
return0
fi
# Fetch the link first, then overwrite the site with the real call.
dup fetch_int swap
pc generate_call drop
continue
}
}
# Define the global procedure named `s` at the current pc.
#
# A name that is new (type 0) or so far only referenced (type 1) becomes a
# defined procedure through compile_define_proc_1.  A name that already
# denotes a procedure (2), a global variable (3) or a string (4) ends the
# program with exit status 32.  Previously a string name fell through to
# `unimplemented` (exit status 77) although it is a plain redefinition like
# the other two.
def s compile_define_proc
{
    globals& s symbol_find
    dup symbol.type 0 == if
        # Fresh symbol: start with an empty chain of pending call sites.
        dup -1 swap symbol.value=
        compile_define_proc_1
        return0
    fi
    dup symbol.type 1 == if
        compile_define_proc_1
        return0
    fi
    dup symbol.type 2 == if
        32 exit # symbol redefined
    fi
    dup symbol.type 3 == if
        32 exit # symbol redefined
    fi
    dup symbol.type 4 == if
        32 exit # symbol redefined
    fi
    unimplemented
}
# Define the global variable named `s`: mark the symbol as type 3, record the
# current pc as its location and reserve its storage there.  Any name that
# already has a global meaning ends the program with exit status 33.
def s compile_define_var
{
globals& s symbol_find
dup symbol.type 0 == if
dup 3 swap symbol.type=
pc swap symbol.value=
pc generate_global pc=
return0
fi
33 exit # symbol redefined
}
# FIXME: combine these functions?
# Define the global string named `s`: mark the symbol as type 4 and record
# the current pc as the location of its bytes.  The bytes themselves are
# emitted by the caller (compile_global, via compile_string_literal).  Any
# name that already has a global meaning ends the program with exit status
# 33.
def s compile_define_string
{
globals& s symbol_find
dup symbol.type 0 == if
dup 4 swap symbol.type=
pc swap symbol.value=
return0
fi
33 exit # symbol redefined
}
# Compile the header of a procedure definition: zero or more argument names
# followed by the procedure name (`def arg1 arg2 name`).
#
# Every symbol read is first entered as an argument (local type 1) numbered
# from 1; the last one is then recognised as the procedure name, reset to
# local type 0 and defined globally.  local_args_num is set to the number of
# real arguments.  A repeated name ends the program with exit status 34.
# Always returns 1.
def compile_args_name
{
var count
var sym
0 count=
# At least one symbol (the procedure name) must be present.
next_token is_symbol require
{
next_token is_symbol while
locals& next_token symbol_find sym=
sym symbol.type if
34 exit # local symbol reused
fi
1 sym symbol.type=
count 1 + count=
count sym symbol.value=
get_next_token
continue
}
# The last one was the procedure name, not an argument!
0 sym symbol.type=
sym symbol.name compile_define_proc
count 1 - local_args_num=
1 return1
}
# Try to compile one procedure definition:
#   def [args...] name { [var ...] body }
# Returns 0 without consuming anything when the current token is not `def`,
# otherwise 1.  Syntax errors end the program through require.
#
# The procedure body behaves like a block: `continue` jumps to the offset
# just after the stack frame setup, and pending exits are resolved to the
# fall-through epilogue.  The local symbol list is emptied afterwards.
def compile_procedure
{
var end
token_def token not if 0 return1 fi
compile_args_name require
pc generate_proc_start pc=
token_ob token require
compile_vars require
-1 end=
pc end& compile_body require
end label_define
pc generate_proc_end pc=
token_cb token require
locals& symbol_table_clear
1 return1
}
# Try to compile the current token as a string literal: emit its characters
# followed by a terminating zero byte at pc.  Returns 0 without consuming
# anything when the token does not start with a quote, otherwise consumes
# the token and returns 1.
#
# get_next_token has already removed the escaping backslashes, so the token
# consists of the opening quote, the literal characters (possibly including
# quotes and backslashes that were escaped in the source) and the closing
# quote.  The closing quote is therefore the one directly followed by the
# end of the token; every other character is emitted as it stands.
#
# The former version treated a backslash in the token as an escape a second
# time, emitting the following character and then the backslash itself, and
# rejected any quote inside the token as a syntax error, so neither \\ nor
# \" in a source string produced the intended bytes.
def compile_string_literal
{
    var p
    next_token p=
    p 0 c[] 34 != if
        0 return1
    fi
    {
        p 1 + p=
        p 0 c[]
        # The token must not end before its closing quote.
        dup require
        dup 34 == if
            p 1 c[] 0 == if
                # Closing quote: terminate the string.
                drop
                pc 0 emit
                pc 1 + pc=
                break
            fi
        fi
        pc swap emit
        pc 1 + pc=
        continue
    }
    get_next_token
    1 return1
}
# Try to compile a global definition:
#   var name              reserves storage for a global variable
#   string name "text"    emits the bytes of a named string
# Returns 1 when one of them was compiled, 0 when the current token starts
# neither.  Syntax errors end the program through require.
def compile_global
{
token_var token if
next_token is_symbol require
next_token compile_define_var
get_next_token
1 return1
fi
token_string token if
next_token is_symbol require
# Record the symbol at the current pc, then emit the literal there.
next_token compile_define_string
get_next_token
compile_string_literal require
1 return1
fi
0 return1
}
def character convert_hex
{