diff --git a/komarov.andrey/.gitignore b/komarov.andrey/.gitignore new file mode 100644 index 00000000..ed30ac1 --- /dev/null +++ b/komarov.andrey/.gitignore @@ -0,0 +1,4 @@ +arm +.cabal-sandbox +*flymake* +dist diff --git a/komarov.andrey/LICENSE b/komarov.andrey/LICENSE new file mode 100644 index 00000000..ee7d6a5 --- /dev/null +++ b/komarov.andrey/LICENSE @@ -0,0 +1,14 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. + diff --git a/komarov.andrey/README.md b/komarov.andrey/README.md new file mode 100644 index 00000000..df0af6c --- /dev/null +++ b/komarov.andrey/README.md @@ -0,0 +1,67 @@ +# Что должно быть установлено в системе # + +* qemu +* cabal + +# Установка эмулятора # + +В папке `vm` лежит файл `download.sh`, который скачает всё необходимое +для запуска виртуальной машины (всё взято отсюда: +https://people.debian.org/~aurel32/qemu/armel/). + +Затем, `run.sh` запустит виртуальную машину (несколько минут). После +запуска, можно будет заходить на неё по ssh на 2222 порту. Пароли: +user/user, root/root. + +На виртуальной машине выполнить: + +`````` +~# apt-get update && apt-get install -y gcc strace +`````` + +Убедиться, что `as` и `ld` установились. + +# Установка компилятора # + +`````` +cabal install +`````` + +После этого должен появиться файл `~/.cabal/bin/fcc`. Это компилятор, +который принимает на вход программу, и выводит на стандартный вывод +ассемблер для ARM. Процесс компиляции и запуска: + +````` +local $ fcc < examples/cat.fc > cat.S +local $ scp -P 2222 cat.S user@localhost:~ +user@debian-armel $ as cat.S -o cat.o +user@debian-armel $ ld cat.o -o cat +user@debian-armel $ echo hello | ./cat +hello +````` + +# Как запустить что-то, отличное от `cat` # + +В остальных примерах не используется ввод-вывод, а для этого результат +возвращается из функции main как код возврата. Наглядно за этим можно, +например, наблюдать, запуская программу через strace и смотря на +аргумент, с которым вызван системный вызов `exit` + +# Оптимизации # + +Реализовано два вида оптимизаций: + +* Удаление ненужного кода +Начинаем с точки входа, смотрим, какие функции и глобальные +переменные достижимы, остальные удаляем +* Пытаемся посчитать значения подвыражений + * Выделяем <<хорошие>> функции и пытаемся вычислять их во время компиляции + * Хорошие --- те, которые не используют <<неправильных>> + <<библиотечных>> вызовов, массивов (TBD) и глобальных переменных + * Удаляем недостижимый код и упраздняем очевидные `if`-ы и `while`-ы + * Вычислятор -решает проблему останова- делает 10000 итераций и + успокаивается, если вычислить не удалось + +Примеры хорошо оптимизируемых программ: `examples/fact.fc`, +`examples/sum.fc` + diff --git a/komarov.andrey/Setup.hs b/komarov.andrey/Setup.hs new file mode 100644 index 00000000..9a994af --- /dev/null +++ b/komarov.andrey/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/komarov.andrey/asm/Makefile b/komarov.andrey/asm/Makefile new file mode 100644 index 00000000..3e277fd --- /dev/null +++ b/komarov.andrey/asm/Makefile @@ -0,0 +1,9 @@ + +hello: example.o + gcc -g hello.c example.o -o hello + +example.o: example.S + as example.S -o example.o + +clean: + rm -f *.o hello diff --git a/komarov.andrey/asm/example.S b/komarov.andrey/asm/example.S new file mode 100644 index 00000000..9cfd0af --- /dev/null +++ b/komarov.andrey/asm/example.S @@ -0,0 +1,52 @@ +.data +x: .word 0 + +.text + +xx: .word x + +.global f +.global fact +.global ret5 +.global ret6 + +f: +ldr r1, xx +ldr r0, [r1] +add r0, #1 +str r0, [r1] +mov pc, lr + +# calling conv. +ret5: +ret6: +ldr r0, [sp] +mov pc, lr + +# fact(n) +fact: +push {fp, lr} @ +mov fp, sp @ ENTER +sub sp, sp, #4 @ + +mov r2, #0 +teq r0, r2 +beq ret1 + +str r0, [fp, #-4] +sub r0, r0, #1 +bl fact +ldr r1, [fp, #-4] +mul r0, r1, r0 +b ret + +ret1: +mov r0, #1 +b ret + +ret: +mov sp, fp @ +pop {fp, lr} @ EXIT +mov pc, lr @ + + diff --git a/komarov.andrey/asm/hello.c b/komarov.andrey/asm/hello.c new file mode 100644 index 00000000..dca8272 --- /dev/null +++ b/komarov.andrey/asm/hello.c @@ -0,0 +1,27 @@ +#include + +int f(); + +int fact(int x); + +int ret5(int a, int b, int c, int d, int e); + +int ret6(int a, int b, int c, int d, int e, int f); + +int add(int a, int b) +{ + int c = a + b; + return c; +} + +int main() +{ + printf("hello\n"); + int i; + for (i = 0; i < 10; i++) + printf("%d ", f()); + printf("\n"); + printf("%d\n", fact(5)); + printf("%d\n", ret5(1, 2, 3, 4, 5)); + printf("%d\n", ret6(1, 2, 3, 4, 5, 6)); +} diff --git a/komarov.andrey/demo/Makefile b/komarov.andrey/demo/Makefile new file mode 100644 index 00000000..45bb875 --- /dev/null +++ b/komarov.andrey/demo/Makefile @@ -0,0 +1,13 @@ +all: + make -C ../src + ../src/TestCompiler < callc.l > callc.S + ../src/TestCompiler < isPrime.l > prime.S + ../src/TestCompiler < fact.l > fact.S + ../src/TestCompiler < global.l > global.S + +compile: + gcc callc.S call.c -o call + gcc fact.S fact.c -o fact + gcc global.S global.c -o global + gcc prime.S prime.c -o prime + diff --git a/komarov.andrey/demo/call.c b/komarov.andrey/demo/call.c new file mode 100644 index 00000000..e97065e --- /dev/null +++ b/komarov.andrey/demo/call.c @@ -0,0 +1,14 @@ +#include + +int add(int a, int b) +{ + return a + b; +} + +int f(); + +int main() +{ + printf("%d\n", f()); +} + diff --git a/komarov.andrey/demo/callc.l b/komarov.andrey/demo/callc.l new file mode 100644 index 00000000..55bc32e --- /dev/null +++ b/komarov.andrey/demo/callc.l @@ -0,0 +1,6 @@ +int add(int a, int b); + +int f() +{ + return add(40, 2); +} diff --git a/komarov.andrey/demo/fact.c b/komarov.andrey/demo/fact.c new file mode 100644 index 00000000..05abbf4 --- /dev/null +++ b/komarov.andrey/demo/fact.c @@ -0,0 +1,9 @@ +#include + +int fact(int n); + +int main() +{ + printf("%d\n", fact(5)); + return 0; +} diff --git a/komarov.andrey/demo/fact.l b/komarov.andrey/demo/fact.l new file mode 100644 index 00000000..a43666e --- /dev/null +++ b/komarov.andrey/demo/fact.l @@ -0,0 +1,6 @@ +int fact(int n) { + if (n == 0) + return 1; + else + return n * fact(n - 1); +} diff --git a/komarov.andrey/demo/global.c b/komarov.andrey/demo/global.c new file mode 100644 index 00000000..7cc6182 --- /dev/null +++ b/komarov.andrey/demo/global.c @@ -0,0 +1,12 @@ +#include + +int inc(); + +int main() +{ + int i; + for (i = 0; i < 10; i++) + printf("%d ", inc()); + printf("\n"); + return 0; +} diff --git a/komarov.andrey/demo/global.l b/komarov.andrey/demo/global.l new file mode 100644 index 00000000..5a2f660 --- /dev/null +++ b/komarov.andrey/demo/global.l @@ -0,0 +1,8 @@ + +int v; + +int inc() +{ + v = v + 1; + return v; +} diff --git a/komarov.andrey/demo/isPrime.l b/komarov.andrey/demo/isPrime.l new file mode 100644 index 00000000..c9f9b53 --- /dev/null +++ b/komarov.andrey/demo/isPrime.l @@ -0,0 +1,50 @@ +int div(int a, int b); +int mod(int a, int b); + +bool isPrime(int n) +{ + int i; + i = 2; + while (i * i <= n) + { + if (mod(n, i) == 0) + return false; + else {} + i = i + 1; + } + return true; +} + +int numPrimes(int from, int to) +{ + int i; + i = from; + int ans; + ans = 0; + while (i < to) + { + if (isPrime(i)) + ans = ans + 1; + else {} + i = i + 1; + } + return ans; +} + +int mod(int a, int b) +{ + return a - b * div(a, b); +} + +int div(int a, int b) +{ + int c; + c = 0; + while (a >= 0) + { + c = c + 1; + a = a - b; + } + return c - 1; +} + diff --git a/komarov.andrey/demo/prime.c b/komarov.andrey/demo/prime.c new file mode 100644 index 00000000..1835613 --- /dev/null +++ b/komarov.andrey/demo/prime.c @@ -0,0 +1,15 @@ +#include + +int isPrime(int n); +int numPrimes(int from, int to); + +int main() +{ + int i; + for (i = 2; i < 20; i++) + { + printf("%d is %s\n", i, isPrime(i) ? "prime" : "not prime"); + } + printf("%d primes between 100 and 5000\n", numPrimes(100, 5000)); +} + diff --git a/komarov.andrey/examples/cat-global.fc b/komarov.andrey/examples/cat-global.fc new file mode 100644 index 00000000..8dd0bd7 --- /dev/null +++ b/komarov.andrey/examples/cat-global.fc @@ -0,0 +1,9 @@ +int c; +int main() { + c = getchar(); + while (c != 0-1) { + putchar(c); + c = getchar(); + } + return 0; +} diff --git a/komarov.andrey/examples/cat.fc b/komarov.andrey/examples/cat.fc new file mode 100644 index 00000000..c0e6df0 --- /dev/null +++ b/komarov.andrey/examples/cat.fc @@ -0,0 +1,9 @@ +int main() { + int c; + c = getchar(); + while (c != 0-1) { + putchar(c); + c = getchar(); + } + return 0; +} diff --git a/komarov.andrey/examples/e.fc b/komarov.andrey/examples/e.fc new file mode 100644 index 00000000..fcf4713 --- /dev/null +++ b/komarov.andrey/examples/e.fc @@ -0,0 +1 @@ +int main() { return 10 + 20; } diff --git a/komarov.andrey/examples/e2.fc b/komarov.andrey/examples/e2.fc new file mode 100644 index 00000000..e0a34fc --- /dev/null +++ b/komarov.andrey/examples/e2.fc @@ -0,0 +1,11 @@ +int f() { + if (true) { + return 10; + } else { + return 20; + } +} + +int main() { + return f(); +} diff --git a/komarov.andrey/examples/e3.fc b/komarov.andrey/examples/e3.fc new file mode 100644 index 00000000..7a5e836 --- /dev/null +++ b/komarov.andrey/examples/e3.fc @@ -0,0 +1,9 @@ +int fact(int n) { + if (n == 0) { + return 10; + } else { + return 20; + } +} + +int main() { return fact(10); } diff --git a/komarov.andrey/examples/e4.fc b/komarov.andrey/examples/e4.fc new file mode 100644 index 00000000..b48a5aa --- /dev/null +++ b/komarov.andrey/examples/e4.fc @@ -0,0 +1,9 @@ +int fact(int n) { + if (n == 0) { + return 10; + } else { + return fact(n - 3); + } +} + +int main() { return fact(10); } diff --git a/komarov.andrey/examples/e5.fc b/komarov.andrey/examples/e5.fc new file mode 100644 index 00000000..51cdeed --- /dev/null +++ b/komarov.andrey/examples/e5.fc @@ -0,0 +1,10 @@ +int f() { + int a; + a = 0; + while (a < 10) { + a = a + 3; + } + return a; +} + +int main() { return f(); } diff --git a/komarov.andrey/examples/e6.fc b/komarov.andrey/examples/e6.fc new file mode 100644 index 00000000..0d24a83 --- /dev/null +++ b/komarov.andrey/examples/e6.fc @@ -0,0 +1,9 @@ +int f() { + int a; + a = 10; + return a; +} + +int main() { + return f(); +} diff --git a/komarov.andrey/examples/fact.fc b/komarov.andrey/examples/fact.fc new file mode 100644 index 00000000..dcd5476 --- /dev/null +++ b/komarov.andrey/examples/fact.fc @@ -0,0 +1,12 @@ +int fact(int n) { + if (n == 0) { + return 1; + } else { + return n * fact(n - 1); + } +} + +int main() +{ + return fact(5); +} diff --git a/komarov.andrey/examples/sieve.fc b/komarov.andrey/examples/sieve.fc new file mode 100644 index 00000000..aee2773 --- /dev/null +++ b/komarov.andrey/examples/sieve.fc @@ -0,0 +1,43 @@ +int count(bool* a, int n) +{ + int i; + i = 0; + int res; + res = 0; + while (i < n) + { + if (a[i]) { res = res + 1; } else {} + i = i + 1; + } + return res; +} + +bool* sieve(int n) +{ + bool* res; + res = new bool[n]; + int i; + i = 0; + while (i < n) { res[i] = true; i = i + 1; } + res[1] = false; + res[0] = false; + i = 2; + while (i < n) + { + if (res[i]) { + int j; + j = 2 * i; + while (j < n) { res[j] = false; j = j + i; } + } else {} + i = i + 1; + } + return res; +} + +int main() +{ + int n; n = 1000; + bool* a; a = sieve(n); + int cnt; cnt = count(a, n); + return cnt; +} diff --git a/komarov.andrey/examples/sum.fc b/komarov.andrey/examples/sum.fc new file mode 100644 index 00000000..0324ddd --- /dev/null +++ b/komarov.andrey/examples/sum.fc @@ -0,0 +1,18 @@ + +int summ(int n) +{ + int i; + int sum; + i = 0; + sum = 0; + while (i < n) + { + sum = sum + i; + i = i + 1; + } + return sum; +} + +int main() { + return summ(100); +} diff --git a/komarov.andrey/fcc.cabal b/komarov.andrey/fcc.cabal new file mode 100644 index 00000000..47eea0c --- /dev/null +++ b/komarov.andrey/fcc.cabal @@ -0,0 +1,24 @@ +name: fcc +version: 0.2.0.0 +synopsis: Featherweight C compiler +-- description: +homepage: https://github.com/vvolochay/Compilers +license: PublicDomain +license-file: LICENSE +author: Andrey Komarov +maintainer: taukus@gmail.com +-- copyright: +category: Language +build-type: Simple +-- extra-source-files: +cabal-version: >=1.20 + +executable fcc + main-is: Main.hs + other-modules: FCC.Parser, FCC.Lexer, FCC.Expr, FCC.Program, + FCC.Type, FCC.Stdlib, FCC.TypecheckError, FCC.Typecheck, + FCC.Optimize + build-depends: base >=4.8 && <5, array, containers, mtl, bound, prelude-extras + build-tools: happy, alex + hs-source-dirs: src + default-language: Haskell2010 diff --git a/komarov.andrey/report/Makefile b/komarov.andrey/report/Makefile new file mode 100644 index 00000000..c87c9d8 --- /dev/null +++ b/komarov.andrey/report/Makefile @@ -0,0 +1,10 @@ +main: show + +all: head.tex report.tex + xelatex -shell-escape head.tex + +show: all + evince head.pdf + +clean: + rm -rf *.aux *.log *.pyg *.pdf diff --git a/komarov.andrey/report/head.tex b/komarov.andrey/report/head.tex new file mode 100644 index 00000000..4930bae --- /dev/null +++ b/komarov.andrey/report/head.tex @@ -0,0 +1,35 @@ +\documentclass{article} +\usepackage[cm]{fullpage} +\usepackage{pdflscape} +\usepackage{xecyr} +\usepackage{xltxtra} +\setmainfont[Mapping=tex-text]{Times New Roman} +\setmonofont[Scale=MatchLowercase]{Courier New} +\defaultfontfeatures{Mapping=tex-text} +\usepackage{polyglossia} +\setdefaultlanguage{russian} +\newfontfamily\russianfont{Times New Roman} +\setotherlanguage{english} + +\newfontfamily\cyrillicfont[Script=Cyrillic]{Times New Roman} +\newfontfamily\cyrillicfontsf[Script=Cyrillic]{Arial} +\newfontfamily\cyrillicfonttt[Script=Cyrillic]{Courier New} + + +\usepackage{minted} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{stmaryrd} + +\usepackage{syntax} + +\begin{document} +\title{Компиляторы} +\author{Андрей Комаров, группа 5539} +\date{\today} +\maketitle + +\input{report.tex} + + +\end{document} diff --git a/komarov.andrey/report/report.tex b/komarov.andrey/report/report.tex new file mode 100644 index 00000000..eec035f --- /dev/null +++ b/komarov.andrey/report/report.tex @@ -0,0 +1,124 @@ +\section{Грамматика} + +\setlength{\grammarparsep}{20pt plus 1pt minus 1pt} % increase separation between rules +\setlength{\grammarindent}{12em} % increase separation between LHS/RHS + +\begin{grammar} + + ::= * + + ::= `;' +\alt `(' `)' `;' +\alt `(' `)' `{' * `}' + + ::= `{' * `}' +\alt `;' +\alt `=' `;' +\alt `;' +\alt `if' `(' `)' `else' +\alt `while' `(' `)' +\alt `return' `;' + + ::= +\alt +\alt +\alt `(' `)' +\alt `+' +\alt `-' +\alt `*' +\alt `<' +\alt `>' +\alt `==' +\alt `!=' +\alt `<=' +\alt `>=' +\alt `&&' +\alt `||' +\alt `(' `)' + + ::= $\varepsilon$ +\alt +\alt `,' + + ::= $\varepsilon$ +\alt +\alt `,' + + ::= `true' | `false' + + ::= + + + ::= + + +\end{grammar} + +\section{Некоторые факты об этом компиляторе} + +Компилируемый язык похож на очень сильно урезанный Си. Компиляция +однопроходная в том смысле, что нельзя использовать функции до их +определения. Компилируемая функция добавляется в контекст, +следовательно, поддерживаются рекурсивные функции. Можно делать +forward declaration, и, следовательно, косвенную рекурсию. + +Пока есть два типа: \texttt{int} и \texttt{bool}, компилятор проверяет +типы. Можно создавать локальные и глобальные переменные. +Глобальные переменные, также, как и функции, нельзя использовать до их +объявления. + +Конвенция вызова такая же, как в Си: первые четыре аргумента +передаются в r0--r3, остальные~--- в стеке. +Скомпилированные функции можно вызывать из Си. +Можно вызывать функции из Си. + +Скомпилированный код не содержит никаких оптимизаций и содержит +некоторые деоптимизации (для удобства копирует переданные в регистрах +аргументы в стек). + +Код компилируется в стековую машину: вычисляются подвыражения, их +результаты снимаются со стека, производится вычисление и результат +кладётся обратно в стек. + +\section{Примеры кода} + +Эти примеры содержатся в папке \texttt{demo} в репозитории. + +\subsection{Вычисление факториала} + +\inputminted{c}{../demo/fact.l} + +\subsection{Работа с глобальными переменными} + +\inputminted{c}{../demo/global.l} + +\subsection{Вычисление числа простых чисел на отрезке} + +\inputminted{c}{../demo/isPrime.l} + +\subsection{Двустороннее взаимодействие с Си} + +\inputminted{c}{../demo/callc.l} + +Код на Си: + +\inputminted{c}{../demo/call.c} + +\section{Как запускать} + +На \texttt{akomarov.org} на 2222 порту есть машина на ARM-е, на +которой можно запускать примеры. + +\begin{itemize} +\item Пользователь: \texttt{user} +\item Пароль: \texttt{ieti9Vai} +\end{itemize} + +К сожалению, там установлена старая версия GHC, которой компилятор не +компилируется, поэтому, получается этакая кросс-компиляция. + +\section{Что дальше} + +\begin{itemize} +\item Добавить строки (и массивы). +\item Добавить возможность компилироваться в самостоятельный бинарник. +\item Переписать всё, потому что сейчас мерзко. +\end{itemize} diff --git a/komarov.andrey/run.sh b/komarov.andrey/run.sh new file mode 100755 index 00000000..2111f7e --- /dev/null +++ b/komarov.andrey/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +cat > prog.fc +fcc < prog.fc > prog.S +scp prog.S charmander:~/v2/a.S +ssh charmander 'cd v2; make' + diff --git a/komarov.andrey/src/.gitignore b/komarov.andrey/src/.gitignore new file mode 100644 index 00000000..1f94316 --- /dev/null +++ b/komarov.andrey/src/.gitignore @@ -0,0 +1,2 @@ +*.o +*.hi diff --git a/komarov.andrey/src/FCC/Codegen.hs b/komarov.andrey/src/FCC/Codegen.hs new file mode 100644 index 00000000..1f8ef04 --- /dev/null +++ b/komarov.andrey/src/FCC/Codegen.hs @@ -0,0 +1,145 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +module FCC.Codegen ( + codegen, + ) where + +import FCC.Expr +import FCC.Program + +import Bound + +import Data.Monoid +import Control.Monad.State + +import qualified Data.Map as M + +impossible = error "Internal compiler error. Please submit a bug-report." + +data Binding = Local Int | Global String | Arg Int + deriving (Eq, Ord, Show, Read) + +data CodegenState = CodegenState { + offset :: Int, + counter :: Int, + maxOffset :: Int, + argumentsCount :: Int } + +emptyState = CodegenState 0 0 0 0 + +modifyOffset :: MonadState CodegenState m => (Int -> Int) -> m () +modifyOffset f = modify $ \s@(CodegenState{maxOffset = m, offset = o}) + -> s{maxOffset = m `max` o, offset = f o} + +resetMaxOffset :: MonadState CodegenState m => m () +resetMaxOffset = modify $ \s -> s{maxOffset = 1} + +setArgumentsCount :: MonadState CodegenState m => Int -> m () +setArgumentsCount args = modify $ \s -> s{argumentsCount = args} + +newtype Codegen a = Codegen { + runCodegen :: State CodegenState a + } deriving (Functor, Applicative, Monad, MonadState CodegenState) + +fresh :: String -> Codegen String +fresh pref = do + x <- gets counter + modify (\s -> s{counter = x + 1}) + return $ pref ++ show x + +freshLabel :: Codegen String +freshLabel = fresh "_label_" + +freshVar :: Codegen String +freshVar = fresh "_var_" + +codegen :: Program String -> [String] +codegen p = evalState (runCodegen $ compileP p) emptyState + +compileP :: Program String -> Codegen [String] +compileP (Program funs vars) = do + dataSegNames <- sequence [freshVar | _ <- M.keys vars] + let dataHead = ["@@@@@@@@@", ".data", ".global _start"] + dataBody = [name ++ ": .word 0" | name <- dataSegNames] + textVeryHead = ["", "@@@@@@@@@", ".text"] + textHead = [realName ++ ": .word " ++ dataName | (realName, dataName) <- zip (M.keys vars) dataSegNames] + functions <- mapM f $ M.toList funs + return $ dataHead ++ dataBody ++ textVeryHead ++ textHead ++ concat functions + where + f :: (String, Function String) -> Codegen [String] + f (name, fun) = do + code <- compileF fun + return $ ["", "@@@@@@@", name ++ ":"] ++ code + +compileF :: Function String -> Codegen [String] +compileF (Function _ _ (Native _ code)) = return $ code ++ ["mov pc, lr"] +compileF (Function _ _ (Inner s)) = do + let e = instantiate (return . Arg) (Global <$> s) + resetMaxOffset + modifyOffset (const 1) + code <- compileE e + off <- gets maxOffset + return $ ["push {fp, lr}", "mov fp, sp", "sub sp, #" ++ show (off * 4)] ++ code + +compileE :: Expr Binding -> Codegen [String] +compileE (Var (Local off)) = return ["ldr r0, [fp, #-" ++ show (off * 4) ++"]", "push {r0}"] +compileE (Var (Global name)) = return ["ldr r0, " ++ name, "ldr r0, [r0]", "push {r0}"] +compileE (Var (Arg arg)) = return ["ldr r0, [fp, #" ++ show (arg * 4 + 8) ++ "]", "push {r0}"] +compileE (Lit i) = return ["ldr r0, =" ++ show i, "push {r0}"] +compileE (LitBool True) = return ["mov r0, #1", "push {r0}\t\t@ true"] +compileE (LitBool False) = return ["mov r0, #0", "push {r0}\t\t@ false"] +compileE (Lam t s) = do + v <- freshVar + off <- gets offset + modifyOffset (+1) + code <- compileE $ instantiate1 (Var (Local off)) s + modifyOffset (`subtract` 1) + return code +compileE Empty = return [] +compileE (Pop e) = do + code <- compileE e + return $ code ++ ["pop {r0}"] +compileE (Seq e1 e2) = (++) <$> compileE e1 <*> compileE e2 +compileE (Call (Var (Global fname)) args) = do + compiledArgs <- concat <$> reverse <$> mapM compileE args + return $ compiledArgs ++ ["bl " ++ fname] +compileE (Call _ _) = impossible +compileE (Eq _ _) = impossible +compileE (While cond body) = do + begin <- freshLabel + end <- freshLabel + cond' <- compileE cond + body' <- compileE body + return $ [begin ++ ": @ while"] ++ cond' ++ ["pop {r0}", "tst r0, r0", "beq " ++ end] ++ body' ++ ["b " ++ begin, end ++ ": @ endwhile"] +compileE (If cond thn els) = do + elseLabel <- freshLabel + endIfLabel <- freshLabel + cond' <- compileE cond + thn' <- compileE thn + els' <- compileE els + return $ cond' ++ ["pop {r0}", "tst r0, r0", "beq " ++ elseLabel] + ++ thn' ++ ["b " ++ endIfLabel, elseLabel ++ ": @ else:"] + ++ els' ++ [endIfLabel ++ ": @ endif"] +compileE (Assign (Var (Local off)) src) = do + code <- compileE src + return $ code ++ ["pop {r0}", "str r0, [fp, #-" ++ show (off * 4) ++ "]", "push {r0}"] +compileE (Assign (Var (Global name)) src) = do + code <- compileE src + return $ code ++ ["pop {r0}", "ldr r1, " ++ name, "str r0, [r1]", "push {r0}"] +compileE (Assign (Var (Arg arg)) src) = do + code <- compileE src + return $ code ++ ["pop {r0}", "str r0, [fp, #" ++ show (arg * 4 + 8) ++ "]", "push {r0}"] +compileE (Assign (Array a i) src) = do + codea <- compileE a + codei <- compileE i + code <- compileE src + return $ codea ++ codei ++ code ++ ["pop {r0}\t\t@ b", "pop {r1}\t\t@ i", "pop {r2}\t\t@ a", "str r0, [r2, r1, LSL #2]", "push {r0}"] +compileE (Assign _ _) = impossible +compileE (Array a i) = do + codea <- compileE a + codei <- compileE i + return $ codea ++ codei ++ ["pop {r1}", "pop {r0}", "ldr r0, [r0, r1, LSL #2]", "push {r0}"] +compileE (Return e) = do + code <- compileE e + nargs <- gets argumentsCount + return $ code ++ ["pop {r0}", "mov sp, fp", "pop {fp, lr}", "add sp, #" ++ show (nargs * 4), "push {r0}", "mov pc, lr"] diff --git a/komarov.andrey/src/FCC/Eval.hs b/komarov.andrey/src/FCC/Eval.hs new file mode 100644 index 00000000..686bf19 --- /dev/null +++ b/komarov.andrey/src/FCC/Eval.hs @@ -0,0 +1,17 @@ +module FCC.Eval ( + Eval(..), + Value(..), + ) where + +import Data.Int +import qualified Data.Map as M + +data Value + = VVoid + | VInt Int32 + | VBool Bool + | VArray (M.Map Int32 Value) + deriving (Eq, Ord, Show, Read) + +type Eval = [Value] -> Maybe Value + diff --git a/komarov.andrey/src/FCC/Evaluator.hs b/komarov.andrey/src/FCC/Evaluator.hs new file mode 100644 index 00000000..24fd816 --- /dev/null +++ b/komarov.andrey/src/FCC/Evaluator.hs @@ -0,0 +1,156 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module FCC.Evaluator ( + calc, calcF, + EvalConfig(..), + config, + ) where + +import FCC.Type +import FCC.Expr +import FCC.Eval +import FCC.Program +import FCC.Optimize.StdlibEval + +import Bound + +import Control.Monad.Except +import Control.Monad.State +import Control.Monad.Reader +import Control.Monad.Cont + +import qualified Data.Set as S +import qualified Data.Map as M + +data Context = Context { + bindings :: M.Map String Value, + counter :: Int, + bound :: Int + } deriving (Eq, Ord, Show, Read) + + +defaultTimeout = 10000 + +data EvalConfig = EvalConfig { + ctxFunctions :: M.Map String (Function String), + initialTimeout :: Int + } deriving (Eq, Ord, Show) + +newtype Evaluator r a = Evaluator { + runEvaluator :: ExceptT () (StateT Context (ReaderT EvalConfig (Cont r))) a + } deriving (Functor, Applicative, Monad, + MonadError (), MonadReader EvalConfig, + MonadState Context, MonadCont) + +impossible = error "FATAL ERROR ¯\\_(ツ)_/¯" + +runE :: Context -> EvalConfig -> Evaluator (Either () a) a -> Either () a +runE s r e = runCont (runReaderT (runStateT (runExceptT $ runEvaluator e) s) r) fst + +calc :: EvalConfig -> Expr String -> Maybe Value +calc cfg e = case runE (Context M.empty (initialTimeout cfg) 0) cfg (callCC $ \k -> eval k e) of + Left _ -> Nothing + Right a -> Just a + +calcF :: EvalConfig -> Function String -> [Value] -> Maybe Value +calcF cfg f args = case runE (Context M.empty (initialTimeout cfg) 0) cfg (call f args) of + Left _ -> Nothing + Right a -> Just a + +tick :: Evaluator r () +tick = do + remain <- gets counter + when (remain <= 0) $ throwError () + modify $ \c -> c{ counter = remain - 1 } + +fresh :: Evaluator r String +fresh = do + var <- gets bound + modify $ \c -> c{bound = var + 1} + return $ "_opt_var_" ++ show var + +defaultVal :: Type -> Value +defaultVal TInt = VInt 0 +defaultVal TBool = VBool False +defaultVal TVoid = VVoid +defaultVal (TArray _) = VArray M.empty +defaultVal (TFun _ _) = impossible + +call :: Function String -> [Value] -> Evaluator r Value +call (Function _ _ (Native name _)) args = do + tick + case do {ev <- M.lookup name builtinsE; ev args } of + Nothing -> throwError () + Just res -> return res +call (Function fargs _ (Inner s)) args = do + tick + names <- sequence [fresh | _ <- fargs] + modify (\c -> c{bindings = bindings c `M.union` M.fromList (zip names args)}) + callCC $ \k -> eval k $ instantiate (Var . (names !!)) s + +eval :: (Value -> Evaluator r Value) -> Expr String -> Evaluator r Value +eval k (Var v) = do + b <- gets bindings + case M.lookup v b of + Nothing -> throwError () + Just val -> return val +eval k (Lit i) = return $ VInt i +eval k (LitBool b) = return $ VBool b +eval k (Lam t s) = do + v <- fresh + modify $ \c -> c{bindings = M.insert v (defaultVal t) (bindings c)} + eval k $ instantiate1 (Var v) s +eval k Empty = return $ VVoid +eval k (Pop e) = tick >> eval k e +eval k (Seq e1 e2) = tick >> eval k e1 >> eval k e2 +eval k (Call (Var fname) args) = do + f <- asks $ (M.lookup fname) . ctxFunctions + case f of + Nothing -> throwError () + Just f' -> do + args' <- mapM (\x -> callCC $ \r -> eval r x) args + call f' args' +eval k (Call _ _) = impossible +--eval k (Eq _ _) = impossible +eval k e@(While cond body) = do + g <- get + c <- eval k cond + case c of + VBool b -> if b then eval k body >> eval k e else return VVoid + _ -> impossible +eval k (If cond thn els) = do + c <- eval k cond + case c of + VBool b -> if b then eval k thn else eval k els + _ -> impossible +eval k (Assign (Var v) src) = do + src' <- eval k src + modify $ \c -> c{bindings = M.insert v src' (bindings c)} + return src' +--eval k (Assign (Array a i) src) = _ -- TODO ?????? :(((((( +--eval k (Assign _ _) = impossible +--eval k (Array a i) = _ +--eval k (New _ _) = impossible +eval k (Return e) = do + e' <- eval k e + k e' +eval _ _ = throwError () + +config :: Program String -> EvalConfig +config p@(Program funs _) = EvalConfig puM defaultTimeout where + puM = M.filterWithKey (\k v -> k `S.member` pu) funs + pu = findPure p + +findPure :: Program String -> S.Set String +findPure (Program funs vars) = allPure where + allPure = fix' S.empty (updPure funs) + +updPure :: M.Map String (Function String) -> S.Set String -> S.Set String +updPure funs ctx = ctx `S.union` S.fromList [name | (name, f) <- M.toList funs, isPure (S.insert name ctx) f] + +isPure :: S.Set String -> Function String -> Bool +isPure _ (Function _ _ (Native name _)) = name `M.member` builtinsE +isPure ctx (Function _ _ (Inner s)) = all (`S.member` ctx) s + +fix' :: Eq a => a -> (a -> a) -> a +fix' init mod = if new == init then init else fix' new mod where + new = mod init diff --git a/komarov.andrey/src/FCC/Expr.hs b/komarov.andrey/src/FCC/Expr.hs new file mode 100644 index 00000000..4382561 --- /dev/null +++ b/komarov.andrey/src/FCC/Expr.hs @@ -0,0 +1,71 @@ +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE DeriveTraversable #-} +module FCC.Expr ( + Expr(..), + declVar, + ) where + +import FCC.Type + +import Control.Monad + +import Data.Int +import Prelude.Extras +import Bound + +data Expr a + = Var a + | Lit Int32 + | LitBool Bool + | Lam Type (Scope () Expr a) + | Empty + | Pop (Expr a) + | Seq (Expr a) (Expr a) + | Call (Expr a) [Expr a] + | Eq (Expr a) (Expr a) -- костыль во имя нереализации ad-hoc полиморфизма + | While (Expr a) (Expr a) + | If (Expr a) (Expr a) (Expr a) + | Assign (Expr a) (Expr a) + | Array (Expr a) (Expr a) + | New Type (Expr a) + | Return (Expr a) + deriving (Eq, Ord, Show, Read, Foldable, Traversable) + +instance Functor Expr where + fmap = liftM + +instance Applicative Expr where + pure = return + (<*>) = ap + +instance Monad Expr where + return = Var + Var a >>= f = f a + Lit i >>= _ = Lit i + LitBool b >>= _ = LitBool b + Lam t scope >>= f = Lam t $ scope >>>= f + Empty >>= f = Empty + Pop e >>= f = Pop $ e >>= f + Seq e1 e2 >>= f = Seq (e1 >>= f) (e2 >>= f) + Call fun args >>= f = Call (fun >>= f) $ fmap (>>= f) args + Eq e1 e2 >>= f = Eq (e1 >>= f) (e2 >>= f) + While cond e >>= f = While (cond >>= f) (e >>= f) + If cond thn els >>= f = If (cond >>= f) (thn >>= f) (els >>= f) + Assign dest src >>= f = Assign (dest >>= f) (src >>= f) + Array arr ind >>= f = Array (arr >>= f) (ind >>= f) + New t e >>= f = New t $ e >>= f + Return e >>= f = Return $ e >>= f + +declVar :: Eq a => Type -> a -> Expr a -> Expr a +declVar t x e = Lam t $ abstract1 x e + +instance Eq1 Expr where + (==#) = (==) +instance Ord1 Expr where + compare1 = compare +instance Show1 Expr where + showsPrec1 = showsPrec +instance Read1 Expr where + readsPrec1 = readsPrec diff --git a/komarov.andrey/src/FCC/Lexer.x b/komarov.andrey/src/FCC/Lexer.x new file mode 100644 index 00000000..1727e5d --- /dev/null +++ b/komarov.andrey/src/FCC/Lexer.x @@ -0,0 +1,116 @@ +{ +module FCC.Lexer ( + Alex(..), runAlex, alexError, + Token(..), lexer +) where + +import Data.Int +import qualified Data.Set as S +} + +%wrapper "monadUserState" + +$digit = 0-9 +$alpha = [a-zA-Z] +$alnum = [a-zA-Z0-9_] +$eol = [\n] + +tokens :- + $eol ; + $white+ ; + $digit+ { \(_, _, _, s) l -> return $ TokenNum (read $ take l s) } + "(" { r TokenLParen } + ")" { r TokenRParen } + "{" { r TokenLBrace } + "}" { r TokenRBrace } + "[" { r TokenLBracket } + "]" { r TokenRBracket } + "+" { r TokenAdd } + "-" { r TokenSub } + "*" { r TokenMul } + "<" { r TokenLess } + ">" { r TokenGreater } + "==" { r TokenEqual } + "<=" { r TokenLessEq } + ">=" { r TokenGreaterEq } + "!=" { r TokenNotEqual } + "&&" { r TokenAnd } + "||" { r TokenOr } + "^" { r TokenXor } + "=" { r TokenAssign } + ";" { r TokenSemicolon } + "if" { r TokenIf } + "else" { r TokenElse } + "while" { r TokenWhile } + "return" { r TokenReturn } + "true" { r TokenTrue } + "false" { r TokenFalse } + "new" { r TokenNew } + "," { r TokenComma } + "&" { r TokenAmp } + "!" { r TokenNot } + $alpha $alnum* { \(_, _, _, s) l -> var $ take l s } + +{ + +var :: String -> Alex Token +var s = do + tp <- isType s + return $ (if tp then TokenTyVar else TokenVar) s + +r :: Token -> AlexInput -> Int -> Alex Token +r t _ _ = return t + +data AlexUserState = AlexUserState { types :: S.Set String } + +isType :: String -> Alex Bool +isType s = do + AlexUserState {types=t} <- alexGetUserState + return $ s `S.member` t + +alexInitUserState :: AlexUserState +alexInitUserState = AlexUserState $ S.fromList ["int", "void", "bool"] + +alexEOF :: Alex Token +alexEOF = return TokenEOF + +data Token = TokenNum Int32 + | TokenVar String + | TokenTyVar String + | TokenLParen + | TokenRParen + | TokenLBrace + | TokenRBrace + | TokenLBracket + | TokenRBracket + | TokenAdd + | TokenSub + | TokenMul + | TokenLess + | TokenGreater + | TokenEqual + | TokenLessEq + | TokenGreaterEq + | TokenNotEqual + | TokenNot + | TokenAnd + | TokenOr + | TokenXor + | TokenAssign + | TokenSemicolon + | TokenIf + | TokenElse + | TokenWhile + | TokenReturn + | TokenTrue + | TokenFalse + | TokenComma + | TokenAmp + | TokenNew + | TokenEOF + deriving (Eq, Show) + +lexer :: (Token -> Alex a) -> Alex a +lexer cont = (alexMonadScan >>= cont) + +} diff --git a/komarov.andrey/src/FCC/Optimize.hs b/komarov.andrey/src/FCC/Optimize.hs new file mode 100644 index 00000000..7cda145 --- /dev/null +++ b/komarov.andrey/src/FCC/Optimize.hs @@ -0,0 +1,20 @@ +module FCC.Optimize ( + optimize, + ) where + +import FCC.Program + +import FCC.Optimize.ShrinkUnused +import FCC.Optimize.CalcPure + +import Data.Maybe + +optimize :: Program String -> Program String +optimize p = fix' p upd + +upd :: Program String -> Program String +upd = shrink . calcSubExprs + +fix' :: Eq a => a -> (a -> a) -> a +fix' init mod = if new == init then init else fix' new mod where + new = mod init diff --git a/komarov.andrey/src/FCC/Optimize/CalcPure.hs b/komarov.andrey/src/FCC/Optimize/CalcPure.hs new file mode 100644 index 00000000..d41ebe5 --- /dev/null +++ b/komarov.andrey/src/FCC/Optimize/CalcPure.hs @@ -0,0 +1,83 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module FCC.Optimize.CalcPure ( + calcSubExprs, + ) where + +import FCC.Eval +import FCC.Expr +import FCC.Program +import FCC.Evaluator + +import Bound + +import Data.List (elemIndex) +import Data.Maybe (fromMaybe) +import Control.Monad.RWS + +import qualified Data.Map as M + +calcSubExprs :: Program String -> Program String +calcSubExprs p = fst $ evalRWS (runOptimizer $ optP p) (config p) 0 + +newtype Optimizer a = Optimizer { + runOptimizer :: RWS EvalConfig () Int a + } deriving (Functor, Applicative, Monad, MonadReader EvalConfig, MonadState Int) + +fresh :: Optimizer String +fresh = do + n <- get + put $ n + 1 + return $ "_opt_t_vat_" ++ show n + +optP :: Program String -> Optimizer (Program String) +optP p@(Program funs vars) = do + funs' <- sequence (fmap optF funs) + return $ Program funs' vars + +optF :: Function String -> Optimizer (Function String) +optF f@(Function _ _ (Native{})) = return f +optF f@(Function args ret (Inner s)) = do + let names = ["_opt_arg_" ++ show i | (i, _) <- zip [0..] args] + e = instantiate (Var . (names !!)) s + e' <- opt e + let s' = abstract (`elemIndex` names) e' + return $ Function args ret (Inner s') + +opt :: Expr String -> Optimizer (Expr String) +opt e@(Var _) = return e +opt e@(Lit _) = return e +opt e@(LitBool _) = return e +opt (Lam t s) = do + name <- fresh + let e = instantiate1 (Var name) s + e' <- opt e + return $ Lam t $ abstract1 name e' +opt Empty = return Empty +opt (Pop e) = Pop <$> opt e +opt (Seq Empty e) = opt e +opt (Seq e Empty) = opt e +opt (Seq e1 e2) = Seq <$> (opt e1) <*> (opt e2) +opt e@(Call (Var fname) args) = do + cfg <- ask + let ok = do + args <- sequence $ map (calc cfg) args + f <- M.lookup fname (ctxFunctions cfg) + v <- calcF cfg f args + v2e v + case ok of + Nothing -> Call (Var fname) <$> forM args opt + Just e' -> return e' +opt (While (LitBool False) _) = return Empty +opt (While cond body) = While <$> opt cond <*> opt body +opt (If (LitBool True) thn _) = opt thn +opt (If (LitBool False) _ els) = opt els +opt (If cond thn els) = If <$> opt cond <*> opt thn <*> opt els +opt (Assign dst src) = Assign <$> opt dst <*> opt src +opt (Array a i) = Array <$> opt a <*> opt i +opt (Return e) = Return <$> opt e +opt e = return e + +v2e :: Value -> Maybe (Expr a) +v2e (VInt i) = return $ Lit i +v2e (VBool b) = return $ LitBool b +v2e _ = Nothing diff --git a/komarov.andrey/src/FCC/Optimize/ShrinkUnused.hs b/komarov.andrey/src/FCC/Optimize/ShrinkUnused.hs new file mode 100644 index 00000000..995ccb1 --- /dev/null +++ b/komarov.andrey/src/FCC/Optimize/ShrinkUnused.hs @@ -0,0 +1,32 @@ +module FCC.Optimize.ShrinkUnused ( + shrink, + ) where + +import FCC.Program + +import Data.Foldable +import Control.Monad.RWS + +import qualified Data.Set as S +import qualified Data.Map as M + +start = "_start" + +shrink :: Program String -> Program String +shrink p@(Program funs vars) = p' where + used = fix' (S.singleton start) (upd funs) + funs' = M.filterWithKey (\n _ -> n `S.member` used) funs + vars' = M.filterWithKey (\n _ -> n `S.member` used) vars + p' = Program funs' vars' + +fix' :: Eq a => a -> (a -> a) -> a +fix' init mod = if new == init then init else fix' new mod where + new = mod init + +upd :: M.Map String (Function String) -> S.Set String -> S.Set String +upd funs reached = reached `S.union` S.unions [free f | (name, f) <- M.toList funs, name `S.member` reached] + +free :: Function String -> S.Set String +free (Function _ _ (Native _ _)) = S.empty +free (Function _ _ (Inner s)) = S.fromList $ toList s + diff --git a/komarov.andrey/src/FCC/Optimize/StdlibEval.hs b/komarov.andrey/src/FCC/Optimize/StdlibEval.hs new file mode 100644 index 00000000..2274976 --- /dev/null +++ b/komarov.andrey/src/FCC/Optimize/StdlibEval.hs @@ -0,0 +1,38 @@ +module FCC.Optimize.StdlibEval ( + builtinsE + ) where + +import FCC.Eval + +import Data.Int +import qualified Data.Map as M + +new :: Eval +new [VInt size] = Just $ VArray M.empty + +liftI2 :: (Int32 -> Int32 -> Int32) -> Eval +liftI2 op [VInt i1, VInt i2] = Just $ VInt $ i1 `op` i2 +liftI2 _ _ = Nothing + +liftI2B :: (Int32 -> Int32 -> Bool) -> Eval +liftI2B op [VInt i1, VInt i2] = Just $ VBool $ i1 `op` i2 +liftI2B _ _ = Nothing + +liftB :: (Bool -> Bool) -> Eval +liftB op [VBool b] = Just $ VBool $ op b +liftB _ _ = Nothing + +builtinsE :: M.Map String Eval +builtinsE = M.fromList $ [ + ("_new", new), -- какая-то скользкая дорожка. не доверяю вычислятору new + ("_builtin_add", liftI2 (+)), + ("_builtin_sub", liftI2 (-)), + ("_builtin_mul", liftI2 (*)), + ("_builtin_less", liftI2B (<)), + ("_builtin_eq_int", liftI2B (==)), + ("_builtin_eq_bool", liftI2B (==)), + ("_builtin_eq_ptr", liftI2B (==)), + ("_builtin_not", liftB (not)) + + ] + diff --git a/komarov.andrey/src/FCC/Parser.y b/komarov.andrey/src/FCC/Parser.y new file mode 100644 index 00000000..dbfc7b8 --- /dev/null +++ b/komarov.andrey/src/FCC/Parser.y @@ -0,0 +1,135 @@ +{ +module FCC.Parser ( + parse +) where + +import FCC.Lexer +import FCC.Expr +import FCC.Program +import FCC.Type + +} + +%lexer { lexer } { TokenEOF } +%monad { Alex } { >>= } { return } +%name parseAlex +%tokentype { Token } +%error { parseError } + +%token + '(' { TokenLParen } + ')' { TokenRParen } + '{' { TokenLBrace } + '}' { TokenRBrace } + '[' { TokenLBracket } + ']' { TokenRBracket } + '+' { TokenAdd } + '-' { TokenSub } + '*' { TokenMul } + '!' { TokenNot } + '<' { TokenLess } + '>' { TokenGreater } + '==' { TokenEqual } + '<=' { TokenLessEq } + '>=' { TokenGreaterEq } + '!=' { TokenNotEqual } + '&&' { TokenAnd } + '||' { TokenOr } + '^' { TokenXor } + '=' { TokenAssign } + ';' { TokenSemicolon } + if { TokenIf } + else { TokenElse } + while { TokenWhile } + return { TokenReturn } + new { TokenNew } + num { TokenNum $$ } + true { TokenTrue } + false { TokenFalse } + var { TokenVar $$ } + tyvar { TokenTyVar $$ } + ',' { TokenComma } + +%left ',' +%right '=' +%left '||' '&&' +%left '^' +%nonassoc '==' '!=' +%nonassoc '<' '>' '<=' '>=' +%left '+' '-' +%left '*' +%left '!' new +%nonassoc '[' ']' + + +%% + +Prog :: { Program String } +Prog : TopLevels { program $1 } + +Expr :: { Expr String } +Expr : var { Var $1 } + | num { Lit $1 } + | true { LitBool True } + | false { LitBool False } + | '(' Expr ')' { $2 } + | Expr '+' Expr { Call (Var "_builtin_add") [$1, $3] } + | Expr '-' Expr { Call (Var "_builtin_sub") [$1, $3] } + | Expr '*' Expr { Call (Var "_builtin_mul") [$1, $3] } + | Expr '||' Expr { Call (Var "_builtin_or") [$1, $3] } + | Expr '&&' Expr { Call (Var "_builtin_and") [$1, $3] } + | Expr '^' Expr { Call (Var "_builtin_xor") [$1, $3] } + | '!' Expr { Call (Var "_builtin_not") [$2] } + | Expr '<' Expr { Call (Var "_builtin_less") [$1, $3] } + | Expr '<=' Expr { Call (Var "_builtin_lesseq") [$1, $3] } + | Expr '>' Expr { Call (Var "_builtin_greater") [$1, $3] } + | Expr '>=' Expr { Call (Var "_builtin_greatereq") [$1, $3] } + | Expr '==' Expr { Eq $1 $3 } + | Expr '!=' Expr { Call (Var "_builtin_not") [Eq $1 $3] } + | var '(' FunCallList ')' { Call (Var $1) $3 } + | Expr '[' Expr ']' { Array $1 $3 } + | Expr '=' Expr { Assign $1 $3 } + | new Type '[' Expr ']' { New $2 $4 } + | '{' Stmts '}' { $2 } + +Stmt :: { Expr String } + : Expr ';' { Pop $1 } + | if '(' Expr ')' '{' Stmts '}' else '{' Stmts '}' { If $3 $6 $10 } + | while '(' Expr ')' '{' Stmts '}' { While $3 $6 } + | return Expr ';' { Return $2 } + +Stmts :: { Expr String } +Stmts : {- empty -} { Empty } + | Stmt Stmts { Seq $1 $2 } + | Type var ';' Stmts { declVar $1 $2 $4 } + +FunCallList :: { [Expr String] } +FunCallList : {- empty -} { [] } + | Expr { [$1] } + | Expr ',' FunCallList { $1:$3 } + +Type :: { Type } +Type : tyvar { toPrimitiveType $1 } + | Type '*' { TArray $1 } + +TopLevel :: { TopLevel String } +TopLevel : Type var ';' { DeclVar $1 $2 } + | Type var '(' FunArgsList ')' '{' Stmts '}' { DeclFun $2 $1 $4 $7 } + +FunArgsList :: { [(String, Type)] } +FunArgsList : {- empty -} { [] } + | Type var { [($2, $1)] } + | Type var ',' FunArgsList { ($2, $1):$4 } + +TopLevels :: { [TopLevel String] } +TopLevels : {- empty -} { [] } + | TopLevel TopLevels { $1:$2 } + +{ +parseError :: Token -> Alex a +parseError t = alexError $ "Parse error on token " ++ show t + +parse :: String -> Either String (Program String) +parse s = runAlex s parseAlex + +} diff --git a/komarov.andrey/src/FCC/Program.hs b/komarov.andrey/src/FCC/Program.hs new file mode 100644 index 00000000..d082272 --- /dev/null +++ b/komarov.andrey/src/FCC/Program.hs @@ -0,0 +1,44 @@ +module FCC.Program ( + Function(..), + Program(..), + TopLevel(..), + FunctionBody(..), + function, + program, + ) where + +import Data.List (elemIndex) +import Bound + +import FCC.Expr +import FCC.Type + +import qualified Data.Map as M + +data FunctionBody a + = Inner (Scope Int Expr a) + | Native String [String] + deriving (Eq, Ord, Show) + +data Function a + = Function { argsTypes :: [Type], retType :: Type, body :: FunctionBody a } + deriving (Eq, Ord, Show) + +data Program a = + Program { functions :: M.Map a (Function a), variables :: M.Map a Type } + deriving (Eq, Ord, Show) + +function :: Eq a => [(a, Type)] -> Type -> Expr a -> Function a +function args ret body = Function (map snd args) ret $ + Inner $ abstract (`elemIndex` (map fst args)) body + +data TopLevel a + = DeclVar Type a + | DeclFun a Type [(a, Type)] (Expr a) + +-- TODO перестать считать, что всё уникально +program :: Ord a => [TopLevel a] -> Program a +program ts = Program funs vars where + vars = M.fromList [(a, t) | DeclVar t a <- ts] + funs = M.fromList [(name, function args ret body) | DeclFun name ret args body <- ts] + diff --git a/komarov.andrey/src/FCC/Stdlib.hs b/komarov.andrey/src/FCC/Stdlib.hs new file mode 100644 index 00000000..54a729e --- /dev/null +++ b/komarov.andrey/src/FCC/Stdlib.hs @@ -0,0 +1,67 @@ +module FCC.Stdlib ( + builtins, + withStdlib + ) where + +import FCC.Type +import FCC.Expr +import FCC.Program + +import Bound + +import qualified Data.Map as M + +withStdlib :: Program String -> Program String +withStdlib (Program funs vars) = Program (funs `M.union` (M.fromList builtins)) vars + +native :: String -> [Type] -> Type -> [String] -> (String, Function String) +native name args ret body = (name, Function args ret $ Native name body) + +start :: Function String +start = Function [] TInt $ Inner $ abstract (const Nothing) $ Call (Var "_exit") [Call (Var "main") []] + +exit :: (String, Function String) +exit = native "_exit" [TInt] TVoid ["pop {r0}", "mov r7, #1", "swi 0"] + +new :: (String, Function String) +new = native "_new" [TVoid] (TArray TVoid) ["ldr r0, =0", "pop {r1}", "add r1, r0, r1, LSL#2", "ldr r2, =3", + "ldr r3, =33", "ldr r4, =0", "ldr r5, =0", + "ldr r7, =192", "swi 0", "push {r0}"] + +getchar :: (String, Function String) +getchar = native "getchar" [] TInt ["ldr r7, =3", "ldr r0, =0", "push {r0}", "mov r1, sp", "ldr r2, =1", "swi 0", + "ldr r2, =0", "ldr r1, =-1", "cmp r0, r2", "strle r1, [sp]"] + +putchar :: (String, Function String) +putchar = native "putchar" [TInt] TInt ["ldr r7, =4", "ldr r0, =1", "mov r1, sp", "ldr r2, =1", "swi 0", + "ldr r0, [sp]"] + +builtins :: [(String, Function String)] +builtins = [ + native "_builtin_add" [TInt, TInt] TInt ["pop {r0}", "pop {r1}", "add r0, r0, r1", "push {r0}"], + native "_builtin_sub" [TInt, TInt] TInt ["pop {r0}", "pop {r1}", "sub r0, r0, r1", "push {r0}"], + native "_builtin_mul" [TInt, TInt] TInt ["pop {r0}", "pop {r1}", "mul r2, r0, r1", "push {r2}"], + native "_builtin_less" [TInt, TInt] TBool ["pop {r1, r2}", "cmp r1, r2", "movlt r0, #1", "movge r0, #0", "push {r0}"], + native "_builtin_eq_int" [TInt, TInt] TBool ["pop {r1, r2}", "teq r1, r2", "moveq r0, #1", "movne r0, #0", "push {r0}"], + native "_builtin_not" [TBool] TBool ["pop {r0}", "ldr r1, =1", "sub r0, r1, r0", "push {r0}"], + ("_start", start), + getchar, + putchar, + exit, + new + ] + + {- + | Expr '+' Expr { Call (Var "_builtin_add") [$1, $3] } + | Expr '-' Expr { Call (Var "_builtin_sub") [$1, $3] } + | Expr '*' Expr { Call (Var "_builtin_mul") [$1, $3] } + | Expr '||' Expr { Call (Var "_builtin_or") [$1, $3] } + | Expr '&&' Expr { Call (Var "_builtin_and") [$1, $3] } + | Expr '^' Expr { Call (Var "_builtin_xor") [$1, $3] } + | Expr '<' Expr { Call (Var "_builtin_less") [$1, $3] } + | Expr '<=' Expr { Call (Var "_builtin_lesseq") [$1, $3] } + | Expr '>' Expr { Call (Var "_builtin_greater") [$1, $3] } + | Expr '>=' Expr { Call (Var "_builtin_greatereq") [$1, $3] } + | Expr '==' Expr { Eq $1 $3 } + | Expr '!=' Expr { Call (Var "_builtin_not") (Eq $1 $3) } +-} diff --git a/komarov.andrey/src/FCC/Type.hs b/komarov.andrey/src/FCC/Type.hs new file mode 100644 index 00000000..d839605 --- /dev/null +++ b/komarov.andrey/src/FCC/Type.hs @@ -0,0 +1,29 @@ +module FCC.Type ( + Type(..), + toPrimitiveType, + ) where + +import Data.List (intercalate) + +data Type + = TInt + | TBool + | TVoid + | TArray Type + | TFun [Type] Type + deriving (Eq, Ord, Show, Read) + +toPrimitiveType :: String -> Type +toPrimitiveType "int" = TInt +toPrimitiveType "bool" = TBool +toPrimitiveType "void" = TVoid +toPrimitiveType t = error $ "BEDA " ++ t + +{- +instance Show Type where + show TInt = "int" + show TBool = "bool" + show (TArray t) = show t ++ "*" + show (TTuple ts) = "(" ++ intercalate ", " (map show ts) ++ ")" + show (TFun from to) = "{" ++ show from ++ " -> " ++ show to ++ "}" +-} diff --git a/komarov.andrey/src/FCC/Typecheck.hs b/komarov.andrey/src/FCC/Typecheck.hs new file mode 100644 index 00000000..1182e3f --- /dev/null +++ b/komarov.andrey/src/FCC/Typecheck.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +module FCC.Typecheck ( + runTC, + ) where + +import FCC.Type +import FCC.TypecheckError +import FCC.Expr +import FCC.Program + +import Bound + +import Data.Foldable +import Data.List (elemIndex) +import Control.Monad.State +import Control.Monad.Reader +import Control.Monad.Except + +import qualified Data.Map as M +import qualified Data.Set as S + +data Context = Context { + expectedRetType :: Type + } + +fresh :: Typecheck String +fresh = do + cnt <- get + modify (+1) + return $ "_var_" ++ show cnt + +newtype Typecheck a = Typecheck { + runTypecheck :: StateT Int (ReaderT Context (Except TypecheckError)) a + } deriving (Functor, Applicative, Monad, + MonadError TypecheckError, MonadReader Context, MonadState Int) + +runTC :: Program String -> Either TypecheckError (Program String) +runTC prog = fmap (fst . fst) $ runExcept $ runReaderT (runStateT (runTypecheck $ typecheck prog) 0) (Context TVoid) + +class Typecheckable (f :: * -> *) t | f -> t where + typecheck :: f t -> Typecheck (f t, Type) + +instance Typecheckable Program String where + typecheck (Program funs vars) = do + when (not (S.null unboundVars)) $ throwError $ UnboundVariables (S.toList unboundVars) + funs' <- traverse ff funs + return $ (Program funs' vars, TVoid) + where + allFreeVars = S.fromList $ concatMap freeVars $ M.elems funs + allBoundVars = S.fromList $ M.keys funs ++ M.keys vars + unboundVars = allFreeVars S.\\ allBoundVars + + freeVars :: Function String -> [String] + freeVars f = case body f of + Inner s -> toList s + Native _ _ -> [] + + ff :: Function String -> Typecheck (Function String) + ff f@Function {body = Native{}} = return f + ff (Function argTypes ret (Inner s)) = do + argNames <- sequence [fresh | _ <- argTypes] + let e = instantiate ((map Var argNames) !!) s + args = M.fromList $ zip argNames argTypes + funs' = fmap (\(Function a r _) -> TFun a r) $ funs + allTypes = args `M.union` funs' `M.union` vars + (e', _) <- local (const $ Context ret) $ + typecheck $ fmap (\n -> (n, allTypes M.! n)) e + let s' = abstract (`elemIndex` argNames) $ fmap fst e' + return $ Function argTypes ret (Inner s') + +instance Typecheckable Expr (String, Type) where + typecheck v@(Var (_, t)) = do + return (v, t) + typecheck (Lit i) = return (Lit i, TInt) + typecheck (LitBool b) = return (LitBool b, TBool) + typecheck (Lam t s) = do + var <- fresh + (e, te) <- typecheck $ instantiate1 (Var (var, t)) s + return (Lam t (abstract1 (var, t) e), te) + typecheck Empty = return (Empty, TVoid) + typecheck (Pop e) = do + (e', te) <- typecheck e + return (Pop e', te) + typecheck (Seq e1 e2) = do + (e1', _) <- typecheck e1 + (e2', _) <- typecheck e2 + return $ (Seq e1' e2', TVoid) + typecheck (Call f@(Var _) args) = do + (f', tf) <- typecheck f + args' <- mapM typecheck args + let targs = fmap snd args' + (tfargs, tfret) <- case tf of + TFun ta t -> return (ta, t) + t -> throwError $ NotAFunction t f + when (targs /= tfargs) $ throwError $ ArgumentsTypesDiffer targs tfargs f + return (Call f' (map fst args'), tfret) + typecheck (Call f _) = throwError $ NotCallable f + typecheck (Eq e1 e2) = do + (e1', te1) <- typecheck e1 + (e2', te2) <- typecheck e2 + when (te1 /= te2) $ throwError $ EqTypesDiffer te1 te2 e1 e2 + let select fname t = return (Call (Var (fname, TFun [t, t] TBool)) [e1', e2'], TBool) + case te1 of + TInt -> select "_builtin_eq_int" TInt + TBool -> select "_builtin_eq_bool" TBool + TArray a -> select "_builtin_eq_ptr" (TArray a) + _ -> throwError $ UnsupportedTypeForEq te1 e1 e2 + typecheck (While cond body) = do + (cond', tcond) <- typecheck cond + (body', _) <- typecheck body + when (tcond /= TBool) $ throwError $ WhileConditionIsNotBool tcond cond + return (While cond' body', TVoid) + typecheck (If cond thn els) = do + (cond', tcond) <- typecheck cond + (thn', _) <- typecheck thn + (els', _) <- typecheck els + when (tcond /= TBool) $ throwError $ IfConditionIsNotBool tcond cond + return (If cond' thn' els', TVoid) + typecheck (Assign v@(Var _) val) = do + (v', tv) <- typecheck v + (val', tval) <- typecheck val + when (tv /= tval) $ throwError $ AssignTypeMismatch tv tval v val + return (Assign v' val', TVoid) + typecheck (Assign ai@(Array _ _) val) = do + (ai', tai) <- typecheck ai + (val', tval) <- typecheck val + when (tai /= tval) $ throwError $ AssignTypeMismatch tai tval ai val + return (Assign ai' val', TVoid) + typecheck (Assign dst _) = throwError $ NotAssignable dst + typecheck (Array a i) = do + (a', ta) <- typecheck a + (i', ti) <- typecheck i + ta' <- case ta of + TArray x -> return x + _ -> throwError $ NotAnArray ta a + when (ti /= TInt) $ throwError $ IndexIsNotInt ti i + return $ (Array a' i', ta') + typecheck (New t e) = do + (e', te) <- typecheck e + when (te /= TInt) $ throwError $ NewArraySizeIsNotInt te e + return (Call (Var ("_new", TFun [TInt] (TArray t))) [e'], TArray t) + typecheck (Return e) = do + (e', te) <- typecheck e + tret <- asks expectedRetType + when (te /= tret) $ throwError $ WrongReturnType te tret e + return (Return e', TVoid) diff --git a/komarov.andrey/src/FCC/TypecheckError.hs b/komarov.andrey/src/FCC/TypecheckError.hs new file mode 100644 index 00000000..cdd4986 --- /dev/null +++ b/komarov.andrey/src/FCC/TypecheckError.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE StandaloneDeriving #-} +module FCC.TypecheckError ( + TypecheckError(..), + ) where + +import FCC.Type +import FCC.Expr + +data TypecheckError where + SomethingWentWrong :: TypecheckError + NotCallable :: Show a => Expr a -> TypecheckError + NotAssignable :: Show a => Expr a -> TypecheckError + NotAnArray :: Show a => Type -> Expr a -> TypecheckError + IndexIsNotInt :: Show a => Type -> Expr a -> TypecheckError + WhileConditionIsNotBool :: Show a => Type -> Expr a -> TypecheckError + IfConditionIsNotBool :: Show a => Type -> Expr a -> TypecheckError + UnboundVariable :: Show a => a -> TypecheckError + EqTypesDiffer :: Show a => Type -> Type -> Expr a -> Expr a -> TypecheckError + UnsupportedTypeForEq :: Show a => Type -> Expr a -> Expr a -> TypecheckError + NotAFunction :: Show a => Type -> Expr a -> TypecheckError + ArgumentsTypesDiffer :: Show a => [Type] -> [Type] -> Expr a -> TypecheckError + AssignTypeMismatch :: Show a => Type -> Type -> Expr a -> Expr a -> TypecheckError + WrongReturnType :: Show a => Type -> Type -> Expr a -> TypecheckError + UnboundVariables :: Show a => [a] -> TypecheckError + NewArraySizeIsNotInt :: Show a => Type -> Expr a -> TypecheckError + +deriving instance Show TypecheckError diff --git a/komarov.andrey/src/Main.hs b/komarov.andrey/src/Main.hs new file mode 100644 index 00000000..305035a --- /dev/null +++ b/komarov.andrey/src/Main.hs @@ -0,0 +1,19 @@ +module Main where + +import FCC.Parser +import FCC.Typecheck +import FCC.Codegen +import FCC.Stdlib +import FCC.Optimize + +import Data.List (intercalate) + +main :: IO () +main = do + input <- getContents + let p = parse input + case p of + Left e -> putStrLn $ "failed to parse: " ++ show e + Right x -> case runTC (withStdlib x) of + Left e' -> putStrLn $ "failed to typecheck: " ++ show e' + Right p -> let p' = optimize p in (putStrLn $ "@ " ++ show p') >> (putStrLn $ intercalate "\n" $ codegen p') diff --git a/komarov.andrey/src/Makefile b/komarov.andrey/src/Makefile new file mode 100644 index 00000000..4ae6b82 --- /dev/null +++ b/komarov.andrey/src/Makefile @@ -0,0 +1,12 @@ +all: lexer parser FCC/Parser.hs FCC/Lexer.hs + echo hi + +lexer: FCC/Lexer.x + alex FCC/Lexer.x + +parser: FCC/Parser.y + happy FCC/Parser.y -ilog + +clean: + rm -f *.o *.hi log Lexer.hs Parser.hs TestCompiler + diff --git a/komarov.andrey/tests/TestParser.hs b/komarov.andrey/tests/TestParser.hs new file mode 100644 index 00000000..cb21b33 --- /dev/null +++ b/komarov.andrey/tests/TestParser.hs @@ -0,0 +1,25 @@ +import System.Exit +import System.Directory +import Data.List (isPrefixOf, isSuffixOf) +import Control.Monad (forM_) + +import FCC.Parser (parse) + +checkFile :: FilePath -> IO () +checkFile path = do + putStrLn $ "Checking " ++ path + contents <- readFile path + let res = parse contents + print res + case ("parseErr" `isPrefixOf` path, res) of + (True, Right _) -> exitFailure + (False, Left _) -> exitFailure + _ -> return () + + +main :: IO () +main = do + setCurrentDirectory "examples" + files <- getDirectoryContents "." + let good = filter (".fc" `isSuffixOf`) files + forM_ good checkFile diff --git a/komarov.andrey/tests/TestPrettyPrinter.hs b/komarov.andrey/tests/TestPrettyPrinter.hs new file mode 100644 index 00000000..068dc62 --- /dev/null +++ b/komarov.andrey/tests/TestPrettyPrinter.hs @@ -0,0 +1,49 @@ +import Test.Framework (defaultMain, testGroup) +import Test.Framework.Providers.HUnit +import Test.Framework.Providers.QuickCheck2 (testProperty) + +import Test.QuickCheck +import Test.HUnit + +import System.Exit +import System.Directory + +import Data.List + +import Control.Monad + +import FCC.AST +import FCC.Parser + +-- main = defaultMain tests + +{- +tests = [ + testGroup "Expression" [ + testProperty "expr" prop_expr + ] + ] + +prop_expr :: Expression () -> Bool +prop_expr (EDeref _) = False +prop_expr _ = True +-} + +checkFile :: FilePath -> IO () +checkFile path = do + putStrLn $ "Checking " ++ path + contents <- readFile path + let res = parse contents + print res + case res of + Right p -> case parse (show p) of + Right p' -> when (p /= p') $ print p >> print p' >> exitFailure + Left _ -> print p >> exitFailure + Left _ -> return () + +main :: IO () +main = do + setCurrentDirectory "examples" + files <- getDirectoryContents "." + let good = filter (".fc" `isSuffixOf`) files + forM_ good checkFile diff --git a/komarov.andrey/vm/download.sh b/komarov.andrey/vm/download.sh new file mode 100755 index 00000000..9a20d71 --- /dev/null +++ b/komarov.andrey/vm/download.sh @@ -0,0 +1,5 @@ +#!/bin/sh +wget https://people.debian.org/~aurel32/qemu/armel/debian_wheezy_armel_standard.qcow2 +wget https://people.debian.org/~aurel32/qemu/armel/initrd.img-3.2.0-4-versatile +wget https://people.debian.org/~aurel32/qemu/armel/vmlinuz-3.2.0-4-versatile + diff --git a/komarov.andrey/vm/run.sh b/komarov.andrey/vm/run.sh new file mode 100755 index 00000000..616aaa5 --- /dev/null +++ b/komarov.andrey/vm/run.sh @@ -0,0 +1,2 @@ +#!/bin/sh +qemu-system-arm -M versatilepb -kernel vmlinuz-3.2.0-4-versatile -initrd initrd.img-3.2.0-4-versatile -hda debian_wheezy_armel_standard.qcow2 -append "root=/dev/sda1" -redir tcp:2222::22