diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..068f22c --- /dev/null +++ b/.drone.yml @@ -0,0 +1,27 @@ +--- +kind: pipeline +name: default +type: docker + +steps: + - + name: tests + commands: + # /umake holds the binaries which are installed in the image. + # refer to the dockerfile. + - "pip3 install --upgrade ." + - "cd umake && pyflakes ." + - "cd ../test && python3.6 test.py" + image: grisha85/umake:3 + pull: if-not-exists + + - name: coverage + image: plugins/codecov + settings: + token: + from_secret: CODECOV + required: true + files: + - "*.xml" + paths: + - test/coverage \ No newline at end of file diff --git a/.gitignore b/.gitignore index 073d515..c11fbe3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,9 @@ test/env/* __pycache__ +test/coverage/* +test/.coverage +example/hello.o +example/.umake +example/hello_world +example/.coverage +umake.egg-info diff --git a/Dockerfile b/Dockerfile index 2e1cf8c..bf60162 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,12 @@ FROM ubuntu:18.04 RUN apt-get update -y -RUN apt-get install -y python3.6 build-essential python3-pip libxml2-dev zlib1g-dev strace -RUN apt-get install -y git +RUN apt-get install -y python3.6 build-essential python3-pip libxml2-dev zlib1g-dev strace vim wget ADD . /umake -RUN pip3 install /umake +RUN pip3 install -e /umake + +RUN pip3 install ipdb coverage pyflakes + +RUN wget https://dl.min.io/server/minio/release/linux-amd64/minio && chmod +x ./minio && mv ./minio /usr/bin # for tests RUN apt-get install -y libprotobuf-c0-dev protobuf-c-compiler diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0aabf3d --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2020, Gregory Freilikhman +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..690b6d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ + + +.PHONY: test enter benchmark + +IMAGE=grisha85/umake:3 + +VOLUMES=-v`pwd`:/umake + +build-docker: + docker build -t ${IMAGE} . + +test: + docker run --rm --privileged -it ${VOLUMES} -w/umake ${IMAGE} bash -c 'cd test && python3.6 test.py' + +lint: + docker run --rm ${VOLUMES} -w/umake/umake ${IMAGE} pyflakes . + +enter: + docker run --rm --privileged -it ${VOLUMES} -w/umake ${IMAGE} bash + +benchmark: + docker run --rm --privileged -it -v`pwd`:/umake -w/umake ${IMAGE} bash -c 'cd test && python3 ./test.py TestUMake.test_benchmark' \ No newline at end of file diff --git a/README.md b/README.md index d1ee402..e63b382 100644 --- a/README.md +++ b/README.md @@ -1,213 +1,86 @@ UMake ===== -**Blazing Fast. Sub scecond Modification detection. 
Few seconds for cached compilation** +**Blazing Fast. Sub second Modification detection. Just a few seconds for cached compilation** + +![dpdk build](doc/images/dpdk-build/dpdk-build.gif) Overview -------- -UMake is a build system that building your projects. -influenced by [`tup`](http://gittup.org/tup/). +UMake is a build system that building your projects. +Influenced by [`tup`](http://gittup.org/tup/). With the features below your compilation speed will be on average dramatically reduced, either after branch change either with your `CI` builds. -* local cache - disk cache -* remote cache - minio -* auto dependency discovery using strace -* simple configuration language +[![codecov](https://codecov.io/gh/grisha85/umake/branch/master/graph/badge.svg)](https://codecov.io/gh/grisha85/umake/tree/master/umake) -Running example ---------------- -``` -git clone https://github.com/grisha85/umake.git -cd umake -docker build -t umake . -docker run --rm -it -v`pwd`/example:/example -w/example umake bash -umake --no-remote-cache -./hello_world -``` +[![droneio](http://xrayio.com/api/badges/grisha85/umake/status.svg)](http://xrayio.com/grisha85/umake/) -How UMake works ---------------- -* loading compilation graph (DAG) from previous build (graph is empty when built first time) - -* scannig filesytem using the loaded graph to check for changes - * `modified` - if file was modified on filesystem it marked as modified on the graph - * `deleted` - if file was deleted from the filesystem, successor target deleted as well from the filesystem and the graph - +#### `Local cache` +many base libraries in your project rarely changed, why recompile them over and over again. Local cache reduce compilation times and remote cache access. -* parsing UMakefile and creating new commands or updating the existing +#### `Remote cache` +If someone already compiled most of the libraries in your project, use those results. 
- * `deleted` - if command deleted, targets of this command deleted as well from both graph and filesystem - * `updated` - if command is updated, UMake will handle it as `delete` and `create`. so, target of the old command will be deleted and new target will be created when graph will be executed -* executing the graph in parallel +#### `Auto dependency discovery` +makes your life easier to create build scripts no matter what your tool is: `gcc`, `protoc`, `docker build` ... - * `auto dependency detection` - updating the graph with accessed files by parsing strace logs - * `cache` - saving to cache. more details: [Cache System](#cache-system) -* saving the build graph -Note: by automatically deleting `targets` when no longer needed (either `command` is delete or source file was the deleted for this `target`) UMake implementing `clean` by design. So `clean` no longer need to be mainained. +[Detailed overview](doc/overview.md) -UMakefile ---------- -## Rule `:` +[Cache System](doc/cache.md) -A signle `command` is generated for this rule +[UMakefile](doc/umakefile.md) -`:` source | manual-deps `>` cmd `>` target +Install +------- -`manual-deps` - targets the this tule depends on, in order to keep a correct build order -`cmd` - bash command -`target` - the output of the command - -`{filename}` - full path filename of the source `/my/path/filename.a` -`{dir}` - directory of the source `/my/path/` -`{noext}` - filename without extension `filename` -`{target}` - expanded target `helloworld.a` +platform: linux (tested on ubuntu 18.04) -Example: -``` -: *.o > gcc {filename} -o {target} > helloworld.a -``` +dependencies: strace, bash. 
python3 -#### Recursive Source `**` -recursice deps are support -``` -root\ - a\ - a.a\ - a.a.a - a.a.b - a.b.a - a.b\ - a.b.a - a.b.b - b\ - b -``` -* `root/**` -> (`a.a.a`, `a.a.b`, `b`) -* `root/a/**/*.b` -> (`a.a.b`, `a.b.b`) +ubuntu packages(apt-get install): build-essential python-dev libxml2 libxml2-dev zlib1g-dev -#### Manual Dependency `|` -In order to maintain the correct order of the build (that is done in parallel), there are use cases that manual depndecy is needed to be provided. for example: if there are `generated headers` that used later by other `command` to generate anoter target. +for more details check the `Dockerfile` how to create environment for umake. - -## Rule `:foreach` -Same as `:` but will create `command` for each `source` (that match the pettern *.o in the example above) file that will be found on the filesystem - -## Macro `!` -Macros are expanded immediatlly (like `#define X "hello"` in c/cpp) -Macros can accept parameters - -Example: -``` -!c(includes, flags) : gcc -g -O2 -Wall -fPIC -c {filename} $includes $flags -o {target} > {dir}/{noext}.o -``` -#### Default values -`Macro` supports defualt values, by default it `""`: -``` -!c(includes, flags=-O3) : gcc -g -O2 -Wall -fPIC -c {filename} $includes $flags -o {target} > {dir}/{noext}.o -``` -now `!c` can be called as following -``` -!c(-Iinclude) # includes = -Iinclude, flags=-O3 -!c(-Iinclude, -O0) # includes = -Iinclude, flags=-O0 -!c() # includes = "", flags=-O3 ``` -## Const `$` -Consts are like macros, and can be used to parametrize calls to macros -Example: -``` -$libs = -lpthread -!so($libs) -``` - -## Config `[:]` -Configs allow to configure and changing umake execution. - -#### `workdir` -Default: \ - -Changing current working directory. After changing the working directory all `relative paths` are relative to the new working dir. `Absoulte paths` are relative to the `root` (the directory where UMakefile exists). 
-Relative path `my_dir_a/my_dir_b` will be evaluated as `/my_dir_a/my_dir_b`. However `/my_dir_a/my_dir_b` will be evaluated as `/my_dir_a/my_dir_b` regardless what working dir is. - -The following rules are similar: - -``` -: src/packages/a > gcc > src/packages/b -``` -``` -[workdir:src/packages] -: a > gcc > b -``` -Return to root -``` -[workdir:/] -``` - -#### `variant` - -Defult: "default" - -The ability to generate diffrent variants from the same sources. For example: debug/release compilations. variant `terminated` with a `newline` -``` -# varaint is terminated with newline -[variant:default] -$cflags = -O3 - -[variant:debug] -$cflags = -O0 - -: my.c > !c($cflags) > my.o -``` -now compile with `umake` for default variant -``` -umake -``` -or -``` -umake --variant debug +git clone https://github.com/grisha85/umake.git +cd umake +pip3 install . ``` -for `debug` variant. - -#### `include` -Default: - -include another `UMakefile` into the current one. +Running example +--------------- ``` -[include:somedir/umakefile] +git clone https://github.com/grisha85/umake.git +cd umake +docker build -t umake . +docker run --rm -it -v`pwd`/example:/example -w/example umake bash +umake --no-remote-cache +./hello_world ``` -will open and parse `somedir/umakefile` in the current working dir context. -# Cache System -Targets are being cached after creation, and checked if the target is in cache just before executing a `command`. There are two types of cache that UMake is using local(filesystem) and remote (minio). -## How Cache works -### On Save -* `sha1` of the target sources (those that were generated from UMakefile) are being calculated and `sha1` of the `command` itself. All dependecies files (also those that were auto detected) Saved to `md-` file. 
-* `sha1` of all dependecies are calculated and the just created target is saved to `/` -### On Load -* `sha1` of the target sources (those that were generated from UMakefile) are being calculated and `sha1` of the `command` itself. Reading `md-` for all the file dependecies -* calculating `sha1` of all of the target dependecies (from the files system) and copying `/` to the project directory as it was generated by the `command` +UMake configuration +------------------- +This section lists all the various configurations that umake supports -## Local Cache -The local cache is stored in `~/.umake/build-cache`. +| Variable name | Description | +|--------------------------------|------------------------------------------------------------| +| UMAKE_CONFIG_ROOT | The root directory in which all umake files will be stored | -## Remote Cache -TBD -# Arguments +Real Life Examples +------------------ +[DPDK build](doc/dpdk-build.md) -``` -usage: umake [-h] [--details] [--json JSON_FILE] [--no-remote-cache] - [--no-local-cache] - [target] - -positional arguments: - target target path -optional arguments: - -h, --help show this help message and exit - --details details about the target - --json JSON_FILE output as json - --no-remote-cache don't use remote cache - --no-local-cache don't use local cache +Talking about UMake: +-------------------- +This section includes link to various places around the web that reason about umake. +We believe that by reviewing questions and opinions that other people wrote about umake one can learn more about it. 
+So without further ado, here is the list: -``` +* [DriveNets blog](https://drivenets.com/blog/technology-developments/the-inside-story-of-how-we-optimized-our-own-build-system/) +* [Reddit r/bazel](https://www.reddit.com/r/bazel/comments/fa084s/how_we_optimised_our_build_system_using_umake/) +* [Reddit r/cpp](https://www.reddit.com/r/cpp/comments/f9yjxn/how_we_optimised_our_build_system_using_umake/) +* [Reddit r/gcc](https://www.reddit.com/r/gcc/comments/faiqum/how_we_optimised_our_build_system_using_umake/) +Have another story to share about umake? Just open a PR with a change to this list and we'll merge it in. diff --git a/UMakefile b/UMakefile deleted file mode 100644 index f8b460a..0000000 --- a/UMakefile +++ /dev/null @@ -1,9 +0,0 @@ -!gcc() = gcc -c -fPIC {filename} $a $b -o {target} -!so() = gcc -shared {filename} -o {target} - -$a1 = 1 -$a2 = 2 - -:foreach test/*.c > !gcc() > {dir}/{noext}.o -: test/*.o > !so() > test/b.so -: test/Dockerfile | test/b.so test/*.o > docker build -f {filename} test/ > diff --git a/doc/cache.md b/doc/cache.md new file mode 100644 index 0000000..3ac684d --- /dev/null +++ b/doc/cache.md @@ -0,0 +1,74 @@ +# Cache System +Targets are being cached after creation, and checked if the target is in cache just before executing a `command`. There are two types of cache that UMake is using: local (filesystem) and remote (minio). + +## How Cache works +### On Save +* `sha1` of the target sources (those that were generated from UMakefile) are being calculated and `sha1` of the `command` itself. All dependency files (also those that were auto detected) are saved to `md-` file. +* `sha1` of all dependencies are calculated and the just created target is saved to `/` +### On Load +* `sha1` of the target sources (those that were generated from UMakefile) are being calculated and `sha1` of the `command` itself.
Reading `md-` for all the file dependencies +* calculating `sha1` of all of the target dependencies (from the file system) and copying `/` to the project directory as it was generated by the `command` + +## Local Cache +The local cache is stored in `~/.umake/build-cache`. + +## Remote Cache +umake supports uploading the artifacts to a remote server. The more people that work on the same code base the more the remote cache will work. +Think of it like this: if someone in the office already built a file then there is no need to build it again. This concept speeds up local builds, since if someone +else already built the binary we just download from the cache. It also speeds up CI builds, since in most cases developers compile & test their code locally. + +### Using minio as a umake remote cache +[Minio](https://min.io/) is a high performance open source object storage. It is compatible with AWS S3. It is very easy to set it up using the official [docker image](https://hub.docker.com/r/minio/minio/). Connecting it to umake is also very simple. + +#### How to run minio for umake +We need to start a minio server and make sure that it exposes a port via docker. +Use the following command to spin up the server: +``` +docker run \ + -e MINIO_ACCESS_KEY=umake \ + -e MINIO_SECRET_KEY=umakeumake \ + -p 9000:9000 \ + minio/minio server /data +``` +Note that you need to select a password and a username. In this example it's umake/umakeumake. +For more minio configurations refer to the [docs](https://github.com/minio/minio/blob/master/docs/config/README.md). +Now that the server is up and running we need to create a bucket for umake. +The simplest way to do it is via the minio web interface. + +* Access http://localhost:9000 and use the username and password you selected. +* Create a new bucket called umake-build-cache + +And that's it! We are good to go.
Let's test it out using the example in the repo + +``` +$ cd ./example +$ umake + [0.082] done imports + [0.000] done loading graph + [0.000] done filesystem scan + [0.001] done parsing UMakefile + gcc -c /home/dn/umake/example/hello.c -o /home/dn/umake/example/*.o + [0.084] [CACHED] /home/dn/umake/example/*.o + gcc /home/dn/umake/example/*.o -o /home/dn/umake/example/hello_world + [0.090] [CACHED] /home/dn/umake/example/hello_world + [0.000] done saving graph + [0.004] done cache gc + Workers 0/8 Cache 0/1500[MB] Cache Hits 0% Local/Remote 0%/0% Variant default Time 0[sec] hello_world + + $ umake --remote-cache-stats + [0.082] done imports +bucket size 0MB, n_objects 4 + +$ umake --no-local-cache + [0.081] done imports + [0.000] done loading graph + [0.000] done filesystem scan + [0.001] done parsing UMakefile + gcc -c /home/dn/umake/example/hello.c -o /home/dn/umake/example/*.o + [0.008] [REMOTE-CACHE] /home/dn/umake/example/*.o + gcc /home/dn/umake/example/*.o -o /home/dn/umake/example/hello_world + [0.007] [REMOTE-CACHE] /home/dn/umake/example/hello_world + [0.000] done saving graph + [0.003] done cache gc + Workers 0/8 Cache 0/1500[MB] Cache Hits 100% Local/Remote 0%/100% Variant default Time 0[sec] hello_world +``` diff --git a/doc/dpdk-build.md b/doc/dpdk-build.md new file mode 100644 index 0000000..c36ed55 --- /dev/null +++ b/doc/dpdk-build.md @@ -0,0 +1,39 @@ + +# Build DPDK in 9 seconds +[`DPDK`](https://www.dpdk.org/) is an open source project for fast packet processing. 
`umake` can compile it in `9` seconds: + +![dpdk build](images/dpdk-build/dpdk-build.gif) + +## Setup + +- VM with 6 cpus / `Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz` host +- prepare tests with: +``` +git clone https://github.com/grisha85/dpdk.git +cd dpdk +make prepare +``` +## Results +| compilation | time (seconds) | command | comments | +|-------------------------------- |---------------- |------------------- |---------- | +| ninja | 160 | make ninja | | +| ninja null build | 0.054 | make ninja | | +| umake - uncached | 274 | make umake | [1] | +| umake null build | 0.9 | make umake | | +| umake - local cache | `9` | make umake-local | | +| umake - remote cache(over lan) | 14 | make umake-remote | | + +1. strace has a huge performance penalty + +## How the port to `umake` was made +- Output of verbose `ninja` compilation was saved to a file: [ninja compilation output](https://github.com/grisha85/dpdk/blob/master/ninja) +- The output was parsed with a [python script](https://github.com/grisha85/dpdk/blob/master/parse_ninja.py) to `UMakefile` + + +## Remarks + +- This is not a full port of DPDK compilation to `umake`. +- The generated UMakefile is not an ideal one, check documentation for better usage of `wildcards`, `variants`, etc. + +# Conclusion +**In most compilations there is a limited number of files that are being changed, so `umake` can dramatically increase compilation speed (11-16 times faster).
This is especially true for CI builds.** diff --git a/doc/images/dpdk-build/dpdk-build.gif b/doc/images/dpdk-build/dpdk-build.gif new file mode 100644 index 0000000..4569d21 Binary files /dev/null and b/doc/images/dpdk-build/dpdk-build.gif differ diff --git a/doc/images/overview/1.png b/doc/images/overview/1.png new file mode 100644 index 0000000..ecfbd81 Binary files /dev/null and b/doc/images/overview/1.png differ diff --git a/doc/images/overview/2.png b/doc/images/overview/2.png new file mode 100644 index 0000000..6ed997d Binary files /dev/null and b/doc/images/overview/2.png differ diff --git a/doc/images/overview/3.png b/doc/images/overview/3.png new file mode 100644 index 0000000..a2b258f Binary files /dev/null and b/doc/images/overview/3.png differ diff --git a/doc/images/overview/4.png b/doc/images/overview/4.png new file mode 100644 index 0000000..dba78db Binary files /dev/null and b/doc/images/overview/4.png differ diff --git a/doc/overview.md b/doc/overview.md new file mode 100644 index 0000000..6acb2aa --- /dev/null +++ b/doc/overview.md @@ -0,0 +1,150 @@ +UMake Overview +============== +Since a lot of the core concepts from tup apply to umake it is highly recommended to read the following: + +* http://gittup.org/tup/ex_dependencies.html +* http://gittup.org/tup/make_vs_tup.html + + +Why UMake +--------- +- Fast modification detection (sub second even for large projects) +- More parallelism due to DAG (more details later) +- Fast re-compilation, with built-in cache (local-cache, remote-cache) + + +My First UMakfile +----------------- +`a.c`: +``` +#include "dep_a.h" +#include "stdio.h" + +int +main() +{ + printf("hello\n"); + return 0; +} +``` + + +`UMakefile`: +``` +: a.c > gcc -c {filename} {target} > a.o +``` +the rule above parsed to the following command +``` +gcc -c a.c a.o +``` + +### Graph of the rule above +![ ](./images/overview/1.png) + + +### Graph after executing the rule +![ ](images/overview/2.png) + +Now if any of the nodes (`a.c`, 
`dep_a.h`, `gcc -c a.c a.o` or `a.o`) will be modified, `a.o` will be regenerated. umake uses both timestamps and hashes to check for modifications. + +Targets as dependencies +---------------------- +`b.c`: +``` +#include "a.pb-c.h" +#include "stdio.h" + +int +main() +{ + printf("hello\n"); + return 0; +} +``` +Let's add another rule that generates a header which will be used by another rule. + +`UMakefile`: +``` +: a.proto > protoc {filename} > a.pb-c.h a.pb-c.c +: b.c > gcc -c {filename} {target} > b.o +``` +The first rule above generates the header `a.pb-c.h` that `b.c` includes. +### Graph after parsing the rules +![ ](images/overview/3.png) + + +Now we have a compilation ordering issue. `b.o` might be generated before `a.pb-c.h` because nothing enforces the order between `b.o` and `a.pb-c.h`. + +When running the above `UMakefile`, an error will be reported. This is because the second rule uses a target of the first rule internally (to generate `b.o`, `a.pb-c.h` is needed) + +### Manual Dependency: `|` +In order to fix this order issue we need to tell `umake` that generating `b.o` should come only after `a.pb-c.h` is generated. We would use `| a.pb-c.h` for that. + +`UMakefile`: +``` +: a.proto > protoc {filename} > a.pb-c.h a.pb-c.c +: b.c | a.pb-c.h > gcc -c {filename} {target} > b.o +``` + +### Graph after parsing the above UMakefile +![ ](images/overview/4.png) + + +:foreach rule +-------- + +``` +: src/src_a.c > compile.sh {filename} {target} > src/src_a.o +: src/src_b.c > compile.sh {filename} {target} > src/src_b.o +: src/src_c.c > compile.sh {filename} {target} > src/src_c.o +: src/src_d.c > compile.sh {filename} {target} > src/src_d.o +``` +can be changed to: +``` +:foreach src/*.c > compile.sh {filename} {target} > {dir}/{noext}.umake.o +``` +`:foreach` works exactly like `:` (macros, vars, manual dependencies.) + +Macros and Variables +-------------------- +The above statement might be repeated many times in UMakefile.
So macros can be used in order to make life easier. + +``` +!c(includes, flags) : gcc -c {filename} {target} $includes $flags > {dir}/{noext}.umake.o + +: src/*.c > !c(-Iinclude, -O3) +``` + +`c` - macro name + +`includes`, `flags` - arguments to macro + + +**The above can be also used with variables:** +``` +!c(includes, flags) : compile.sh {filename} {target} $includes $flags > {dir}/{noext}.umake.o + +$includes = -Iinclude +$flags = -O3 +: src/*.c > !c($includes, $flags) +``` + +Compiling specific target +------------------------- +``` +umake lib/libmy_lib.so +``` +In this case only the subgraph of `lib/libmy_lib.so` will be recompiled + +Variants +-------- +``` +$debug_flags = -O3 + +[variant:debug] +$debug_flags = -O0 + +$includes = $debug_flags +: src/*.c > !c($debug_flags, ) +``` +Now if compiled with `umake` the `-O3` flags will be passed. If compiled with `umake -v debug` the `-O0` flags will be passed. diff --git a/doc/umakefile.md b/doc/umakefile.md new file mode 100644 index 0000000..105454b --- /dev/null +++ b/doc/umakefile.md @@ -0,0 +1,178 @@ + + +UMakefile +--------- +## Rule `:` + +A single `command` is generated for this rule + +`:` source `|` manual-deps `>` cmd `>` target + +`manual-deps` - targets that this target depends on, helps keep a correct build order + +`cmd` - bash command + +`target` - the output of the command + + +`{filename}` - full path filename of the source `/my/path/filename.a` + +`{dir}` - directory containing the source `/my/path/` + +`{noext}` - filename without extension `filename` + +`{target}` - expanded target `helloworld.a` + +Example: +``` +: *.o > gcc {filename} -o {target} > helloworld.a +``` + +#### Recursive Source `**` +recursive dependencies are supported +``` +root\ + a\ + a.a\ + a.a.a + a.a.b + a.b.a + a.b\ + a.b.a + a.b.b + b\ + b +``` +* `root/**` -> (`a.a.a`, `a.a.b`, `b`) +* `root/a/**/*.b` -> (`a.a.b`, `a.b.b`) + +#### Manual Dependency `|` +In order to maintain a correct build order (that is executed in
parallel), there are use cases where a manual dependency is needed. For example: if there are `generated headers` (like when using `protobuf`) that are being later used by another `command` to generate a different target. + + +## Rule `:foreach` +Same as `:` but will create `command` for each `source` file existing on filesystem (like when we match the pattern *.o in the example above) + +## Macro `!` +Macros are expanded immediately (like using `#define X "hello"` in c/cpp) +Macros can accept input parameters (again, similar to using c/cpp macros) + +Example: +``` +!c(includes, flags) : gcc -g -O2 -Wall -fPIC -c {filename} $includes $flags -o {target} > {dir}/{noext}.o +``` +#### Default values +`Macro` supports default values, by default they are `""`: +``` +!c(includes, flags=-O3) : gcc -g -O2 -Wall -fPIC -c {filename} $includes $flags -o {target} > {dir}/{noext}.o +``` +now `!c` can be called as follows: +``` +!c(-Iinclude) # includes = -Iinclude, flags=-O3 +!c(-Iinclude, -O0) # includes = -Iinclude, flags=-O0 +!c() # includes = "", flags=-O3 +``` +## Const `$` +Consts are like macros, and can be used to parametrize calls to macros +Example: +``` +$libs = -lpthread +!so($libs) +``` + +## Config `[:]` +Configs allow configuring and changing umake execution. + +#### `workdir` +Default: \ + +Change the current working directory. +`relative paths` will now be relative to the new working dir. +`Absolute paths` will now be relative to the `root` (the directory where UMakefile exists). + +Example: +Relative path `my_dir_a/my_dir_b` will be evaluated as `/my_dir_a/my_dir_b`. +However `/my_dir_a/my_dir_b` will be evaluated as `/my_dir_a/my_dir_b` *regardless* of what our `workdir` is.
+ +The following rules are similar: + +``` +: src/packages/a > gcc > src/packages/b +``` +``` +[workdir:src/packages] +: a > gcc > b +``` +Return to root +``` +[workdir:/] +``` + +#### `variant` + +Default: ["default"] + +Note: multiple variants supported + +The ability to generate different variants from the same sources. For example: debug/release compilations. Variants are `terminated` with a `newline`. Code that is not part of a variant is always running (common to all variants). +``` +# variant is terminated with newline +[variant:default] +$cflags = -O3 + +[variant:debug] +$cflags = -O0 + +: my.c > !c($cflags) > my.o +``` +now compile with `umake` for default variant +``` +umake +``` +or +``` +umake --variant debug +``` +for `debug` variant. + +#### `include` +Default: - + +include another `UMakefile` into the current one. +``` +[include:somedir/umakefile] +``` +will open and parse `somedir/umakefile` in the current working dir context. + +#### `remote cache` +Default: None + +Environment: UMAKE_CONFIG_REMOTE_CACHE + +configure remote cache +``` +[remote_cache: ] +``` + +**remote-cache-type** - minio + +**hostname** - hostname:port + +**access-key** - access key (user name) + +**secret-key** - secret key (password) + +**bucketname** - bucketname + +**permission** - ro (read-only)/ rw (read/write) + + +#### `local cache size` +Default: 1500MB + +Environment: UMAKE_CONFIG_LOCAL_CACHE_SIZE + +configure local cache size +``` +[local_cache_size:] +``` \ No newline at end of file diff --git a/example/UMakefile b/example/UMakefile index 39e9f83..913d7d0 100644 --- a/example/UMakefile +++ b/example/UMakefile @@ -1,2 +1,2 @@ -:foreach *.c > gcc -c {filename} -o {target} > *.o +:foreach *.c > gcc -c {filename} -o {target} > {noext}.o : *.o > gcc {filename} -o {target} > hello_world diff --git a/setup.py b/setup.py index d3ba126..abb48c3 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,8 @@ setup( name='umake', version='0', - packages=['umake'], + packages=['umake'] +
find_packages(), install_requires=required, license='MIT', - scripts=['./umake/umake'] + scripts=['./umake/umake'], ) diff --git a/test/test.py b/test/test.py index 411e0f0..a279155 100644 --- a/test/test.py +++ b/test/test.py @@ -1,12 +1,31 @@ import unittest -from subprocess import check_output +from subprocess import check_output, Popen import subprocess import shutil import os import json - +import time +from minio import Minio +import minio ROOT = os.getcwd() +COVERAGE_DIR_PATH = os.path.join(ROOT, 'coverage') +COVEARAGE_CMD = "coverage-3.6 run -a" +UMAKE_BIN = f"{ROOT}/../umake/umake" + +class Timer: + def __init__(self, msg, iterations): + self.msg = msg + self.iterations = iterations + + def __enter__(self): + self.start = time.time() + return self + + def __exit__(self, *args): + self.end = time.time() + interavl = self.end - self.start + print(f"[{interavl:.3f}] [{interavl/self.iterations:.5f} per iter] {self.msg}") class TestUMake(unittest.TestCase): @@ -14,16 +33,26 @@ def setUp(self): shutil.rmtree("env", ignore_errors=True) os.mkdir("env") os.mkdir("env/proto") + try: + check_output("pkill -9 minio", shell=True) + except subprocess.CalledProcessError: + pass @classmethod def setUpClass(cls): - pass - + os.environ['COVERAGE_FILE'] = os.path.join(COVERAGE_DIR_PATH, ".coverage") + + @classmethod + def tearDownClass(cls): + print(f"\nCreating coverage xml from coverage/.coverage") + check_output(f"coverage-3.6 xml -i -o {os.path.join(COVERAGE_DIR_PATH, 'coverage.xml')}", stderr=subprocess.STDOUT, shell=True) + + def _create_setup_simple_umake(self): with open('env/a.h', "w") as f: f.write(""" """) - + with open('env/a.c', "w") as f: f.write(""" #include "stdio.h" @@ -45,19 +74,19 @@ def _create_setup_simple_umake(self): return 0; } """) - + with open('env/b.h', "w") as f: f.write(""" int hello(); """) - + def _check_file_exists(self, path, check_timestamp={}, is_changed={}): timestamps = dict() for p in path: full_path = os.path.join("env", p) 
self.assertTrue(os.path.isfile(full_path), msg=f"path {full_path} is {os.path.isfile(full_path)}") - + if len(is_changed): timestamps[p] = os.stat(full_path).st_mtime if is_changed[p] == True: @@ -65,40 +94,55 @@ def _check_file_exists(self, path, check_timestamp={}, is_changed={}): else: self.assertEqual(check_timestamp[p], timestamps[p], msg=f"p={p}") return timestamps - + def _check_file_not_exists(self, path): for p in path: self.assertFalse(os.path.isfile(os.path.join("env", p))) - def _compile(self, umake, variant="", targets=[],should_fail=False): + def _compile(self, umake, variant=[], targets=[], should_fail=False, should_output=True, local_cache=False, remote_cache=False): with open('env/UMakefile', "w") as umakefile: umakefile.write(umake) - if variant != "": - variant = f"--variant {variant}" + variant_config = "" + if variant: + variant_config = f'--variant {" --variant ".join(variant)}' targets_str = "" + + local_cache_conf = "--no-local-cache" + if local_cache: + local_cache_conf = "" + + remote_cache_conf="--no-remote-cache" + if remote_cache: + remote_cache_conf = "" + if targets: - targets_str = " ".join(targets) + targets_str = " ".join(targets) + try: - print(check_output(f"umake --no-remote-cache --no-local-cache {variant} {targets_str}", cwd="env/", shell=True).decode("utf-8")) + def call(): + return check_output(f"{COVEARAGE_CMD} {UMAKE_BIN} {remote_cache_conf} {local_cache_conf} {variant_config} {targets_str}", cwd="env/", shell=True).decode("utf-8") + if should_output: + print(call()) + else: + call() if should_fail: self.assertTrue(False, msg="umake compiled although should fail") - except subprocess.CalledProcessError as e: + except subprocess.CalledProcessError: if should_fail is False: - print(e) self.assertTrue(False, msg="Failed to run umake") def _assert_compilation(self, target, deps_conf=None, deps_manual=None, deps_auto_in=None): - check_output(f"umake {target} --no-local-cache --no-remote-cache --details --json json_out", 
shell=True, cwd="env/") + print(check_output(f"{COVEARAGE_CMD} {UMAKE_BIN} {target} --no-local-cache --no-remote-cache --details --json json_out", shell=True, cwd="env/")) with open("env/json_out") as f: deps = json.load(f) - + deps_conf = [os.path.join(ROOT, "env", dep) for dep in deps_conf] self.assertEqual(deps["deps"]["configured"], deps_conf, msg="deps_conf") deps_manual = [os.path.join(ROOT, "env", dep) for dep in deps_manual] self.assertEqual(deps["deps"]["manual"], deps_manual, msg="deps_manual") deps_auto_in = [os.path.join(ROOT, "env", dep) for dep in deps_auto_in] self.assertEqual(deps["deps"]["auto_in"], deps_auto_in, msg="deps_auto_in") - + def _rm(self, files): for f in files: os.remove(os.path.join("env", f)) @@ -227,7 +271,7 @@ def test_generated_targets(self): umake += ":foreach a_use.c | proto/a_proto.pb-c.h > gcc -g -O2 -Wall -fPIC -c {filename} -o {target} > {dir}/{noext}.o\n" umake += ":foreach b_notuse.c > gcc -g -O2 -Wall -fPIC -c {filename} -o {target} > {dir}/{noext}.o\n" umake += ": *.o proto/*.o > gcc -g --shared -O2 -Wall -fPIC {filename} -o {target} > test.so\n" - + self._compile(umake) timestamps = {"a_use.o": 0, "b_notuse.o": 0, "proto/a_proto.pb-c.c": 0, "proto/a_proto.pb-c.h": 0, "proto/a_proto.pb-c.o": 0, "test.so": 0} is_changed = {"a_use.o": True, "b_notuse.o": True, "proto/a_proto.pb-c.c": True, "proto/a_proto.pb-c.h": True, "proto/a_proto.pb-c.o": True, "test.so": True} @@ -247,7 +291,7 @@ def test_generated_targets(self): self._compile(umake) is_changed = {"a_use.o": True, "b_notuse.o": False, "proto/a_proto.pb-c.c": True, "proto/a_proto.pb-c.h": True, "proto/a_proto.pb-c.o": True, "test.so": True} timestamps = self._check_file_exists(["a_use.o", "b_notuse.o", "proto/a_proto.pb-c.c", "proto/a_proto.pb-c.h", "proto/a_proto.pb-c.o", "test.so"], check_timestamp=timestamps, is_changed=is_changed) - + umake = ":foreach proto/*.proto > protoc-c -I={dir} --c_out={dir} {filename} > {dir}/{noext}.pb-c.c {dir}/{noext}.pb-c.h\n" umake += 
":foreach b_notuse.c > gcc -g -O2 -Wall -fPIC -c {filename} -o {target} > {dir}/{noext}.o\n" umake += ": *.o > gcc -g --shared -O2 -Wall -fPIC {filename} -o {target} > test.so\n" @@ -299,10 +343,10 @@ def test_recursive_deps(self): umake += ": c > ../helper_file_create.sh something d > d\n" self._compile(umake) self._check_file_exists(["a", "b", "c", "d"]) - + self._rm(["a"]) self._compile(umake, should_fail=True) - + """ check if c delete -> all reconstructed """ self._create("a", "asd") self._compile(umake) @@ -319,7 +363,7 @@ def test_recursive_deps(self): self._compile(umake, should_fail=True) self._check_file_exists(["a", "b", "c", "d"]) # self._check_file_not_exists(["c"]) - + """ check all sources exists for a command """ self._create("a", "asd") self._create("a1", "asd") @@ -328,15 +372,15 @@ def test_recursive_deps(self): self._compile(umake) self._rm(["a"]) self._compile(umake, should_fail=True) - + self._create("a", "asd") self._compile(umake) - + self._rm(["a1"]) self._compile(umake, should_fail=True) # delete a -> b exists - # create a -> b exists + # create a -> b exists # delete a1 -> b exists def test_umakefile_parsing(self): @@ -363,7 +407,7 @@ def test_changing_command_for_target_more_objects(self): self._create("a.c", "") self._create("b.c", "") self._create("c.c", "") - + umake = ":foreach a.c b.c > gcc -g -O2 -Wall -fPIC -c {filename} -o {target} > {dir}/{noext}.o\n" umake += ": a.o b.o > gcc -g --shared -O2 -Wall -fPIC {filename} -o {target} > test.so\n" self._compile(umake) @@ -418,22 +462,30 @@ def test_change_working_dir(self): def test_variant(self): umake = "[variant:default]\n" umake += "$file = a\n" + umake += "!create(file1) : ../helper_file_create.sh something $file1 > $file1\n" + umake += ": > !create($file)\n" umake += "\n" umake += "[variant:test]\n" umake += "$file = b\n" - umake += "\n" umake += "!create(file1) : ../helper_file_create.sh something $file1 > $file1\n" umake += ": > !create($file)\n" + umake += "\n" 
self._compile(umake) self._check_file_exists(["a"]) + self._check_file_not_exists(["b"]) - self._compile(umake, variant="test") + self._compile(umake, variant=["test"]) self._check_file_exists(["b"]) self._check_file_not_exists(["a"]) - self._compile(umake, variant="test1", should_fail=True) - + self._compile(umake, variant=["test1"], should_fail=True) + + """ test multiple variants """ + self._compile(umake, variant=["default", "test"]) + self._check_file_exists(["a"]) + self._check_file_exists(["b"]) + def test_compiling_specific_target(self): umake = ": > ../helper_file_create.sh something a > a\n" umake += ": > ../helper_file_create.sh something b > b\n" @@ -442,11 +494,10 @@ def test_compiling_specific_target(self): timestamps = {"a": 0, "b": 0, "c": 0, "d": 0} is_changed = {"a": True, "b": True, "c": True, "d": True} - self._compile(umake) timestamps = self._check_file_exists(["a", "b", "c", "d"], check_timestamp=timestamps, is_changed=is_changed) self._rm(["a", "b", "c", "d"]) - + timestamps = {"a": 0} is_changed = {"a": True} self._compile(umake, targets=["a"]) @@ -476,11 +527,9 @@ def test_autodep_update(self): self._create("b", "b") self._create("d", "d") self._create("a.sh", "/bin/cat a && /bin/cat b && echo n >> c\n") - umake = ": > ./a.sh > c" self._compile(umake) self._assert_compilation("c", deps_conf=[], deps_manual=[], deps_auto_in=["a", "a.sh", "b"]) - self._create("a.sh", "/bin/cat a && echo n >> c\n") self._compile(umake) self._assert_compilation("c", deps_conf=[], deps_manual=[], deps_auto_in=["a", "a.sh"]) @@ -488,7 +537,7 @@ def test_autodep_update(self): self._create("a.sh", "/bin/cat a && /bin/cat d && /bin/cat b && echo n >> c\n") self._compile(umake) self._assert_compilation("c", deps_conf=[], deps_manual=[], deps_auto_in=["a", "a.sh", "b", "d"]) - + def test_include(self): self._create("UMakfile_b", ": > ../helper_file_create.sh something b > b\n") umake = "[include:UMakfile_b]\n" @@ -503,6 +552,108 @@ def test_include(self): 
self._compile(umake) self._check_file_exists(["other_dir/b"]) + def test_clean_exit(self): + # Create two parallel targets that are not related. + # This should make umake trigger two workers to handle the commands. + # The first command will fail and we want to make sure that all workers + # are terminated. + umake = ": > /bin/sleep 0.5 && false > \n" + umake += ": > /bin/sleep 60 > \n" + start = time.perf_counter() + self._compile(umake, should_fail=True) + assert time.perf_counter() - start < 5 + + def test_benchmark(self): + n_files = 300 + umake = "" + for i in range(n_files): + self._create(f"{i}.c", +f""" +int x{i}; +int my_func{i}() +{{ + x{i}++; + return 0; +}} +""") + umake += f": {i}.c > gcc -g -O2 -Wall -fPIC -c {{filename}} -o {{target}} > {i}.o\n" + + with Timer("build", n_files): + self._compile(umake, should_output=False, local_cache=True) + + with Timer("build - null", n_files): + self._compile(umake, should_output=False, local_cache=True) + os.remove("env/.umake/db.pickle") + with Timer("build - from local cache", n_files): + self._compile(umake, should_output=False, local_cache=True) + + BUCKET_NAME = "umake-bucket" + def _start_remote_cache_minio(self): + os.environ["MINIO_ACCESS_KEY"] = "umake" + os.environ["MINIO_SECRET_KEY"] = "umakeumake" + cmd = "minio server /data" + server = Popen(cmd, shell=True) + time.sleep(4) + self.mc = Minio("0.0.0.0:9000",access_key="umake", secret_key="umakeumake", secure=False) + try: + for obj in self.mc.list_objects(bucket_name=self.BUCKET_NAME, recursive=True): + self.mc.remove_object(bucket_name=self.BUCKET_NAME, object_name=obj.object_name) + self.mc.remove_bucket(self.BUCKET_NAME) + except minio.error.NoSuchBucket: + pass + + self.mc.make_bucket(self.BUCKET_NAME) + return server + + def test_remote_cache(self): + minio_server = self._start_remote_cache_minio() + umake = f"[remote_cache:minio 0.0.0.0:9000 umake umakeumake {self.BUCKET_NAME} rw]\n" + umake += ": > touch f > f" + self._compile(umake, 
remote_cache=True) + + self.mc.stat_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + self.mc.remove_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + self._rm(["f"]) + """ test env""" + os.environ["UMAKE_CONFIG_REMOTE_CACHE"] = f"minio 0.0.0.0:9000 umake umakeumake {self.BUCKET_NAME} rw" + umake = ": > touch f > f" + + self._compile(umake, remote_cache=True) + + self.mc.stat_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + self.mc.remove_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + self._rm(["f"]) + del os.environ["UMAKE_CONFIG_REMOTE_CACHE"] + + """ read only """ + umake = f"[remote_cache:minio 0.0.0.0:9000 umake umakeumake {self.BUCKET_NAME} ro]\n" + umake += ": > touch f > f" + self._compile(umake, remote_cache=True) + + with self.assertRaises(minio.error.NoSuchKey) as context: + self.mc.stat_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + + """ no remote cache """ + self.mc.remove_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + self._rm(["f"]) + umake = ": > touch f > f" + + with self.assertRaises(minio.error.NoSuchKey) as context: + self.mc.stat_object(self.BUCKET_NAME, "md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") + + minio_server.terminate() + + def test_local_cache(self): + umake = f"[local_cache_size:100]\n" + umake += ": > touch f > f" + self._compile(umake, local_cache=True) + assert os.path.exists("env/.umake/build-cache/md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") == True + + umake = f"[local_cache_size:0]\n" + umake += ": > touch f > f" + self._compile(umake, local_cache=True) + assert os.path.exists("env/.umake/build-cache/md-48b67e8fe03dc99f08de9753e3a1dae34eb0b136") == False + if __name__ == '__main__': unittest.main() diff --git a/umake/cache/__init__.py b/umake/cache/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/umake/cache/base_cache.py b/umake/cache/base_cache.py new 
file mode 100644 index 0000000..72048b9 --- /dev/null +++ b/umake/cache/base_cache.py @@ -0,0 +1,43 @@ +from abc import ABC, abstractmethod + +class MetadataCache: + def __init__(self, deps): + self.deps = deps + + +class Cache(ABC): + + @abstractmethod + def open_cache(self, cache_hash)->MetadataCache: + """ + Get an object from the cache using a given hash. + + :param cache_hash: The key of the object to get + :return: The resulting element from cache. + """ + pass + + @abstractmethod + def save_cache(self, cache_hash, metadata_cache: MetadataCache): + """ + Save a given object into the cache. + + :param cache_hash: The hash to use for storing the element in the cache. + :param metadata_cache: The object to store in the cache. + """ + pass + + @abstractmethod + def get_cache_stats(self): + """ + Get stats on the number of artifacts in the cache. + """ + pass + + @abstractmethod + def clear_bucket(self): + """ + Delete all the artifacts in the cache. + Mainly used for clean variants. + """ + pass \ No newline at end of file diff --git a/umake/cache/cache_mgr.py b/umake/cache/cache_mgr.py new file mode 100644 index 0000000..a05f00a --- /dev/null +++ b/umake/cache/cache_mgr.py @@ -0,0 +1,58 @@ +from enum import IntEnum +from .base_cache import MetadataCache +from .minio_cache import MinioCache +from .fs_cache import FsCache +from ..config import global_config + +class CacheMgr: + + class CacheType(IntEnum): + NOT_CACHED = 0 + LOCAL = 1 + REMOTE = 2 + + fs_cache: FsCache = FsCache() + def __init__(self): + if global_config.remote_cache_enable: + self.minio_cache = MinioCache() + + def open_cache(self, cache_hash) -> MetadataCache: + try: + if global_config.local_cache: + return self.fs_cache.open_cache(cache_hash) + else: + raise FileNotFoundError + except FileNotFoundError: + if global_config.remote_cache_enable: + return self.minio_cache.open_cache(cache_hash) + raise FileNotFoundError + + def save_cache(self, cache_hash, metadata_cache: MetadataCache): + if 
global_config.local_cache: + self.fs_cache.save_cache(cache_hash, metadata_cache) + if global_config.remote_cache_enable and global_config.remote_write_enable: + self.minio_cache.save_cache(cache_hash, metadata_cache) + + def _get_cache(self, deps_hash, targets): + ret = False + if global_config.local_cache: + ret = self.fs_cache._get_cache(deps_hash, targets) + if ret is False: + if global_config.remote_cache_enable: + ret = self.minio_cache._get_cache(deps_hash, targets) + if ret is True: + return CacheMgr.CacheType.REMOTE + else: + return CacheMgr.CacheType.LOCAL + return CacheMgr.CacheType.NOT_CACHED + + def _save_cache(self, deps_hash, targets, local_only=False): + if global_config.local_cache: + self.fs_cache._save_cache(deps_hash, targets) + if local_only: + return + if global_config.remote_cache_enable and global_config.remote_write_enable: + self.minio_cache._save_cache(deps_hash, targets) + + def gc(self): + self.fs_cache.gc() diff --git a/umake/cache/fs_cache.py b/umake/cache/fs_cache.py new file mode 100644 index 0000000..d3648d2 --- /dev/null +++ b/umake/cache/fs_cache.py @@ -0,0 +1,102 @@ +import shutil +import hashlib +import pickle +import os +from os.path import join +from subprocess import check_output +from .base_cache import MetadataCache +from umake.config import UMAKE_BUILD_CACHE_DIR +from umake.colored_output import out +from umake.utils.fs import fs_lock, fs_unlock, get_size_KB +from umake.utils.timer import Timer +from umake.config import global_config + + +class FsCache: + + def __init__(self): + pass + + def open_cache(self, cache_hash) -> MetadataCache: + cache_src = join(UMAKE_BUILD_CACHE_DIR, "md-" + cache_hash.hex()) + with open(cache_src, "rb") as metadata_file: + metadata = pickle.load(metadata_file) + return metadata + + def save_cache(self, cache_hash, metadata_cache: MetadataCache): + cache_src = join(UMAKE_BUILD_CACHE_DIR, "md-" + cache_hash.hex()) + with open(cache_src, "wb") as metadata_file: + pickle.dump(metadata_cache, 
metadata_file, protocol=pickle.HIGHEST_PROTOCOL) + + def _get_cache(self, deps_hash, targets): + if deps_hash is None: + return False + cache_src = join(UMAKE_BUILD_CACHE_DIR, deps_hash.hex()) + try: + for target in targets: + f = hashlib.sha1(target.encode("ascii")).hexdigest() + src = join(cache_src, f) + shutil.copyfile(src, target) + shutil.copymode(src, target) + except FileNotFoundError: + shutil.rmtree(cache_src, ignore_errors=True) + return False + + return True + + def _save_cache(self, deps_hash, targets): + cache_dst = join(UMAKE_BUILD_CACHE_DIR, deps_hash.hex()) + fd, lock_path = fs_lock(cache_dst) + if fd == None: + return + try: + shutil.rmtree(cache_dst, ignore_errors=True) + os.mkdir(cache_dst) + for target in targets: + dst = join(cache_dst, hashlib.sha1(target.encode("ascii")).hexdigest()) + tmp_dst = f"{dst}.tmp" + # do "atomic" copy, in case the copy is interferred + shutil.copyfile(target, tmp_dst) + shutil.copymode(target, tmp_dst) + os.rename(tmp_dst, dst) + finally: + fs_unlock(fd, lock_path) + + def gc(self): + def remove(path): + """ param could either be relative or absolute. 
""" + if os.path.isfile(path): + os.remove(path) # remove the file + elif os.path.isdir(path): + shutil.rmtree(path) # remove dir and all contains + else: + raise ValueError("file {} is not a file or dir.".format(path)) + + with Timer("done cache gc") as timer: + cache_dir_size_KB = get_size_KB(UMAKE_BUILD_CACHE_DIR) + high_thresh = cache_dir_size_KB * 1.1 + low_tresh = global_config.local_cache_size * 1024 * 0.6 + + if global_config.local_cache_size * 1024 > high_thresh: + return + + fd, lock_path = fs_lock(UMAKE_BUILD_CACHE_DIR) + if fd == None: + out.print_fail(f"\tcahce: {UMAKE_BUILD_CACHE_DIR} is locked") + return + try: + cache_entry_size = 0 + cache_dir = check_output(['ls', '-lru', '--sort=time', UMAKE_BUILD_CACHE_DIR]).decode('utf-8') + for cache_line in cache_dir.splitlines(): + try: + _, _, _, _, _, _, _, _, cache_entry_name = cache_line.split() + cache_entry_full_path = join(UMAKE_BUILD_CACHE_DIR, cache_entry_name) + remove(cache_entry_full_path) + cache_entry_size = get_size_KB(UMAKE_BUILD_CACHE_DIR) + if cache_entry_size < low_tresh: + break + except ValueError: + pass + timer.set_postfix(f"freed {int((cache_dir_size_KB - cache_entry_size) / 1024)}MB") + finally: + fs_unlock(fd, lock_path) \ No newline at end of file diff --git a/umake/cache/minio_cache.py b/umake/cache/minio_cache.py new file mode 100644 index 0000000..7a4276f --- /dev/null +++ b/umake/cache/minio_cache.py @@ -0,0 +1,145 @@ +import certifi +import urllib3 +import hashlib +import os +import pickle +import io +from os.path import join +from stat import S_IMODE +from minio import Minio, error # takes 0.1 seconds, check what to do +from minio.helpers import MAX_POOL_SIZE +from umake.cache import base_cache +from umake.cache.base_cache import MetadataCache +from umake.config import global_config +from umake.colored_output import out + + +class MinioCache(base_cache.Cache): + + def __init__(self): + self.n_timeouts = 0 + ca_certs = certifi.where() + http = urllib3.PoolManager( + timeout=1, 
+ maxsize=MAX_POOL_SIZE, + cert_reqs='CERT_REQUIRED', + ca_certs=ca_certs, + retries=urllib3.Retry( + total=3, + backoff_factor=0.5, + status_forcelist=[500, 502, 503, 504] + ) + ) + + self.mc = Minio(global_config.remote_hostname, + access_key=global_config.remote_access_key, + secret_key=global_config.remote_secret_key, + secure=False, + http_client=http) + + def _increase_timeout_and_check(self): + self.n_timeouts += 1 + if self.n_timeouts >= 3: + out.print_fail(f"remote cache timedout {self.n_timeouts} time, disabling remote cahce") + global_config.remote_cache_enable = False + + def open_cache(self, cache_hash)->MetadataCache: + cache_src = "md-" + cache_hash.hex() + try: + metadata_file = self.mc.get_object(bucket_name=global_config.remote_bucket, object_name=cache_src) + metadata = pickle.loads(metadata_file.read()) + return metadata + except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): + self._increase_timeout_and_check() + raise FileNotFoundError + except error.RequestTimeTooSkewed: + out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") + global_config.remote_cache_enable = False + raise FileNotFoundError + except error.NoSuchKey: + raise FileNotFoundError + + def save_cache(self, cache_hash, metadata_cache: MetadataCache): + cache_src = "md-" + cache_hash.hex() + md = pickle.dumps(metadata_cache, protocol=pickle.HIGHEST_PROTOCOL) + try: + self.mc.put_object(bucket_name=global_config.remote_bucket, object_name=cache_src, data=io.BytesIO(md), length=len(md)) + except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): + self._increase_timeout_and_check() + except error.RequestTimeTooSkewed: + out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") + global_config.remote_cache_enable = False + + def _get_chmod(self, src): + if hasattr(os, 'chmod'): + st = os.stat(src) + return 
st.st_mode + else: + return None + + def _set_chmod(self, dst, st_mode): + os.chmod(dst, S_IMODE(st_mode)) + + def _get_cache(self, deps_hash, targets): + if deps_hash is None: + return False + cache_src = deps_hash.hex() + try: + for target in targets: + f = hashlib.sha1(target.encode("ascii")).hexdigest() + src = join(cache_src, f) + obj = self.mc.fget_object(bucket_name=global_config.remote_bucket, object_name=src, file_path=target) + st_mode = int(obj.metadata["X-Amz-Meta-St_mode"]) + self._set_chmod(target, st_mode) + except KeyError: + # some cases with minio that .metadata["X-Amz-Meta-St_mode"] is not exists + # the file will be pushed again after compilation + out.print_fail("metadata not exists") + return False + except error.NoSuchKey: + return False + except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): + self._increase_timeout_and_check() + return False + except error.RequestTimeTooSkewed: + out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") + global_config.remote_cache_enable = False + return False + + return True + + def _save_cache(self, deps_hash, targets): + cache_dst = deps_hash.hex() + # fd, lock_path = fs_lock(cache_dst) + # if fd == None: + # return + try: + # shutil.rmtree(cache_dst, ignore_errors=True) + # os.mkdir(cache_dst) + for target in targets: + dst = join(cache_dst, hashlib.sha1(target.encode("ascii")).hexdigest()) + file_attr = {"st_mode": self._get_chmod(target)} + self.mc.fput_object(bucket_name=global_config.remote_bucket, object_name=dst, file_path=target, metadata=file_attr) + except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): + self._increase_timeout_and_check() + except error.RequestTimeTooSkewed: + out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") + global_config.remote_cache_enable = False + finally: + # fs_unlock(fd, lock_path) 
+ pass + + def get_cache_stats(self): + bucket_size = 0 + n_objects = 0 + for obj in self.mc.list_objects(bucket_name=global_config.remote_bucket, recursive=True): + if obj.is_dir: + continue + bucket_size += obj.size + n_objects += 1 + print(f"bucket size {int(bucket_size / 1024 / 1024)}MB, n_objects {n_objects}") + + def clear_bucket(self): + for obj in self.mc.list_objects(bucket_name=global_config.remote_bucket, recursive=True): + self.mc.remove_object(bucket_name=global_config.remote_bucket, object_name=obj.object_name) + self.get_cache_stats() \ No newline at end of file diff --git a/umake/colored_output.py b/umake/colored_output.py index dcb717e..71551af 100644 --- a/umake/colored_output.py +++ b/umake/colored_output.py @@ -1,30 +1,15 @@ import sys import threading -import os -from os.path import join -from subprocess import check_output, CalledProcessError from datetime import datetime +from umake.config import UMAKE_MAX_WORKERS, UMAKE_BUILD_CACHE_DIR +from umake.utils.fs import get_size_KB +from umake.config import global_config -ROOT = os.getcwd() -UMAKE_ROOT_DIR = join(ROOT, ".umake") -UMKAE_TMP_DIR = join(UMAKE_ROOT_DIR, "tmp") -UMAKE_BUILD_CACHE_DIR = join(UMAKE_ROOT_DIR, "build-cache") -UMAKE_BUILD_CACHE_MAX_SIZE_MB = 1500 MINIMAL_ENV = {"PATH": "/usr/bin"} -UMAKE_MAX_WORKERS = 8 -UMAKE_DB = join(UMAKE_ROOT_DIR, "db.pickle") - file_action_fmt = " [{action}] {filename}" is_ineractive_terminal = sys.stdout.isatty() -def get_size_KB(path): - try: - return int(check_output(['du','-s', path]).split()[0].decode('utf-8')) - except CalledProcessError: - return 0 - - class bcolors: HEADER = '\033[95m' OKBLUE = '\033[94m' @@ -36,24 +21,31 @@ class bcolors: UNDERLINE = '\033[4m' +def format_text(out_str, color: bcolors=""): + if is_ineractive_terminal: + return f"{color} {out_str} {bcolors.ENDC}" + else: + return out_str + + class AtomicInt: def __init__(self): self.num = 0 self.lock = threading.Lock() - + def inc(self): with self.lock: self.num += 1 - + def 
dec(self): with self.lock: self.num -= 1 - + def __repr__(self): return str(self.num) class InteractiveOutput: - + def __init__(self): self.bar_lock = threading.Lock() self.n_active_workers = AtomicInt() @@ -64,7 +56,7 @@ def __init__(self): self.start_time = datetime.now() self.curr_job = "" - self.variant = "deafult" + self.variant = "default" self.n_calls = 0 def _get_curr_cache_size(self): @@ -78,37 +70,48 @@ def update_bar(self, force=False): with self.bar_lock: if force: self.n_calls = 0 - self._get_curr_cache_size() bright_blue = "\033[1;34;40m" bold = "\033[1;37;40m" diff = int((datetime.now() - self.start_time).total_seconds()) - + sys.stdout.write("\x1b[2K\r") print(f"\r{bright_blue} Workers {bcolors.ENDC}{bold}{self.n_active_workers}/{UMAKE_MAX_WORKERS}{bcolors.ENDC}", end="") - print(f"{bright_blue} Cache {bcolors.ENDC}{bold}{int(self.cache_current)}/{UMAKE_BUILD_CACHE_MAX_SIZE_MB}[MB] {bcolors.ENDC}", end="") - if self.n_works_done: - n_cache_hits = self.n_local_hits + self.n_remote_hits - cache_ratio = int(n_cache_hits / self.n_works_done * 100) - local_ratio = 0 - remote_ratio = 0 - if n_cache_hits: - local_ratio = int(self.n_local_hits / n_cache_hits * 100) - remote_ratio = int(self.n_remote_hits / n_cache_hits * 100) - print(f"{bright_blue} Cache Hits {bcolors.ENDC}{bold}{cache_ratio}% {bcolors.ENDC}", end="") - print(f"{bright_blue} Local/Remote {bcolors.ENDC}{bold}{local_ratio}%/{remote_ratio}% {bcolors.ENDC}", end="") - print(f"{bright_blue} Variant {bcolors.ENDC}{bold} {self.variant} {bcolors.ENDC}", end="") print(f"{bright_blue} Time {bcolors.ENDC}{bold} {diff}[sec] {bcolors.ENDC}", end="") print(f"{bold} {self.curr_job} {bcolors.ENDC}", end="") sys.stdout.flush() - + self.n_calls += 1 - + + def compilation_summary(self): + self.n_calls = 0 # for cache calculation + self._get_curr_cache_size() + + bold = "\033[1;37;40m" + bright_blue = "\033[1;34;40m" + permissions = "rw" if global_config.remote_write_enable else "ro" + print() + if 
global_config.remote_cache_enable: + print(f"{bright_blue} Remote URI {bcolors.ENDC}", end="") + print(f"{bold} {global_config.remote_hostname} ({permissions}) {bcolors.ENDC}", end="") + if self.n_works_done: + n_cache_hits = self.n_local_hits + self.n_remote_hits + cache_ratio = int(n_cache_hits / self.n_works_done * 100) + local_ratio = 0 + remote_ratio = 0 + if n_cache_hits: + local_ratio = int(self.n_local_hits / n_cache_hits * 100) + remote_ratio = int(self.n_remote_hits / n_cache_hits * 100) + print(f"{bright_blue} Cache Hits {bcolors.ENDC}{bold}{cache_ratio}% {bcolors.ENDC}", end="") + print(f"{bright_blue} Local/Remote {bcolors.ENDC}{bold}{local_ratio}%/{remote_ratio}% {bcolors.ENDC}", end="") + print(f"{bright_blue} Variant {bcolors.ENDC}{bold} {self.variant} {bcolors.ENDC}", end="") + print(f"{bright_blue} Cache {bcolors.ENDC}{bold}{int(self.cache_current)}/{global_config.local_cache_size}[MB]{bcolors.ENDC}", end="") + sys.stdout.flush() + def print_colored(self, out_str, color=""): if is_ineractive_terminal: sys.stdout.write("\x1b[2K\r") - print(f"{color} {out_str} {bcolors.ENDC}") - else: - print(f"{out_str}") + print(format_text(out_str, color)) + # print(f"{color} {out_str} {bcolors.ENDC}") # self.update_bar() def print_fail(self, out_str): @@ -133,4 +136,10 @@ def print(self, s): self.print_colored(s) def destroy(self): - print() \ No newline at end of file + self.update_bar() + self.compilation_summary() + print() + + +# To be used as a default interactive output by other modules +out = InteractiveOutput() \ No newline at end of file diff --git a/umake/config.py b/umake/config.py new file mode 100644 index 0000000..1475e30 --- /dev/null +++ b/umake/config.py @@ -0,0 +1,39 @@ +import os +from os.path import join + +CONFIG_ENV_PREFIX = 'UMAKE_CONFIG_' + +# Local cache +ROOT = os.environ.get(f'{CONFIG_ENV_PREFIX}_ROOT', os.getcwd()) + + +# General +UMAKE_MAX_WORKERS = 8 +UMAKE_ROOT_DIR = join(ROOT, ".umake") +UMKAE_TMP_DIR = join(UMAKE_ROOT_DIR, "tmp") 
+UMAKE_BUILD_CACHE_DIR = join(UMAKE_ROOT_DIR, "build-cache") +UMAKE_DB = join(UMAKE_ROOT_DIR, "db.pickle") + +class Config: + def __init__(self): + self.json_file = None + self.interactive_output = False + self.targets = [] + self.variant = {"default"} + self.compile_commands = False + self.verbose = False + + self.local_cache = True + self.local_cache_size = 1500 + + self.remote_cache_config = True # how user configured + # the next is result of `remote_cache_config` and if configured + self.remote_cache_enable = False + self.remote_hostname = None + self.remote_access_key = None + self.remote_secret_key = None + self.remote_bucket = None + self.remote_write_enable = False + + +global_config = Config() \ No newline at end of file diff --git a/umake/exceptions.py b/umake/exceptions.py new file mode 100644 index 0000000..c67d7b6 --- /dev/null +++ b/umake/exceptions.py @@ -0,0 +1,26 @@ +class TargetNotGeneratedErr(Exception): + pass + + +class CompilationFailedErr(Exception): + pass + + +class CmdFailedErr(Exception): + pass + + +class NotFileErr(Exception): + pass + + +class DepIsGenerated(Exception): + pass + + +class LineParseErr(Exception): + pass + + +class CleanExitErr(Exception): + pass \ No newline at end of file diff --git a/umake/pywildcard.py b/umake/pywildcard.py index 07963e7..97454b3 100644 --- a/umake/pywildcard.py +++ b/umake/pywildcard.py @@ -1,3 +1,9 @@ +""" +taken from https://github.com/agalera/python-wildcard +LICENSE: GNU General Public License v3.0 +https://github.com/agalera/python-wildcard/blob/master/LICENSE.txt +""" + """Filename matching with shell patterns. fnmatch(FILENAME, PATTERN) matches according to the local convention. @@ -39,9 +45,9 @@ def fnmatch(name, pat): if the operating system requires it. If you don't want this, use fnmatchcase(FILENAME, PATTERN). 
""" - import os - name = os.path.normcase(name) - pat = os.path.normcase(pat) + # import os + # name = os.path.normcase(name) + # pat = os.path.normcase(pat) return fnmatchcase(name, pat) diff --git a/umake/umake b/umake/umake index 7c147cf..a70dd92 100755 --- a/umake/umake +++ b/umake/umake @@ -1,57 +1,17 @@ #!/usr/bin/python3.6 import time -from umake.colored_output import InteractiveOutput, bcolors, ROOT, UMAKE_ROOT_DIR, UMKAE_TMP_DIR, UMAKE_BUILD_CACHE_DIR, UMAKE_BUILD_CACHE_MAX_SIZE_MB, MINIMAL_ENV, get_size_KB, UMAKE_MAX_WORKERS, UMAKE_DB +from umake.colored_output import InteractiveOutput, bcolors, MINIMAL_ENV, format_text +from umake.colored_output import out +from umake.utils.timer import Timer # from pyinstrument import Profiler # profiler = Profiler() -out = InteractiveOutput() -class Timer: - def __init__(self, msg, threshold=0, color=bcolors.OKGREEN): - self.msg = msg - self.postfix = "" - self.prefix = "" - self.threshold = threshold - self.color = color - - def set_prefix(self, prefix): - self.prefix = prefix - - def set_postfix(self, postfix): - self.postfix = postfix - - def __enter__(self): - self.start = time.time() - return self - - def __exit__(self, *args): - self.end = time.time() - self.interval = self.end - self.start - if self.interval > self.threshold: - out.print_colored(f"[{self.interval:.3f}] {self.prefix} {self.msg} {self.postfix}", self.color) - - -class MetadataCache: - def __init__(self, deps): - self.deps = deps - - -def fs_lock(path): - lock_path = path + ".lock" - try: - fd = os.open(lock_path, os.O_CREAT | os.O_EXCL) - return fd, lock_path - except FileExistsError: - return None, None - - -def fs_unlock(fd, lock_path): - try: - os.close(fd) - finally: - os.remove(lock_path) +class WorkerExit: + pass +worker_exit_code = WorkerExit() with Timer("done imports"): from subprocess import Popen, PIPE, check_output, TimeoutExpired @@ -71,332 +31,24 @@ with Timer("done imports"): from itertools import chain import shutil import sys - 
import umake.pywildcard as fnmatch - from minio import Minio, error # takes 0.1 seconds, check what to do - from minio.helpers import (MAX_PART_SIZE, - MAX_POOL_SIZE, - MIN_PART_SIZE, - DEFAULT_PART_SIZE, - MAX_MULTIPART_COUNT) import urllib3 import certifi import io import glob - -class Config: - def __init__(self): - self.json_file = None - self.interactive_output = False - self.remote_cache = True - self.local_cache = True - self.targets = [] - self.variant = "default" - - -global_config = Config() - - -class MinioCache: - - BUCKET = "umake-build-cache" - def __init__(self): - self.n_timeouts = 0 - ca_certs = certifi.where() - http = urllib3.PoolManager( - timeout=1, - maxsize=MAX_POOL_SIZE, - cert_reqs='CERT_REQUIRED', - ca_certs=ca_certs, - retries=urllib3.Retry( - total=3, - backoff_factor=0.5, - status_forcelist=[500, 502, 503, 504] - ) - ) - - self.mc = Minio("my-server", - access_key='user', - secret_key='pass', - secure=False, - http_client=http) - - def _increase_timeout_and_check(self): - self.n_timeouts += 1 - if self.n_timeouts >= 3: - out.print_fail(f"remote cache timedout {self.n_timeouts} time, disabling remote cahce") - global_config.remote_cache = False - - def open_cache(self, cache_hash) -> MetadataCache: - cache_src = "md-" + cache_hash.hex() - try: - metadata_file = self.mc.get_object(bucket_name=self.BUCKET, object_name=cache_src) - metadata = pickle.loads(metadata_file.read()) - return metadata - except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): - self._increase_timeout_and_check() - raise FileNotFoundError - except error.RequestTimeTooSkewed: - out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") - global_config.remote_cache = False - raise FileNotFoundError - except error.NoSuchKey: - raise FileNotFoundError - - def save_cache(self, cache_hash, metadata_cache: MetadataCache): - cache_src = "md-" + cache_hash.hex() - md = 
pickle.dumps(metadata_cache, protocol=pickle.HIGHEST_PROTOCOL) - try: - self.mc.put_object(bucket_name=self.BUCKET, object_name=cache_src, data=io.BytesIO(md), length=len(md)) - except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): - self._increase_timeout_and_check() - except error.RequestTimeTooSkewed: - out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") - global_config.remote_cache = False - - def _get_chmod(self, src): - if hasattr(os, 'chmod'): - stat_func, chmod_func = os.stat, os.chmod - st = os.stat(src) - return st.st_mode - else: - return None - - def _set_chmod(self, dst, st_mode): - os.chmod(dst, S_IMODE(st_mode)) - - def _get_cache(self, deps_hash, targets): - if deps_hash is None: - return False - cache_src = deps_hash.hex() - try: - for target in targets: - f = hashlib.sha1(target.encode("ascii")).hexdigest() - src = join(cache_src, f) - obj = self.mc.fget_object(bucket_name=self.BUCKET, object_name=src, file_path=target) - st_mode = int(obj.metadata["X-Amz-Meta-St_mode"]) - self._set_chmod(target, st_mode) - except KeyError: - # some cases with minio that .metadata["X-Amz-Meta-St_mode"] is not exists - # the file will be pushed again after compilation - out.print_fail("metadata not exists") - return False - except error.NoSuchKey: - return False - except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): - self._increase_timeout_and_check() - return False - except error.RequestTimeTooSkewed: - out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") - global_config.remote_cache = False - return False - - return True - - def _save_cache(self, deps_hash, targets): - cache_dst = deps_hash.hex() - # fd, lock_path = fs_lock(cache_dst) - # if fd == None: - # return - try: - # shutil.rmtree(cache_dst, ignore_errors=True) - # os.mkdir(cache_dst) - for target in targets: - dst 
= join(cache_dst, hashlib.sha1(target.encode("ascii")).hexdigest()) - file_attr = {"st_mode": self._get_chmod(target)} - self.mc.fput_object(bucket_name=self.BUCKET, object_name=dst, file_path=target, metadata=file_attr) - except (urllib3.exceptions.ReadTimeoutError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError): - self._increase_timeout_and_check() - except error.RequestTimeTooSkewed: - out.print_fail("Time on your host not configured currectlly, remote-cache is disabled") - global_config.remote_cache = False - finally: - # fs_unlock(fd, lock_path) - pass - - def get_cache_stats(self): - bucket_size = 0 - n_objects = 0 - for obj in self.mc.list_objects(bucket_name=self.BUCKET, recursive=True): - if obj.is_dir: - continue - bucket_size += obj.size - n_objects += 1 - print(f"bucket size {int(bucket_size / 1024 / 1024)}MB, n_objects {n_objects}") - - def clear_bucket(self): - for obj in self.mc.list_objects(bucket_name=self.BUCKET, recursive=True): - self.mc.remove_object(bucket_name=self.BUCKET, object_name=obj.object_name) - self.get_cache_stats() - - -class FsCache: - - def __init__(self): - pass - - def open_cache(self, cache_hash) -> MetadataCache: - cache_src = join(UMAKE_BUILD_CACHE_DIR, "md-" + cache_hash.hex()) - with open(cache_src, "rb") as metadata_file: - metadata = pickle.load(metadata_file) - return metadata - - - def save_cache(self, cache_hash, metadata_cache: MetadataCache): - cache_src = join(UMAKE_BUILD_CACHE_DIR, "md-" + cache_hash.hex()) - with open(cache_src, "wb") as metadata_file: - pickle.dump(metadata_cache, metadata_file, protocol=pickle.HIGHEST_PROTOCOL) - - def _get_cache(self, deps_hash, targets): - if deps_hash is None: - return False - cache_src = join(UMAKE_BUILD_CACHE_DIR, deps_hash.hex()) - try: - for target in targets: - f = hashlib.sha1(target.encode("ascii")).hexdigest() - src = join(cache_src, f) - shutil.copyfile(src, target) - shutil.copymode(src, target) - except FileNotFoundError: - 
shutil.rmtree(cache_src, ignore_errors=True) - return False - - return True - - def _save_cache(self, deps_hash, targets): - cache_dst = join(UMAKE_BUILD_CACHE_DIR, deps_hash.hex()) - fd, lock_path = fs_lock(cache_dst) - if fd == None: - return - try: - shutil.rmtree(cache_dst, ignore_errors=True) - os.mkdir(cache_dst) - for target in targets: - dst = join(cache_dst, hashlib.sha1(target.encode("ascii")).hexdigest()) - tmp_dst = f"{dst}.tmp" - # do "atomic" copy, in case the copy is interferred - shutil.copyfile(target, tmp_dst) - shutil.copymode(target, tmp_dst) - os.rename(tmp_dst, dst) - finally: - fs_unlock(fd, lock_path) - - def gc(self): - def remove(path): - """ param could either be relative or absolute. """ - if os.path.isfile(path): - os.remove(path) # remove the file - elif os.path.isdir(path): - shutil.rmtree(path) # remove dir and all contains - else: - raise ValueError("file {} is not a file or dir.".format(path)) - - with Timer("done cache gc") as timer: - cache_dir_size_KB = get_size_KB(UMAKE_BUILD_CACHE_DIR) - high_thresh = cache_dir_size_KB * 1.1 - low_tresh = UMAKE_BUILD_CACHE_MAX_SIZE_MB * 1024 * 0.6 - - if UMAKE_BUILD_CACHE_MAX_SIZE_MB * 1024 > high_thresh: - return - - fd, lock_path = fs_lock(UMAKE_BUILD_CACHE_DIR) - if fd == None: - out.print_fail(f"\tcahce: {UMAKE_BUILD_CACHE_LOCK} is locked") - return - try: - cache_dir = check_output(['ls', '-lru', '--sort=time', UMAKE_BUILD_CACHE_DIR]).decode('utf-8') - for cache_line in cache_dir.splitlines(): - try: - _, _, _, _, _, _, _, _, cache_entry_name = cache_line.split() - cache_entry_full_path = join(UMAKE_BUILD_CACHE_DIR, cache_entry_name) - remove(cache_entry_full_path) - cache_entry_size = get_size_KB(UMAKE_BUILD_CACHE_DIR) - if cache_entry_size < low_tresh: - break - except ValueError: - pass - timer.set_postfix(f"freed {int((cache_dir_size_KB - cache_entry_size) / 1024)}MB") - finally: - fs_unlock(fd, lock_path) - - -class CacheMgr: - - class CacheType(IntEnum): - NOT_CACHED = 0 - LOCAL = 1 
- REMOTE = 2 - - fs_cache: FsCache = FsCache() - def __init__(self): - if global_config.remote_cache: - self.minio_cache = MinioCache() - - def open_cache(self, cache_hash) -> MetadataCache: - try: - if global_config.local_cache: - return self.fs_cache.open_cache(cache_hash) - else: - raise FileNotFoundError - except FileNotFoundError: - if global_config.remote_cache: - return self.minio_cache.open_cache(cache_hash) - raise FileNotFoundError - - def save_cache(self, cache_hash, metadata_cache: MetadataCache): - if global_config.local_cache: - self.fs_cache.save_cache(cache_hash, metadata_cache) - if global_config.remote_cache: - self.minio_cache.save_cache(cache_hash, metadata_cache) - - def _get_cache(self, deps_hash, targets): - ret = False - if global_config.local_cache: - ret = self.fs_cache._get_cache(deps_hash, targets) - if ret is False: - if global_config.remote_cache: - ret = self.minio_cache._get_cache(deps_hash, targets) - if ret is True: - return CacheMgr.CacheType.REMOTE - else: - return CacheMgr.CacheType.LOCAL - return CacheMgr.CacheType.NOT_CACHED - - def _save_cache(self, deps_hash, targets, local_only=False): - if global_config.local_cache: - self.fs_cache._save_cache(deps_hash, targets) - if local_only: - return - if global_config.remote_cache: - self.minio_cache._save_cache(deps_hash, targets) - - def gc(self): - self.fs_cache.gc() - - - -def byte_xor(ba1, ba2): - return bytes([_a ^ _b for _a, _b in zip(ba1, ba2)]) - - - -class CmdFailedErr(RuntimeError): - pass - - -class TargetExistsErr(RuntimeError): - pass - - -class NotFileErr(RuntimeError): - pass + import umake.pywildcard as fnmatch + from umake.cache.cache_mgr import CacheMgr + from umake.cache.minio_cache import MinioCache + from umake.exceptions import * + from umake.utils.fs import fs_lock, fs_unlock, join_paths, get_size_KB + from umake import config + from umake.config import UMAKE_DB, UMAKE_MAX_WORKERS, ROOT, UMAKE_ROOT_DIR, UMKAE_TMP_DIR, UMAKE_BUILD_CACHE_DIR + from umake.config 
import global_config, CONFIG_ENV_PREFIX + from umake.cache.base_cache import MetadataCache -class DepIsGenerated(RuntimeError): - pass - -class LineParseErr(RuntimeError): - pass +def byte_xor(ba1, ba2): + return bytes([_a ^ _b for _a, _b in zip(ba1, ba2)]) class CmdExecuter: @@ -407,7 +59,7 @@ class CmdExecuter: self.dep_files = None self.is_ok = False self.is_from_cache: CacheMgr.CacheType = CacheMgr.CacheType.NOT_CACHED - + # cache state """ in """ self.deps_hash = None @@ -415,7 +67,7 @@ class CmdExecuter: self.cmd_hash = None """ out """ self.dep_files_hashes = dict() - + def _check_in_root(self, check_str: str): if check_str[0] == "/": if check_str.startswith("/tmp/") or check_str.startswith("/dev/") or check_str.startswith("/proc/") or \ @@ -436,6 +88,18 @@ class CmdExecuter: path = raw_path.split('"')[1] return self._check_in_root(path) + def _parse_creat(self, raw_path, args): + """ + 1234 creat("/home/umake-user/debian-example/umake.deb", O_RDONLY|O_CLOEXEC) = 3 + 1234 creat("tar_file.tar", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) + """ + rc_index = 5 if args[4] == "=" else 4 + rc = int(args[rc_index]) + if not rc >= 0: + return None + path = raw_path.split('"')[1] + return self._check_in_root(path) + def _parse_openat(self, raw_path, args): """ 21456 openat(AT_FDCWD, "/proc/sys/net/core/somaxconn", O_RDONLY|O_CLOEXEC) = 3 @@ -453,11 +117,14 @@ class CmdExecuter: path = args[2].split('"')[1] return self._check_in_root(path) - def make(self, cache_mgr: CacheMgr): + def make(self, cache_mgr: CacheMgr, is_exiting): tmp_unique_name_full_path = join(UMKAE_TMP_DIR, str(uuid.uuid1())) with Timer(self.cmd.compile_show(), color=bcolors.WARNING) as timer: + if global_config.verbose: + timer.add_log_line(self.cmd.cmd) + if self.target: - cache_type = cache_mgr._get_cache(self.deps_hash, self.target) + cache_type = cache_mgr._get_cache(self.deps_hash, self.target) if cache_type > CacheMgr.CacheType.NOT_CACHED: if cache_type == 
CacheMgr.CacheType.LOCAL: timer.set_prefix("[LOCAL-CACHE]") @@ -467,44 +134,53 @@ class CmdExecuter: self.is_ok = True self.is_from_cache = cache_type return - strace_cmd = f"strace -o{tmp_unique_name_full_path} -f -e open,openat /bin/bash -c '{self.cmd.cmd}'" - self.proc = Popen(strace_cmd, env=MINIMAL_ENV, shell=True, stdout=PIPE, stderr=PIPE, cwd=self.cmd.cmd_root) - + strace_cmd = f"strace -o{tmp_unique_name_full_path} -f -e creat,open,openat /bin/bash -c '{self.cmd.cmd}'" + self.proc = Popen(strace_cmd, env=os.environ, shell=True, stdout=PIPE, stderr=PIPE, cwd=self.cmd.cmd_root) + while True: try: - stdout, stderr = self.proc.communicate(timeout=3) + stdout, stderr = self.proc.communicate(timeout=0.1) break except TimeoutExpired: + if is_exiting() is False: + # exiting + self.proc.kill() + raise CleanExitErr() out.curr_job = self.cmd.summarized_show() out.update_bar() + rc = self.proc.poll() - stdout = stdout.decode("utf-8") stderr = stderr.decode("utf-8") - if rc != 0: - out.print_neutarl(stdout) - out.print_fail(stderr) + timer.add_log_line(format_text(f"{self.cmd.line}", bcolors.FAIL)) + timer.add_log_line(format_text(f"\t{self.cmd.cmd}", bcolors.BOLD)) + if stderr: + timer.add_log_line(format_text(f"\nError:\n\t\t{stderr}", "")) + if stdout: + timer.add_log_line(format_text(f"\nt\t{stdout}", "")) + + raise CompilationFailedErr() # TODO: print here the source of the command else: if stdout: - out.print(stdout) + timer.add_log_line(stdout) if stderr: - out.print(stderr) - self.is_ok = True - + timer.add_log_line(stderr) self.dep_files = set() with open(tmp_unique_name_full_path) as strace_output: for line in strace_output.readlines(): args = line.split() raw_path = args[1] - if not (raw_path.startswith('open(') or raw_path.startswith('openat(')): - continue if raw_path.startswith('open('): full_path = self._parse_open(raw_path, args) - else: + elif raw_path.startswith('openat('): full_path = self._parse_openat(raw_path, args) + elif 
raw_path.startswith('creat('): + full_path = self._parse_creat(raw_path, args) + else: + continue if full_path is None: continue @@ -519,27 +195,21 @@ class CmdExecuter: continue self.dep_files.add(full_path) if self.target: - if rc == 0 and not self.target.issubset(self.dep_files): - raise RuntimeError(f"Target not generated: Expected {self.target} Got: {self.dep_files}") + if not self.target.issubset(self.dep_files): + timer.add_log_line(format_text(f"Target not generated:\n \tExpected {self.target}\n \tGot: {self.dep_files}", bcolors.FAIL)) + raise TargetNotGeneratedErr() self.dep_files -= self.target - + deps_hash = self.cmd_hash for dep in self.dep_files: deps_hash = byte_xor(deps_hash, self.dep_files_hashes[dep]) cache_mgr._save_cache(deps_hash, self.target) timer.set_prefix("[CACHED]") - + def get_results(self): return self.dep_files, self.target -def join_paths(root, rest): - if rest[0] == "/": - ret = join(ROOT, rest[1:]) - else: - ret = join(root, rest) - return ret - class FileEntry: class EntryType(Enum): @@ -564,10 +234,9 @@ class FileEntry: def init(self): self.dependencies_built = 0 - def set_modified(self, new_value: bool): self.is_modified = new_value - + def increase_dependencies_built(self, inc: int): self.dependencies_built += inc @@ -577,7 +246,7 @@ class FileEntry: @staticmethod def file_md5sum(full_path): with open(full_path, "rb") as file_to_check: - data = file_to_check.read() + data = file_to_check.read() md5_returned = hashlib.sha1(data).digest() return md5_returned @@ -596,14 +265,14 @@ class FileEntry: self.md5sum = new_md5sum self.mtime = new_mtime return modified - + def delete_fs(self): out.print_file_deleted(self.full_path, "DELETING") try: os.remove(self.full_path) except FileNotFoundError: pass - + def update_with_md5sum(self, new_md5sum): stat = os.stat(self.full_path) self.mtime = int(stat.st_mtime * 100000) @@ -611,7 +280,7 @@ class FileEntry: def __str__(self): return f"{self.full_path}: {self.data.conf_deps}" - + def 
__repr__(self): return f"['{self.full_path}': '{self.is_modified}']" @@ -619,34 +288,35 @@ class FileEntry: class Line: def __init__(self, filename, line_num, line): self.filename = filename - self.line_num = line_num + self.line_num = line_num + 1 self.line = line - + def __str__(self): return f"{self.filename}:{self.line_num}\n\t{self.line}" class Cmd: - def __init__(self, cmd, dep, manual_deps, target, line, cmd_root): + def __init__(self, cmd, dep, manual_deps, target, line, cmd_root, source): self.cmd = cmd self.dep = dep self.manual_deps = manual_deps self.conf_deps = set(dep) self.target: set = target self.cmd_root = cmd_root + self.source = source self.line: Line = line def compile_show(self): return " ".join(sorted(self.target)) - + def summarized_show(self): return " ".join([os.path.basename(target) for target in sorted(self.target)]) def update(self, other): self.line = other.line - + def __eq__(self, other): return self.cmd == other.cmd and \ self.dep == other.dep and \ @@ -656,7 +326,7 @@ class Cmd: class GraphDB: def __init__(self, db_version): - self.nodes = dict() + self.nodes = dict() self.graph = igraph.Graph(directed=True) self.last_cmds = set() self.db_version = db_version @@ -664,7 +334,7 @@ class GraphDB: def sub_graph_nodes(self, sub_nodes=[]): if sub_nodes == []: return [key for key, fentry in self.nodes.items() if fentry.entry_type != FileEntry.EntryType.CMD] - + vertecies = set() for node in sub_nodes: try: @@ -693,18 +363,19 @@ class GraphDB: connections = [(from_node, to_node) for (from_node, to_node) in connections \ if not self.graph.are_connected(from_node, to_node)] self.graph.add_edges(connections) - + def remove_connections(self, connections): self.graph.delete_edges(connections) + vertices_to_del = set() for dep, target in connections: try: vert = self.graph.vs.find(dep) except ValueError: # it might not be exists - pass + continue if vert.degree() == 0: - self.graph.delete_vertices(vert.index) - del self.nodes[dep] + 
vertices_to_del.add(dep) + self.remove_node(vertices_to_del) def get_data(self, node) -> FileEntry: return self.nodes[node] @@ -716,19 +387,21 @@ class GraphDB: def init(self): for node in self.get_nodes(): fentry: FileEntry = self.get_data(node) - if fentry.entry_type == FileEntry.EntryType.CMD: - self.last_cmds.add(node) - elif fentry.entry_type == FileEntry.EntryType.FILE: + # if fentry.entry_type == FileEntry.EntryType.CMD: + # self.last_cmds.add(node) + if fentry.entry_type == FileEntry.EntryType.FILE: fentry.init() @staticmethod - def load_graph(): + def load_graph(): pathname = os.path.realpath(__file__) db_file_entry = FileEntry(pathname, FileEntry.EntryType.FILE) db_version = db_file_entry.md5sum try: with open(UMAKE_DB, "rb") as db_file: data: GraphDB = pickle.load(db_file) + return data + # don't use it for now if data.db_version != db_version: out.print_file_deleted(f"umake changed deleting db {UMAKE_DB}") os.remove(UMAKE_DB) @@ -736,7 +409,7 @@ class GraphDB: return data except FileNotFoundError: return GraphDB(db_version) - + def get_nodes(self, wanted_type=None): for name, node in self.nodes.items(): if wanted_type: @@ -761,10 +434,10 @@ class GraphDB: del self.nodes[node] indecies.append(self.graph.vs.find(node).index) self.graph.delete_vertices(indecies) - + def topological_sort(self): return [self.graph.vs[i]["name"] for i in self.graph.topological_sorting()] - + def subgraph_topological_sort(self, sub_nodes): vertecies = set() for node in sub_nodes: @@ -773,7 +446,7 @@ class GraphDB: vertecies.update(self.graph.subcomponent(idx_node, mode="in")) except ValueError: continue - + sub_graph = self.graph.subgraph(vertecies) return [sub_graph.vs[i]["name"] for i in sub_graph.topological_sorting()] @@ -829,13 +502,37 @@ class CmdTemplate: basename=os.path.basename(full_path), noext=noext, target=target) - self.cmds.append(Cmd(cmd, deps, manual_deps, targets, self.line, self.root)) + self.cmds.append(Cmd(cmd, deps, manual_deps, targets, self.line, 
self.root, full_path)) else: cmd = self.cmd_fmt.format(filename=full_path, dir=dirname, basename=os.path.basename(full_path), noext=noext) - self.cmds.append(Cmd(cmd, deps, manual_deps, {}, self.line, self.root)) + self.cmds.append(Cmd(cmd, deps, manual_deps, {}, self.line, self.root, full_path)) + + def _find_target_in_all_targets(self, target_fmt, all_targets, found_targets): + wild_card_exists = False + target_found = False + + if not all_targets: + return False + + if "*" in target_fmt: + wild_card_exists = True + + # run fnmatch only on needed paths, if "*" no exists just search in set + if wild_card_exists: + for global_target in all_targets: + if fnmatch.fnmatch(global_target, target_fmt): + found_targets.add(global_target) + target_found = True + else: + if target_fmt in all_targets: + found_targets.add(target_fmt) + target_found = True + + return target_found + def create_cmds(self, graph: GraphDB, all_targets: set): full_path = None @@ -843,32 +540,26 @@ class CmdTemplate: manual_deps = set() for dep_fmt in self.deps_fmt: - added = False - for global_target in all_targets: - dep_fmt_fullpath = join_paths(self.root, dep_fmt) - if fnmatch.fnmatch(global_target, dep_fmt_fullpath): - manual_deps.add(global_target) - added = True - - if added == False: + dep_fmt_fullpath = join_paths(self.root, dep_fmt) + if self._find_target_in_all_targets(dep_fmt_fullpath, all_targets, manual_deps) is False: raise RuntimeError(f"{self.line}: manual dep '{dep_fmt}' is not exists as target in other commands") if self.foreach: for source_fmt in self.sources_fmt: generated_sources = set() - for global_target in all_targets: - src_fmt_fullpath = join_paths(self.root, source_fmt) - if fnmatch.fnmatch(global_target, src_fmt_fullpath): - generated_sources.add(global_target) + src_fmt_fullpath = join_paths(self.root, source_fmt) + + self._find_target_in_all_targets(src_fmt_fullpath, all_targets, generated_sources) + files = self._iterate_file_glob(graph, source_fmt, all_targets) 
files.update(generated_sources) self._create_foreach_cmd(files, manual_deps, all_targets, graph) - + else: deps = set() sources = set() generated_sources = set() - + deps.update(manual_deps) fs_sources = [] for source_fmt in self.sources_fmt: @@ -879,13 +570,7 @@ class CmdTemplate: is_found = True source_fmt_fullpath = join_paths(self.root, source_fmt) - source_dir = os.path.dirname(source_fmt_fullpath) - for global_target in all_targets: - if fnmatch.fnmatch(global_target, source_fmt_fullpath): - is_found = True - generated_sources.add(global_target) - - if is_found is False: + if not (self._find_target_in_all_targets(source_fmt_fullpath, all_targets, generated_sources) or is_found): raise RuntimeError(f"[{source_fmt_fullpath}] {self.line}:\n \t\tsource mentioned in umakefile not exists") targets = set() @@ -894,10 +579,10 @@ class CmdTemplate: noext = None dirname = None parent_dir = None - + if fs_sources: full_path = sorted(fs_sources)[0] - + if full_path is not None: basename = os.path.basename(full_path) noext = os.path.splitext(basename)[0] @@ -923,8 +608,8 @@ class CmdTemplate: target=" ".join(sorted(targets))) deps.update(sources) deps.update(generated_sources) - self.cmds.append(Cmd(cmd, deps, manual_deps, targets, self.line, self.root)) - + self.cmds.append(Cmd(cmd, deps, manual_deps, targets, self.line, self.root, full_path)) + def find_between(string, token_start, token_end): state = "find_token_start" @@ -945,11 +630,12 @@ def find_between(string, token_start, token_end): class UMakeFileParser(): """ HELLO = 1 - : a.c > gcc {filename} -o {target} > {filename}.o - : > gcc {filename} -o {target} > {filename}.o - :foreach *.c > gcc {filename} -o {target} > {basename}.o - :foreach | sdf > gcc {filename} -o {target} > {basename}.o + : a.c > gcc {filename} -o {target} > {filename}.o + : > gcc {filename} -o {target} > {filename}.o + :foreach *.c > gcc {filename} -o {target} > {basename}.o + :foreach | sdf > gcc {filename} -o {target} > {basename}.o """ + def 
__init__(self, filename): self.fielanme = filename self.cmds_template: [CmdTemplate] = [] @@ -957,32 +643,74 @@ class UMakeFileParser(): self.load_file(filename) self.globals_vars = dict() self.macros = dict() + self.configs = dict() + self.parsed_variants = {"default"} self.parse_file(filename) + self._resolve_configs() + self._check_variants_usage() + + def _get_config(self, config_name): + env_config_name = f"{CONFIG_ENV_PREFIX}{config_name.upper()}" + config_value = os.getenv(env_config_name, self.configs.get(config_name, False)) + try: + del self.configs[config_name] + except KeyError: + pass + return config_value + def _check_variants_usage(self): + not_exists_varaints = global_config.variant.difference(self.parsed_variants) + if not_exists_varaints: + raise CmdFailedErr(f"variant\s {not_exists_varaints} not exists, supported: {self.parsed_variants}") + + def _resolve_configs(self): + config_value = self._get_config("remote_cache") + if config_value: + if global_config.remote_cache_config: + remote_type, hostname, access_key, secret_pass, bucket_name, permissions = config_value.split() + if permissions not in ['rw', 'ro']: + raise CmdFailedErr(f"not supported permission '{permissions}', supported are ['rw', 'ro']") + if permissions == "rw": + global_config.remote_write_enable = True + if remote_type == "minio": + global_config.remote_hostname = hostname + global_config.remote_access_key = access_key + global_config.remote_secret_key = secret_pass + global_config.remote_bucket = bucket_name + global_config.remote_cache_enable = True + else: + raise CmdFailedErr(f"not supported remote cahce '{remote_type}''") + + config_value = self._get_config("local_cache_size") + if config_value: + global_config.local_cache_size = int(config_value) + + if self.configs: + CmdFailedErr(f"unsupported config: '{self.configs}''") + def load_file(self, filename): with open(filename, mode="r") as umakefile: return umakefile.read() def parse_file(self, umakefile, workdir=ROOT, 
in_variant=False, use_current_variant=False): - if workdir is None: workdir = ROOT def should_line_parsing_stopped(in_variant, use_current_variant): return in_variant and not use_current_variant - + for line_num, line in enumerate(self.load_file(join(workdir, umakefile)).splitlines()): try: foreach = False deps_fmt = [] source_fmt = [] - + if line == "" or line[0] == "#": if line == "" and in_variant: in_variant = False use_current_variant = False continue - + if line[0] == ":": if should_line_parsing_stopped(in_variant, use_current_variant): continue @@ -995,7 +723,7 @@ class UMakeFileParser(): macro_args_defaults = self.macros[macro_name][2] if macro_args_sent == ['']: macro_args_sent = [] - + for idx, in_macro_args in enumerate(macro_args): try: sent_arg = macro_args_sent[idx] @@ -1011,7 +739,7 @@ class UMakeFileParser(): else: send_arg_value = sent_arg except KeyError: - raise RuntimeError(f"{umakefile}:{line_num} macro {macro_name} called with not exists arg: {macro_args_sent}") + raise CmdFailedErr(f"{umakefile}:{line_num} macro {macro_name} called with not exists arg: {macro_args_sent}") macro_body = macro_body.replace(in_macro_args, send_arg_value) line = line.replace(macro_call, macro_body) @@ -1043,7 +771,7 @@ class UMakeFileParser(): source_fmt = sources_cand[1:deps_index] if workdir: if not os.path.isdir(workdir): - RuntimeError(f"path is not directory {workdir}") + CmdFailedErr(f"path is not directory {workdir}") cmd_root = workdir else: cmd_root = ROOT @@ -1081,7 +809,7 @@ class UMakeFileParser(): try: self.globals_vars[var_name.strip()] += f" {var_body.strip()}" except KeyError: - raise RuntimeError(f"{line_num}: {line}, var {var_name.strip()} was not declared") + raise CmdFailedErr(f"{line_num}: {line}, var {var_name.strip()} was not declared") except ValueError: var_name, var_body = line.split("=", 1) for var_to_replace in find_between(var_body, "$", " "): @@ -1089,17 +817,18 @@ class UMakeFileParser(): self.globals_vars[var_name.strip()] = 
var_body.strip() elif line[0] == "[": if line[-1] != "]": - raise RuntimeError(f"{line_num}: {line} \n can't parse this line") + raise CmdFailedErr(f"{line_num}: {line} \n can't parse this line") if should_line_parsing_stopped(in_variant, use_current_variant): continue - config_name, config_value = line[1:-1].split(":") + config_name, config_value = line[1:-1].split(":", 1) config_name = config_name.strip() config_value = config_value.strip() if config_name == "variant": if in_variant: - raise RuntimeError(f"{line_num}: {line} \n cannot configure variant in variant") + raise CmdFailedErr(f"{line_num}: {line} \n cannot configure variant in variant") in_variant = True - if global_config.variant == config_value: + self.parsed_variants.add(config_value) + if config_value in global_config.variant: use_current_variant = True elif config_name == "workdir": if config_value == "/": @@ -1108,10 +837,11 @@ class UMakeFileParser(): workdir = join(ROOT, config_value) elif config_name == "include": self.parse_file(config_value, workdir, in_variant, use_current_variant) - + else: + self.configs[config_name] = config_value else: raise RuntimeError(f"{line_num}: {line} \n can't parse this line") - + except: out.print_fail(f"ERROR failed to parse UMakefile") @@ -1123,23 +853,22 @@ class UMakeFileParser(): class UMake: def __init__(self): - self.cache_mgr = CacheMgr() - self.graph = None + self.worker_continue = True # should wor + self.worker_threads = list() - self._start_executer_thread() - - def _init_build(self): + def _init_build_dirs(self): shutil.rmtree(UMKAE_TMP_DIR, ignore_errors=True) os.makedirs(UMKAE_TMP_DIR, exist_ok=True) os.makedirs(UMAKE_BUILD_CACHE_DIR, exist_ok=True) - def _start_executer_thread(self): + def _start_executer_threads(self): self.jobs_queue = Queue() # CmdExecuter self.done_queue = Queue() self.n_jobs = 0 for _ in range(UMAKE_MAX_WORKERS): - exec_thread = threading.Thread(target=self.executer_thread, daemon=True) + exec_thread = 
threading.Thread(target=self.executer_thread) + self.worker_threads.append(exec_thread) exec_thread.start() def _get_file_entry(self, full_path): @@ -1193,7 +922,7 @@ class UMake: elif fentry.entry_type == FileEntry.EntryType.FILE: self._remove_file_from_graph(f, deleted_set) else: - raise RuntimeError(f"trying to delete not file but {fentry.entry_type}. how can it be???") + raise RuntimeError(f"trying to delete not file but type is {fentry.entry_type}. how can it be???") self.graph.remove_node(deleted_set) def _graph_remove_cmd_node(self, cmd: Cmd, connection): @@ -1204,11 +933,11 @@ class UMake: for del1 in delete: connection.remove(del1) self.graph.remove_node(cmd.cmd) - + def _graph_add_cmd_node(self, cmd: Cmd, connections: set): fentry_cmd = FileEntry(cmd.cmd, FileEntry.EntryType.CMD, cmd) self.graph.add_node(cmd.cmd, fentry_cmd) - + for target in cmd.target: self.graph.add_node(target, FileEntry(target, FileEntry.EntryType.GENERATED, cmd)) connections.add((cmd.cmd, target)) @@ -1226,7 +955,7 @@ class UMake: if not self.graph.is_exists(target): self.graph.add_node(target, FileEntry(target, FileEntry.EntryType.GENERATED, new_cmd)) connections.add((new_cmd.cmd, target)) - + for dep in new_cmd.dep: connections.add((dep, new_cmd.cmd)) @@ -1247,8 +976,6 @@ class UMake: new_fentry = self._get_file_entry(f) self.graph.add_node(f, new_fentry) for cmd in cmd_template.cmds: - if self.graph.is_exists(cmd.cmd): - cmd_fentry = self.graph.get_data(cmd.cmd) cmds.add(cmd.cmd) removed_cmds = last_cmds.difference(cmds) @@ -1269,7 +996,7 @@ class UMake: # self.graph.remove_node(remove_cmd) delete_nodes.add(remove_cmd) self.graph.remove_node(delete_nodes) - + connections = set() for cmd_template in umakefile.cmds_template: for cmd in cmd_template.cmds: @@ -1281,38 +1008,48 @@ class UMake: else: self._graph_add_cmd_node(cmd, connections) self.graph.add_connections(connections) - + self.graph.last_cmds = cmds # check target request exists if global_config.targets: 
all_targets_exists = any(target in all_targets for target in global_config.targets) if not all_targets_exists: - raise RuntimeError(f"target not exist {global_config.targets}") - + raise CmdFailedErr(f"target not exist {global_config.targets}") + def executer_thread(self): cache_mgr = CacheMgr() - while True: + while self.worker_continue: executer: CmdExecuter executer = self.jobs_queue.get() + + if type(executer) is WorkerExit: + return + out.n_active_workers.inc() out.curr_job = executer.cmd.summarized_show() - out.print(f"{executer.cmd.cmd}") try: - executer.make(cache_mgr) + executer.make(cache_mgr, lambda: self.worker_continue) + executer.is_ok = True + except (CompilationFailedErr, TargetNotGeneratedErr, CleanExitErr) as e: + executer.is_ok = False + self._send_exits() except Exception as e: import traceback traceback.print_exc() out.print(e) executer.is_ok = False + self._send_exits() out.n_active_workers.dec() + self.done_queue.put(executer) + def _handle_done(self, add_conns_out, del_conns_out): execucter: CmdExecuter execucter = self.done_queue.get() self.n_jobs -= 1 if execucter.is_ok is False: - raise CmdFailedErr(f"command failed: {execucter.cmd.line}\n cmd:\n\t {execucter.cmd.cmd}") - + raise CmdFailedErr() + deps, targets = execucter.get_results() node = execucter.cmd.cmd @@ -1348,13 +1085,13 @@ class UMake: out.print(e) continue self.graph.add_node(dep, fentry) - + if not self.graph.graph.are_connected(dep, node): conns.append((dep, node)) # with Timer(f"connections {execucter.cmd.summarized_show()}: {len(conns)}", color=bcolors.FAIL): # self.graph.add_connections(conns) add_conns_out.extend(conns) - + del_cons = set() # kepp user conigured deps preds = preds - execucter.cmd.dep @@ -1368,7 +1105,7 @@ class UMake: target_node = self.graph.get_data(target) target_node.increase_dependencies_built(-1) target_node.update() - + if targets and not execucter.is_from_cache: self._set_deps_hash(node_entry, execucter) @@ -1419,7 +1156,7 @@ class UMake: 
continue while node_entry.dependencies_built > 0: self._handle_done(add_conns, del_conns) - + successors = set() for succ in self.graph.successors(node): succ_node = self.graph.get_data(succ) @@ -1430,7 +1167,7 @@ class UMake: if node_entry.entry_type == FileEntry.EntryType.CMD: deps_hash, cached_deps, metadata_hash = self._get_deps_hash(node_entry) execucter = CmdExecuter(successors, "", node_entry.data) - + execucter.cmd_hash = node_entry.md5sum execucter.metadata_hash = metadata_hash execucter.deps_hash = deps_hash @@ -1439,11 +1176,14 @@ class UMake: self.n_jobs += 1 else: node_entry.set_modified(False) - + if self.n_jobs: while self._handle_done(add_conns, del_conns): pass - + + # start terminating threads + self._send_exits() + self.graph.add_connections(add_conns) self.graph.remove_connections(del_conns) @@ -1458,35 +1198,95 @@ class UMake: def cache_gc(self): self.cache_mgr.gc() - + + def create_compilation_database(self): + """ + Create a compilation database based on the graph of the project. + Example entry for compilation database: + [ + { "directory": "/home/user/llvm/build", + "command": "/usr/bin/clang++ -Irelative -DSOMEDEF=\"With spaces, quotes and \\-es.\" -c -o file.o file.cc", + "file": "file.cc" }, + ... + ] + + For more details on compilation databases refer to: + https://clang.llvm.org/docs/JSONCompilationDatabase.html + """ + cmds = [] + for node in self.graph.nodes.values(): + # We only care about the commands that are executed, skip eveything else. 
+ if node.entry_type != FileEntry.EntryType.CMD: + continue + + cmds.append({ + 'directory': '/', + 'command': str(node.data.cmd), + 'file': str(node.data.source), + }) + + import json + with open('compile_commands.json', 'w') as db_file: + json.dump(cmds, db_file) + + def _send_exits(self): + if self.worker_continue is False: + return + self.worker_continue = False + for _ in range(UMAKE_MAX_WORKERS): + self.jobs_queue.put(worker_exit_code) + + def _exit_nicelly(self): + # stop all executers + self._send_exits() + with Timer("done clean exit"): + for thread in self.worker_threads: + thread.join(timeout=1) + def run(self): fd, lock_path = fs_lock(UMAKE_ROOT_DIR) if fd == None: out.print_fail(f"another umake is running!, if you sure it's not running remove {UMAKE_ROOT_DIR}.lock") os.sys.exit(-1) return + try: - self._init_build() + self._init_build_dirs() self.load_graph() self.scan_fs() - self.parse_cmd_files() # profiler.start() - self.execute_graph() + self.parse_cmd_files() # profiler.stop() + try: + self._start_executer_threads() + self.cache_mgr = CacheMgr() + self.execute_graph() + finally: + self._exit_nicelly() + # print(profiler.output_text(color=True, show_all=True)) self.dump_graph() + if global_config.compile_commands: + self.create_compilation_database() self.cache_gc() out.update_bar(force=True) - out.destroy() + except CmdFailedErr as e: + out.print_fail(e) + os.sys.exit(-1) + except Exception as e: + import traceback + traceback.print_exc() + out.print(e) + os.sys.exit(-1) finally: fs_unlock(fd, lock_path) def show_target_details(self, target): target_fentry: FileEntry target_fentry = self.graph.get_data(target) - + cmd = target_fentry.data.cmd all_deps = set(self.graph.predecessors(cmd)) - + configured_deps = sorted(target_fentry.data.dep - target_fentry.data.manual_deps) manual_deps = sorted(target_fentry.data.manual_deps) auto_deps = all_deps - target_fentry.data.dep @@ -1497,7 +1297,7 @@ class UMake: global_auto_deps = sorted(auto_deps - 
auto_dep_in_project) auto_dep_in_project = sorted(auto_dep_in_project) successors = set(self.graph.successors(target)) - + if global_config.json_file: with open(global_config.json_file, "w") as f: import json @@ -1525,7 +1325,7 @@ class UMake: out.print_colored(f"\t\t{idx:4} {dep:70} [{self.graph.get_data(dep).md5sum.hex()}]", bcolors.OKBLUE) print() print("\tsuccessors targets:") - for succ in successors: + for succ in sorted(successors): succ_targets = " ".join(sorted(set(self.graph.successors(succ)))) print(f"\t\t{succ_targets}") print() @@ -1538,7 +1338,7 @@ class UMake: generated = self.graph.get_nodes(FileEntry.EntryType.GENERATED) for idx, target in enumerate(sorted(generated)): print(f'{idx:4} {target.replace(ROOT + "/", "")}') - + def clean(self): generated = self.graph.get_nodes(FileEntry.EntryType.GENERATED) with Timer("done cleaning targets"): @@ -1571,11 +1371,11 @@ class UMake: self.show_target_details(graph_target) def show_target_details_run(self, targets): - umake.load_graph() - umake.parse_cmd_files() + self.load_graph() + self.parse_cmd_files() for target in targets: self.show_targets_details(target) - + def show_parsed_umakefile(self): self.load_graph() UMakefile = join(ROOT , "UMakefile") @@ -1589,92 +1389,114 @@ class UMake: for cmd in cmd_template.cmds: print(f"\t{cmd.cmd}") -umake = UMake() -if len(sys.argv) == 1: - umake.run() -else: - import argparse - parser = argparse.ArgumentParser() + def parse_args(self): + import argparse + parser = argparse.ArgumentParser() - parser.add_argument('targets', type=str, nargs="*", - help='target path') + parser.add_argument('targets', type=str, nargs="*", + help='target path') - parser.add_argument('-d', '--details', action='store_true', - help='details about the target') - - parser.add_argument('--json', action='store', dest='json_file', - help='output as json') - - parser.add_argument('--show-all-targets', action='store_true', dest="show_all_targets", - help="show all targets configured in UMakefile") 
+ parser.add_argument('-d', '--details', action='store_true', + help='details about the target') - parser.add_argument('--show-parsed-umakefile', action='store_true', dest="show_parsed_umakefile", - help="show parsed umakefile") - - parser.add_argument('--no-remote-cache', action='store_true', dest="no_remote_cache", - help="don't use remote cache") + parser.add_argument('--json', action='store', dest='json_file', + help='output as json') - parser.add_argument('--no-local-cache', action='store_true', dest="no_local_cache", - help="don't use local cache") - - parser.add_argument('--remote-cache-stats', action='store_true', dest="remote_cache_stats", - help="show stats of remote cache") + parser.add_argument('--show-all-targets', action='store_true', dest="show_all_targets", + help="show all targets configured in UMakefile") - parser.add_argument('--remote-cache-delete', action='store_true', dest="remote_cache_delete", - help="WARNING: delete all remote cache objects") - - parser.add_argument('-v', '--variant', action='store', dest="variant", - help="compile with diffrent variants") - - parser.add_argument('--clean', action='store_true', dest="clean", - help="clean umake file, with all targets") + parser.add_argument('--show-parsed-umakefile', action='store_true', dest="show_parsed_umakefile", + help="show parsed umakefile") - args = parser.parse_args() + parser.add_argument('--no-remote-cache', action='store_true', dest="no_remote_cache", + help="don't use remote cache") - if args.json_file: - global_config.json_file = args.json_file - - if args.no_remote_cache: - global_config.remote_cache = False - - if args.no_local_cache: - global_config.local_cache = False - - if args.remote_cache_stats: - mc = MinioCache() - mc.get_cache_stats() - os.sys.exit(0) - - if args.remote_cache_delete: - mc = MinioCache() - mc.clear_bucket() - os.sys.exit(0) - - if args.show_all_targets: - umake.load_graph() - umake.show_all_targets() + parser.add_argument('--no-local-cache', 
action='store_true', dest="no_local_cache", + help="don't use local cache") - if args.variant: - global_config.variant = args.variant - out.variant = global_config.variant + parser.add_argument('--remote-cache-stats', action='store_true', dest="remote_cache_stats", + help="show stats of remote cache") - if args.show_parsed_umakefile: - umake.show_parsed_umakefile() - os.sys.exit(0) - - if args.clean: - umake.load_graph() - umake.clean() - os.sys.exit(0) - - if args.details: - args.targets = [join(ROOT, t) + "**" for t in args.targets] - umake.show_target_details_run(args.targets) - else: - args.targets = [join(ROOT, t) for t in args.targets] - global_config.targets = args.targets - umake.run() - -#set(self.graph.graph.vs.select(_degree=80)) + parser.add_argument('--remote-cache-delete', action='store_true', dest="remote_cache_delete", + help="WARNING: delete all remote cache objects") + + parser.add_argument('-v', '--variant', + action='append', + type=str, + dest="variant", + default=[], + help="compile with diffrent variants") + + parser.add_argument('--clean', action='store_true', dest="clean", + help="clean umake file, with all targets") + + parser.add_argument('--verbose', action='store_true', dest="verbose", + help="show verbose compilation") + + parser.add_argument('--compile-commands', action='store_true', dest="compile_commands", + help="Create compile_commands.json file with info on the build") + + args = parser.parse_args() + + global_config.compile_commands = args.compile_commands + + if args.verbose: + global_config.verbose = args.verbose + + if args.json_file: + global_config.json_file = args.json_file + + if args.no_remote_cache: + global_config.remote_cache_config = False + + if args.no_local_cache: + global_config.local_cache = False + + if args.remote_cache_stats: + mc = MinioCache() + mc.get_cache_stats() + return + + if args.remote_cache_delete: + mc = MinioCache() + mc.clear_bucket() + return + + if args.show_all_targets: + self.load_graph() + 
self.show_all_targets() + + if args.variant: + global_config.variant = set(args.variant) + out.variant = " ".join(global_config.variant) + + if args.show_parsed_umakefile: + self.show_parsed_umakefile() + return + + if args.clean: + self.load_graph() + self.clean() + return + + if args.details: + args.targets = [join(ROOT, t) + "**" for t in args.targets] + self.show_target_details_run(args.targets) + else: + args.targets = [join(ROOT, t) for t in args.targets] + global_config.targets = args.targets + self.run() + + def start(self): + if len(sys.argv) == 1: + self.run() + else: + self.parse_args() + out.destroy() + + + +umake = UMake() +umake.start() # print(profiler.output_text(color=True)) diff --git a/umake/utils/__init__.py b/umake/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/umake/utils/fs.py b/umake/utils/fs.py new file mode 100644 index 0000000..1fb9e0a --- /dev/null +++ b/umake/utils/fs.py @@ -0,0 +1,33 @@ +import os +from subprocess import check_output, CalledProcessError +from umake.config import ROOT + +def fs_lock(path): + lock_path = path + ".lock" + try: + fd = os.open(lock_path, os.O_CREAT | os.O_EXCL) + return fd, lock_path + except FileExistsError: + return None, None + + +def fs_unlock(fd, lock_path): + try: + os.close(fd) + finally: + os.remove(lock_path) + + +def join_paths(root, rest): + if rest[0] == "/": + ret = f"{ROOT}/{rest[1:]}" + else: + ret = f"{root}/{rest}" + return ret + + +def get_size_KB(path): + try: + return int(check_output(['du','-s', path]).split()[0].decode('utf-8')) + except CalledProcessError: + return 0 \ No newline at end of file diff --git a/umake/utils/timer.py b/umake/utils/timer.py new file mode 100644 index 0000000..b9d1332 --- /dev/null +++ b/umake/utils/timer.py @@ -0,0 +1,37 @@ +from umake.colored_output import format_text, out, bcolors +import time + +class Timer: + def __init__(self, msg, threshold=0, color=bcolors.OKGREEN): + self.msg = msg + self.postfix = "" + self.prefix = "" + 
self.threshold = threshold + self.color = color + self.log_lines = list() + + def set_prefix(self, prefix): + self.prefix = prefix + + def set_postfix(self, postfix): + self.postfix = postfix + + def add_log_line(self, msg): + self.log_lines.append(msg) + + def __enter__(self): + self.start = time.time() + return self + + def __exit__(self, *args): + self.end = time.time() + self.interval = self.end - self.start + out_str = "" + if self.interval > self.threshold: + out_str += format_text(f"[{self.interval:.3f}] {self.prefix} {self.msg} {self.postfix}", self.color) + if self.log_lines: + if out_str: + out_str += "\n" + out_str += "\n".join(self.log_lines) + if out_str: + out.print_neutarl(out_str)