diff --git a/.travis.yml_example b/.travis.yml_example
new file mode 100644
index 0000000..f197720
--- /dev/null
+++ b/.travis.yml_example
@@ -0,0 +1,39 @@
+language: go
+
+go:
+- 1.8.3
+
+env:
+  matrix:
+  - PACKAGE_TYPE=deb
+  - PACKAGE_TYPE=rpm
+
+# install files needed for deployment
+before_install:
+- sudo apt-get -q update
+- sudo apt-get install -y make rpm ruby-dev build-essential
+
+# install fpm, used for building packages
+install:
+- gem install fpm --no-document
+
+# test the code
+script:
+- make test
+
+# build the packages
+after_success:
+- make pkg/$PACKAGE_TYPE
+
+deploy:
+  provider: s3
+  access_key_id:
+    secure:
+  secret_access_key:
+    secure:
+  bucket:
+  local-dir: ./pkg/$PACKAGE_TYPE
+  upload-dir: $PACKAGE_TYPE
+  acl: public_read
+  skip_cleanup: true
+  region: eu-central-1
\ No newline at end of file
diff --git a/Makefile_example b/Makefile_example
new file mode 100644
index 0000000..f6479f5
--- /dev/null
+++ b/Makefile_example
@@ -0,0 +1,47 @@
+NAME = pkgname
+URL = https://InsertUrl.here
+DESC = "description"
+MAINTAINER = "Maintainer name"
+LICENSE = "License here"
+DEPENDENCIES = "" # add dependencies here and add '-d $(DEPENDENCIES)' to fpm in the pkg/% block
+
+PKGDIR := ./pkg
+VERSION ?= $(shell cat ./VERSION)
+PACKAGE_TYPE := deb rpm
+
+PATH_BIN ?= /usr/bin
+
+test: vet ## runs unit tests
+	go test -v ./...
+
+vet: ## examines the go code with `go vet`
+	go vet ./...
+
+$(PKGDIR): $(addprefix $(PKGDIR)/,$(PACKAGE_TYPE)) ## creates artifacts for all distributions
+
+# PACKAGING
+$(PKGDIR)/rpm: TARGET_ARTIFACT=rpm
+$(PKGDIR)/rpm: FPM_DEPENDENCIES=rpm
+$(PKGDIR)/rpm: TARGET_FILE=$(NAME)-$(VERSION)-x86_64.rpm
+$(PKGDIR)/deb: TARGET_ARTIFACT=deb
+$(PKGDIR)/deb: FPM_DEPENDENCIES=apt
+$(PKGDIR)/deb: TARGET_FILE=$(NAME)_$(VERSION)_amd64.deb
+$(PKGDIR)/%: build ## creates the artifact for a specific distribution
+	mkdir -p $(PKGDIR)/$*
+	fpm -s dir -t $(TARGET_ARTIFACT) \
+		--name $(NAME) \
+		--package ./pkg/$*/$(TARGET_FILE) \
+		--category admin \
+		--deb-compression bzip2 \
+		--url $(URL) \
+		--description $(DESC) \
+		--maintainer $(MAINTAINER) \
+		--license $(LICENSE) \
+		--version $(VERSION) \
+		--architecture amd64 \
+		./usr
+
+# BUILD
+build: ## builds the code
+	mkdir -p .$(PATH_BIN)
+	GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -a -o .$(PATH_BIN)/$(NAME)
\ No newline at end of file
diff --git a/README.md b/README.md
index 7f2eb92..be6d73f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,15 @@
 # lambdaRepos
 POC for managing RPM and DEB repositories using aws' S3 and λ
+
+# Info
+Code for managing a `yum` (rpm) repository is located in the [rpm folder](https://github.com/tactycal/lambdaRepos/tree/master/rpm)
+
+Code for managing an `apt` (deb) repository is located in the [deb folder](https://github.com/tactycal/lambdaRepos/tree/master/deb)
+
+Both folders contain more detailed info on setting up the S3 bucket and the lambda function that keeps your repo in sync with the provided packages
+
+## Combining with TravisCI
+
+It is possible to automate the deployment of packages by combining this repository with Travis CI.
+
+Examples of the `.travis.yml` and `Makefile` used for automatic deployment of a Go project can be found in this repository (`.travis.yml_example` and `Makefile_example`)
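+
+The same artifacts can also be built locally by driving the example `Makefile` directly - a sketch, assuming a `VERSION` file, Go sources in the working directory, and `fpm` on the `PATH`:
+
+```
+make test       # runs go vet and the unit tests, as in the Travis 'script' stage
+make pkg/deb    # builds pkg/deb/<name>_<version>_amd64.deb with fpm
+make pkg/rpm    # builds pkg/rpm/<name>-<version>-x86_64.rpm
+```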
\ No newline at end of file
diff --git a/deb/Makefile b/deb/Makefile
new file mode 100644
index 0000000..80e232f
--- /dev/null
+++ b/deb/Makefile
@@ -0,0 +1,18 @@
+ZIPPED_FILES := s3apt.py requirements/gnupg.py # files to compress into the root of the zip
+ZIPPED_DIR := debian # folders from /requirements to compress into the root of the zip
+
+all: requires package
+
+help: ## displays this message
+	@grep -E '^[a-zA-Z_/%\-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+requires: ## installs required packages
+	pip install -t ./requirements -r requirements.txt
+
+package: ## creates zip of code
+	zip -j code.zip $(ZIPPED_FILES)
+	cd requirements && zip -r ../code.zip $(ZIPPED_DIR)
+
+clean: ## cleans up the repository
+	/bin/rm -rf code.zip
+	/bin/rm -rf ./requirements
\ No newline at end of file
diff --git a/deb/README.md b/deb/README.md
new file mode 100644
index 0000000..6cbf07a
--- /dev/null
+++ b/deb/README.md
@@ -0,0 +1,143 @@
+# AWS Lambda APT repository manager for S3
+
+Rewrite of [szinck/s3apt](https://github.com/szinck/s3apt) with a few changes and extra features: a Release file is generated and signed with the provided GPG key.
+
+## Readme contents
+
+* [Setting up code, S3 and Lambda](#setting-up-code-s3-and-lambda)
+  * [Getting the code](#getting-the-code)
+  * [GPG key](#gpg-key)
+  * [Set up role](#set-up-role)
+  * [Environment variables](#environment-variables)
+  * [Set up lambda with CLI](#set-up-lambda-with-cli)
+  * [Set up lambda manually](#set-up-lambda-manually)
+  * [The triggers](#the-triggers)
+  * [Set up S3](#set-up-s3)
+* [Setting up apt](#setting-up-apt)
+* [Notes](#notes)
+
+## Setting up code, S3 and Lambda
+
+### Getting the code
+Clone the repo, fetch all other required files and compress them:
+```
+git clone https://github.com/tactycal/lambdaRepos.git
+cd lambdaRepos/deb
+make all
+```
+
+### GPG key
+Create your GPG key (skip to exporting the key if you already have one):
+```
+gpg --gen-key
+# Follow the instructions
+# Create an 'RSA and RSA' key - option 1
+# For maximum security it is recommended to create a 4096-bit key
+# The key should not expire
+```
+
+Export your key:
+
+```
+gpg --export-secret-key -a "User Name" > secret.key # exports the secret key to secret.key
+```
+
+### Set up role
+
+Create a new role with S3 read/write access.
+
+This is the minimal policy the role must include (replace `<bucket-name>` with your bucket):
+```
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "",
+      "Action": [
+        "s3:GetObject",
+        "s3:PutObject",
+        "s3:PutObjectAcl"
+      ],
+      "Effect": "Allow",
+      "Resource": "arn:aws:s3:::<bucket-name>/*"
+    }
+  ]
+}
+```
+
+### Environment variables
+These are the environment variables you will have to set:
+
+| Key | Value |
+| --- | --- |
+| PUBLIC | True/False |
+| GPG_KEY | File location |
+| GPG_PASS | GPG key password |
+| BUCKET_NAME | Bucket name |
+| CACHE | Directory |
+
+**PUBLIC** Set to `True` for the outputs to be publicly readable.
+
+**GPG_KEY** Location of your GPG private key relative to the root of the bucket (e.g. secret/private.key). If this variable is not provided, lambda skips GPG signing.
+
+**GPG_PASS** Password of the private key uploaded to GPG_KEY (note: environment variables can be encrypted using KMS keys).
+
+**BUCKET_NAME** Name of the bucket. It should be the same as the one selected in the triggers and the one you're using for the repository.
+
+**CACHE** Path to the folder for the package cache (e.g. `deb/cache`).
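+
+For an already-created function, these variables can also be set from the CLI. A sketch; the values shown are placeholders to replace with your own:
+
+```
+aws lambda update-function-configuration \
+    --function-name <function-name> \
+    --environment Variables='{PUBLIC=True,GPG_KEY=secret/private.key,GPG_PASS=<password>,BUCKET_NAME=<bucket-name>,CACHE=deb/cache}'
+```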
+
+### Set up lambda with CLI
+
+[Install aws cli](http://docs.aws.amazon.com/cli/latest/userguide/installing.html)
+
+Create a new lambda function:
+```
+aws lambda create-function \
+    --function-name <function-name> \
+    --zip-file fileb://code.zip \
+    --role <role-arn> \
+    --handler s3apt.lambda_handler \
+    --runtime python2.7 \
+    --environment Variables='{PUBLIC=<...>,GPG_KEY=<...>,GPG_PASS=<...>,BUCKET_NAME=<...>,CACHE=<...>}'
+# <role-arn> is the ARN of the role with S3 read/write access
+# replace '<...>' with the environment variable values
+```
+
+### Set up lambda manually
+
+If the CLI is not your thing, you can upload the code manually.
+
+Create a new lambda function, set the handler to **s3apt.lambda_handler** and the runtime to **python 2.7**.
+
+Upload `code.zip` to the lambda function.
+
+### The triggers
+
+ * Object Created (All), suffix 'deb'
+ * Object Removed (All), suffix 'deb'
+ * If you are using a specific directory as the repo, set it as the prefix
+
+### Set up S3
+Create a folder in your S3 bucket with the same name as the CACHE variable.
+
+Upload the secret key file to the location you specified as GPG_KEY.
+
+Upload a .deb file to the desired folder; the lambda function should now keep your repository up to date.
+
+## Setting up apt
+
+First-time setup:
+```
+# append the repository to apt's sources (example link: "https://s3.eu-central-1.amazonaws.com/testbucket/repo")
+echo "deb https://s3.$AWS_SERVER.amazonaws.com/$BUCKET_NAME/$PATH_TO_FOLDER_WITH_DEBIAN_FILES /" | sudo tee -a /etc/apt/sources.list
+# add the public key to trusted sources - you have to export the public key or use a key server
+sudo apt-key add <public-key-file>
+sudo apt update
+sudo apt install <package>
+```
+
+Upgrading a package:
+```
+sudo apt update
+sudo apt upgrade
+```
+
+## Notes
+
+ * The .deb, Release and Packages files are, and should be, publicly accessible for the method of setting up apt's sources list described above to work. If you don't want them to be public, change PUBLIC in the environment variables to False and refer to szinck's guide [here](http://webscale.plumbing/managing-apt-repos-in-s3-using-lambda)
+ * Any deb file injected into your repo, including a malicious one, is automatically added to the repository. It is your job to make the bucket secure enough for this not to happen!
+ * **You should raise the lambda timeout above 10 seconds to make sure the function has time to finish**
diff --git a/deb/requirements.txt b/deb/requirements.txt
new file mode 100644
index 0000000..c76b6c9
--- /dev/null
+++ b/deb/requirements.txt
@@ -0,0 +1,9 @@
+boto3==1.3.1
+botocore==1.4.41
+docutils==0.12
+futures==3.0.5
+jmespath==0.9.0
+python-dateutil==2.5.3
+python-debian==0.1.28
+six==1.10.0
+python-gnupg==0.4.1
\ No newline at end of file
diff --git a/deb/s3apt.py b/deb/s3apt.py
new file mode 100644
index 0000000..d005748
--- /dev/null
+++ b/deb/s3apt.py
@@ -0,0 +1,287 @@
+from __future__ import print_function
+from time import gmtime, strftime
+import urllib
+import boto3
+import botocore
+import tempfile
+import tarfile
+import debian.arfile
+import hashlib
+import re
+import sys
+import os
+import gnupg
+
+def lambda_handler(event, context):
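+    # Entry point for the S3 triggers described in the README. The event
+    # carries S3 records; only keys ending in ".deb" inside BUCKET_NAME
+    # trigger a rebuild of the index.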
+    print('Starting lambda function')
+    # Get bucket and key info
+    bucket = event['Records'][0]['s3']['bucket']['name']
+    key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')
+
+    if bucket == os.environ['BUCKET_NAME'] and key.endswith(".deb"):
+        # Build packages file
+        if event['Records'][0]['eventName'].startswith('ObjectCreated'):
+            s3 = boto3.resource('s3')
+            deb_obj = s3.Object(bucket_name=bucket, key=key)
+            print("S3 Notification of new key. Ensuring cached control data exists: %s" % (str(deb_obj)))
+            get_cached_control_data(deb_obj)
+
+        prefix = "/".join(key.split('/')[0:-1]) + '/'
+        # Update packages file
+        rebuild_package_index(prefix)
+
+        # Build Release file
+        build_release_file(prefix)
+
+        # Sign Release file (skipped when GPG_KEY is unset or empty)
+        if os.environ.get('GPG_KEY', '') != '':
+            sign_release_file(prefix)
+
+def get_cached_control_data(deb_obj):
+    # gets debian control data
+    s3 = boto3.resource('s3')
+    etag = deb_obj.e_tag.strip('"')
+
+    cache_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=os.environ['CACHE'] + '/' + etag)
+    exists = True
+    try:
+        control_data = cache_obj.get()['Body'].read()
+    except botocore.exceptions.ClientError as e:
+        if e.response['Error']['Code'] == 'NoSuchKey':
+            exists = False
+        else:
+            raise(e)
+
+    if not exists:
+        control_data = read_control_data(deb_obj)
+        cache_obj.put(Body=control_data)
+
+    return control_data
+
+def read_control_data(deb_obj):
+    fd, tmp = tempfile.mkstemp()
+    fh = os.fdopen(fd, "wb")
+    s3fh = deb_obj.get()['Body']
+    size = 1024*1024
+    while True:
+        dat = s3fh.read(size)
+        fh.write(dat)
+        if len(dat) < size:
+            break
+    fh.close()
+
+    try:
+        ctrl = get_control_data(tmp)
+        pkg_rec = format_package_record(ctrl, tmp)
+        return pkg_rec
+    finally:
+        os.remove(tmp)
+
+def get_control_data(debfile):
+    ar = debian.arfile.ArFile(debfile)
+
+    control_fh = ar.getmember('control.tar.gz')
+
+    tar_file = tarfile.open(fileobj=control_fh, mode='r:gz')
+
+    # control file can be named different things
+    control_file_name = [x for x in tar_file.getmembers() if x.name in ['control', './control']][0]
+
+    control_data = tar_file.extractfile(control_file_name).read().strip()
+    # Strip out control fields with blank values. This tries to allow folded
+    # and multiline fields to pass through. See the debian policy manual for
+    # more info on folded and multiline fields:
+    # https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-binarycontrolfiles
+    lines = control_data.strip().split("\n")
+    filtered = []
+    for line in lines:
+        # see if simple field
+        if re.search(r"^\w[\w\d_-]+\s*:", line):
+            k, v = line.split(':', 1)
+            if v.strip() != "":
+                filtered.append(line)
+        else:
+            # otherwise folded or multiline, just pass it through
+            filtered.append(line)
+
+    return "\n".join(filtered)
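+
+# A package's index entry is its control stanza plus the Size and checksum
+# fields appended below; apt uses these to verify each downloaded .deb.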
+def format_package_record(ctrl, fname):
+    pkgrec = ctrl.strip().split("\n")
+
+    stat = os.stat(fname)
+    pkgrec.append("Size: %d" % (stat.st_size))
+
+    md5, sha1, sha256 = checksums(fname)
+    pkgrec.append("MD5sum: %s" % (md5))
+    pkgrec.append("SHA1: %s" % (sha1))
+    pkgrec.append("SHA256: %s" % (sha256))
+
+    return "\n".join(pkgrec)
+
+def checksums(fname):
+    fh = open(fname, "rb")
+
+    md5 = hashlib.md5()
+    sha1 = hashlib.sha1()
+    sha256 = hashlib.sha256()
+
+    size = 1024 * 1024
+    while True:
+        dat = fh.read(size)
+        md5.update(dat)
+        sha1.update(dat)
+        sha256.update(dat)
+        if len(dat) < size:
+            break
+
+    fh.close()
+
+    return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()
+
+def rebuild_package_index(prefix):
+    # Get all .deb keys in directory
+    # Get the cache entry
+    # build package file
+    deb_names = []
+    deb_objs = []
+
+    print("REBUILDING PACKAGE INDEX: %s" % (prefix))
+    s3 = boto3.resource('s3')
+    for obj in s3.Bucket(os.environ['BUCKET_NAME']).objects.filter(Prefix=prefix):
+        if not obj.key.endswith(".deb"):
+            continue
+        deb_objs.append(obj)
+        deb_names.append(obj.key.split('/')[-1])
+
+    if not len(deb_objs):
+        print("NOT BUILDING EMPTY PACKAGE INDEX")
+        return
+
+    pkginfos = []
+    for obj in deb_objs:
+        print(obj.key)
+
+        pkginfo = get_cached_control_data(obj)
+        if obj.key.startswith(prefix):
+            filename = obj.key[len(prefix):]
+            pkginfo = pkginfo + "\n%s\n" % ("Filename: %s" % filename)
+        else:
+            pkginfo = pkginfo + "\n%s\n" % ("Filename: %s" % obj.key)
+
+        pkginfos.append(pkginfo)
+
+    package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Packages")
+    print("Writing package index: %s" % (str(package_index_obj)))
+    if os.environ['PUBLIC'] == 'True':
+        acl = 'public-read'
+    else:
+        acl = 'private'
+    package_index_obj.put(Body="\n".join(sorted(pkginfos)), ACL=acl)
+
+    print("DONE REBUILDING PACKAGE INDEX")
+
+def calc_package_index_hash(deb_names):
+    """
+    Calculates a hash of all the given deb file names. This is deterministic so
+    we can use it for short-circuiting.
+    """
+    md5 = hashlib.md5()
+    md5.update("\n".join(sorted(deb_names)))
+    return md5.hexdigest()
+
+def get_package_index_hash(prefix):
+    """
+    Returns the md5 hash of the names of all the packages in the index. This can be used
+    to detect if all the packages are represented without having to load a control data cache
+    file for each package.
+    """
+    s3 = boto3.resource('s3')
+    try:
+        print("looking for existing Packages file: %sPackages" % prefix)
+        package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + 'Packages')
+        return package_index_obj.metadata.get('packages-hash', None)
+    except botocore.exceptions.ClientError as e:
+        if e.response['Error']['Code'] == '404':
+            return None
+        else:
+            raise(e)
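+
+# Note: the two helpers above support short-circuiting index rebuilds via a
+# 'packages-hash' metadata entry on the Packages object; lambda_handler does
+# not currently consult them.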
+ """ + s3 = boto3.resource('s3') + try: + print("looking for existing Packages file: %sPackages" % prefix) + package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + 'Packages') + return package_index_obj.metadata.get('packages-hash', None) + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == '404': + return None + else: + raise(e) + +def build_release_file(prefix): + """ + gets info from Package, get the sums and puts them into file + + Releasefile layout: + ''' + Date: , DD Mmm YYYY HH:MM:SS UTC + MD5sum: + <(17 - length of size) spaces> Packages + SHA1: + <(17 - length of size) spaces> Packages + SHA256: + <(17 - length of size) spaces> Packages + ''' + """ + s3 = boto3.client('s3') + release_file = "" + s3.download_file(os.environ['BUCKET_NAME'], prefix + "Packages", '/tmp/Packages') + md5, sha1, sha256 = checksums("/tmp/Packages") + + date = 'Date: ' + strftime("%a, %d %b %Y %X UTC", gmtime()) + stat = os.stat("/tmp/Packages") + + release_file += (date + '\nMD5sum:\n ' + md5) + for i in range(0,17-len(str(stat.st_size))): + release_file +=(' ') + release_file +=("%d Packages\nSHA1:\n %s" %(stat.st_size, sha1)) + for i in range(0,17-len(str(stat.st_size))): + release_file +=(' ') + release_file +=("%d Packages\nSHA256:\n %s" %(stat.st_size, sha256 )) + for i in range(0,17-len(str(stat.st_size))): + release_file +=(' ') + release_file +=('%d Packages' % stat.st_size) + + s3 = boto3.resource('s3') + + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + + release_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release") + print("Writing Release file: %s" % (str(release_index_obj))) + release_index_obj.put(Body=release_file, ACL=acl) + +def sign_release_file(prefix): + ''' + Using gpg password assigned in env variable `GPG_PASS` and key, which's file directory is + assigned in env variable `GPG_KEY` + ''' + gpg = gnupg.GPG(gnupghome='/tmp/gpgdocs') + s3 = boto3.client('s3') + s3.download_file(os.environ['BUCKET_NAME'], os.environ['GPG_KEY'], '/tmp/gpgdocs/sec.key') + s3.download_file(os.environ['BUCKET_NAME'], prefix + 'Release', '/tmp/gpgdocs/Release') + + with open('/tmp/gpgdocs/sec.key') as stream: + sec = gpg.import_keys(stream.read(-1)) + print("Key import returned: ") + print(sec.results) + with open('/tmp/gpgdocs/Release') as stream: + # do not call passphrase=.. if password is not set as it causes bad sign + if os.environ['GPG_PASS'] == '': + signed = gpg.sign_file(stream, clearsign=True, detach=True, binary=False) + else: + signed = gpg.sign_file(stream, passphrase=os.environ['GPG_PASS'], clearsign=True, detach=True, binary=False) + + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + s3 = boto3.resource('s3') + sign_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release.gpg") + sign_obj.put(Body=str(signed), ACL=acl)