From f1a18c569fd13476eff0102d7410401a6c30ff69 Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Fri, 14 Feb 2020 16:04:04 -0500 Subject: [PATCH] Add support for attaching binaries to git tags This is facilitated via an s3-compatible object storage service. We recommend the use of Minio, a free software project which provides an s3-compatible API. --- config.example.ini | 15 +++ .../b4a2f2ed61b2_add_artifact_table.py | 29 +++++ gitsrht/app.py | 12 +- gitsrht/blueprints/artifacts.py | 110 ++++++++++++++++++ gitsrht/blueprints/repo.py | 8 +- gitsrht/repos.py | 79 ++++++++++++- gitsrht/templates/ref.html | 96 +++++++++++---- gitsrht/types/__init__.py | 1 + gitsrht/types/artifact.py | 19 +++ 9 files changed, 342 insertions(+), 27 deletions(-) create mode 100644 gitsrht/alembic/versions/b4a2f2ed61b2_add_artifact_table.py create mode 100644 gitsrht/blueprints/artifacts.py create mode 100644 gitsrht/types/artifact.py diff --git a/config.example.ini b/config.example.ini index cab058a..81b3992 100644 --- a/config.example.ini +++ b/config.example.ini @@ -36,6 +36,15 @@ network-key= # ill effect, if this better suits your infrastructure. redis-host= +[objects] +# Configure the S3-compatible object storage service. Leave empty to disable +# object storage. +# +# Minio is recommended as a FOSS solution over AWS: https://min.io +s3-upstream= +s3-access-key= +s3-secret-key= + [mail] # # Outgoing SMTP settings @@ -97,6 +106,12 @@ oauth-client-secret=CHANGEME # # Path to git repositories on disk repos=/var/lib/git/ +# +# Configure the S3 bucket and prefix for object storage. Leave empty to disable +# object storage. Bucket is required to enable object storage; prefix is +# optional. 
+s3-bucket= +s3-prefix= [git.sr.ht::dispatch] # diff --git a/gitsrht/alembic/versions/b4a2f2ed61b2_add_artifact_table.py b/gitsrht/alembic/versions/b4a2f2ed61b2_add_artifact_table.py new file mode 100644 index 0000000..dde0a9c --- /dev/null +++ b/gitsrht/alembic/versions/b4a2f2ed61b2_add_artifact_table.py @@ -0,0 +1,29 @@ +"""Add artifact table + +Revision ID: b4a2f2ed61b2 +Revises: 1152333caa0b +Create Date: 2020-02-14 12:00:52.658629 + +""" + +# revision identifiers, used by Alembic. +revision = 'b4a2f2ed61b2' +down_revision = '1152333caa0b' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.create_table("artifacts", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("created", sa.DateTime, nullable=False), + sa.Column("user_id", sa.Integer, sa.ForeignKey('user.id'), nullable=False), + sa.Column("repo_id", sa.Integer, sa.ForeignKey('repository.id'), nullable=False), + sa.Column("commit", sa.Unicode, nullable=False), + sa.Column("filename", sa.Unicode, nullable=False), + sa.Column("checksum", sa.Unicode, nullable=False), + sa.Column("size", sa.Integer, nullable=False)) + +def downgrade(): + op.drop_table("artifacts") diff --git a/gitsrht/app.py b/gitsrht/app.py index 923c4e1..451a539 100644 --- a/gitsrht/app.py +++ b/gitsrht/app.py @@ -23,6 +23,7 @@ class GitApp(ScmSrhtFlask): repo_api=GitRepoApi(), oauth_service=oauth_service) from gitsrht.blueprints.api import data + from gitsrht.blueprints.artifacts import artifacts from gitsrht.blueprints.email import mail from gitsrht.blueprints.repo import repo from gitsrht.blueprints.stats import stats @@ -33,6 +34,10 @@ class GitApp(ScmSrhtFlask): self.register_blueprint(stats) self.register_blueprint(webhooks_notify) + from gitsrht.repos import object_storage_enabled + if object_storage_enabled: + self.register_blueprint(artifacts) + self.add_template_filter(urls.clone_urls) self.add_template_filter(urls.log_rss_url) self.add_template_filter(urls.refs_rss_url) @@ -44,11 +49,12 @@ class 
GitApp(ScmSrhtFlask): del session["notice"] return { "commit_time": commit_time, - "trim_commit": trim_commit, "humanize": humanize, - "stat": stat, "notice": notice, - "path_join": os.path.join + "object_storage_enabled": object_storage_enabled, + "path_join": os.path.join, + "stat": stat, + "trim_commit": trim_commit, } app = GitApp() diff --git a/gitsrht/blueprints/artifacts.py b/gitsrht/blueprints/artifacts.py new file mode 100644 index 0000000..8bc63fb --- /dev/null +++ b/gitsrht/blueprints/artifacts.py @@ -0,0 +1,110 @@ +import hashlib +import os +import pygit2 +from flask import Blueprint, redirect, render_template, request, redirect +from flask import abort, url_for +from gitsrht.git import Repository as GitRepository +from gitsrht.repos import delete_artifact, upload_artifact +from gitsrht.types import Artifact +from minio import Minio +from minio.error import BucketAlreadyOwnedByYou, BucketAlreadyExists +from scmsrht.access import check_access, UserAccess +from srht.config import cfg +from srht.database import db +from srht.oauth import loginrequired +from srht.validation import Validation +from werkzeug.utils import secure_filename + +artifacts = Blueprint('artifacts', __name__) + +# TODO: Make S3 support optional +s3_upstream = cfg("objects", "s3-upstream", default=None) +s3_access_key = cfg("objects", "s3-access-key", default=None) +s3_secret_key = cfg("objects", "s3-secret-key", default=None) +s3_bucket = cfg("git.sr.ht", "s3-bucket", default=None) +s3_prefix = cfg("git.sr.ht", "s3-prefix", default=None) + +@artifacts.route("///refs//upload", methods=["POST"]) +@loginrequired +def ref_upload(owner, repo, ref): + owner, repo = check_access(owner, repo, UserAccess.manage) + with GitRepository(repo.path) as git_repo: + try: + tag = git_repo.revparse_single(ref) + except KeyError: + abort(404) + except ValueError: + abort(404) + if isinstance(tag, pygit2.Commit): + target = tag.oid.hex + else: + target = tag.target.hex + valid = Validation(request) + f = 
request.files.get("file") + valid.expect(f, "File is required", field="file") + if not valid.ok: + return render_template("ref.html", view="refs", + owner=owner, repo=repo, git_repo=git_repo, tag=tag, + **valid.kwargs) + artifact = upload_artifact(valid, repo, target, f, f.filename) + if not valid.ok: + return render_template("ref.html", view="refs", + owner=owner, repo=repo, git_repo=git_repo, tag=tag, + **valid.kwargs) + db.session.commit() + return redirect(url_for("repo.ref", + owner=owner.canonical_name, + repo=repo.name, + ref=ref)) + +@artifacts.route("///refs//") +def ref_download(owner, repo, ref, filename): + owner, repo = check_access(owner, repo, UserAccess.read) + with GitRepository(repo.path) as git_repo: + try: + tag = git_repo.revparse_single(ref) + except KeyError: + abort(404) + except ValueError: + abort(404) + if isinstance(tag, pygit2.Commit): + target = tag.oid.hex + else: + target = tag.target.hex + artifact = (Artifact.query + .filter(Artifact.user_id == owner.id) + .filter(Artifact.repo_id == repo.id) + .filter(Artifact.commit == target) + .filter(Artifact.filename == filename)).one_or_none() + if not artifact: + abort(404) + prefix = os.path.join(s3_prefix, "artifacts", + repo.owner.canonical_name, repo.name) + url = f"https://{s3_upstream}/{s3_bucket}/{prefix}/{filename}" + return redirect(url) + +@artifacts.route("///refs//", methods=["POST"]) +def ref_delete(owner, repo, ref, filename): + owner, repo = check_access(owner, repo, UserAccess.manage) + with GitRepository(repo.path) as git_repo: + try: + tag = git_repo.revparse_single(ref) + except KeyError: + abort(404) + except ValueError: + abort(404) + if isinstance(tag, pygit2.Commit): + target = tag.oid.hex + else: + target = tag.target.hex + artifact = (Artifact.query + .filter(Artifact.user_id == owner.id) + .filter(Artifact.repo_id == repo.id) + .filter(Artifact.commit == target) + .filter(Artifact.filename == filename)).one_or_none() + if not artifact: + abort(404) + 
delete_artifact(artifact) + db.session.commit() + return redirect(url_for("repo.ref", + owner=owner.canonical_name, repo=repo.name, ref=ref)) diff --git a/gitsrht/blueprints/repo.py b/gitsrht/blueprints/repo.py index b4045bd..7f67ddb 100644 --- a/gitsrht/blueprints/repo.py +++ b/gitsrht/blueprints/repo.py @@ -13,6 +13,7 @@ from gitsrht.editorconfig import EditorConfig from gitsrht.git import Repository as GitRepository, commit_time, annotate_tree from gitsrht.git import diffstat, get_log from gitsrht.rss import generate_feed +from gitsrht.types import Artifact from io import BytesIO from jinja2 import Markup from pygments import highlight @@ -486,7 +487,6 @@ def refs_rss(owner, repo): return generate_feed(repo, references, title, link, description) - @repo.route("///refs/") def ref(owner, repo, ref): owner, repo = get_repo_or_redir(owner, repo) @@ -500,5 +500,9 @@ def ref(owner, repo, ref): if isinstance(tag, pygit2.Commit): return redirect(url_for(".commit", owner=owner, repo=repo.name, ref=tag.id.hex)) + artifacts = (Artifact.query + .filter(Artifact.user_id == repo.owner_id) + .filter(Artifact.repo_id == repo.id)).all() return render_template("ref.html", view="refs", - owner=owner, repo=repo, git_repo=git_repo, tag=tag) + owner=owner, repo=repo, git_repo=git_repo, tag=tag, + artifacts=artifacts) diff --git a/gitsrht/repos.py b/gitsrht/repos.py index c8894f8..534ebf1 100644 --- a/gitsrht/repos.py +++ b/gitsrht/repos.py @@ -1,12 +1,82 @@ +import hashlib +import os.path import subprocess -from gitsrht.types import Repository, Redirect +from gitsrht.types import Artifact, Repository, Redirect +from minio import Minio +from minio.error import BucketAlreadyOwnedByYou, BucketAlreadyExists, ResponseError from scmsrht.repos import SimpleRepoApi from srht.config import cfg -import os.path +from srht.database import db +from werkzeug.utils import secure_filename repos_path = cfg("git.sr.ht", "repos") post_update = cfg("git.sr.ht", "post-update-script") +s3_upstream = 
cfg("objects", "s3-upstream", default=None) +s3_access_key = cfg("objects", "s3-access-key", default=None) +s3_secret_key = cfg("objects", "s3-secret-key", default=None) +s3_bucket = cfg("git.sr.ht", "s3-bucket", default=None) +s3_prefix = cfg("git.sr.ht", "s3-prefix", default=None) + +object_storage_enabled = all([ + s3_upstream, + s3_access_key, + s3_secret_key, + s3_bucket, +]) + +def delete_artifact(artifact): + minio = Minio(s3_upstream, access_key=s3_access_key, + secret_key=s3_secret_key, secure=True) + repo = artifact.repo + prefix = os.path.join(s3_prefix, "artifacts", + repo.owner.canonical_name, repo.name) + try: + minio.remove_object(s3_bucket, f"{prefix}/{artifact.filename}") + except ResponseError as err: + print(err) + db.session.delete(artifact) + +def upload_artifact(valid, repo, commit, f, filename): + fn = secure_filename(filename) + artifact = (Artifact.query + .filter(Artifact.user_id == repo.owner_id) + .filter(Artifact.repo_id == repo.id) + .filter(Artifact.commit == commit) + .filter(Artifact.filename == fn)).one_or_none() + valid.expect(not artifact, "A file by this name was already uploaded.", + field="file") + if not valid.ok: + return None + minio = Minio(s3_upstream, access_key=s3_access_key, + secret_key=s3_secret_key, secure=True) + prefix = os.path.join(s3_prefix, "artifacts", + repo.owner.canonical_name, repo.name) + try: + minio.make_bucket(s3_bucket) + except BucketAlreadyOwnedByYou: + pass + except BucketAlreadyExists: + pass + sha = hashlib.sha256() + buf = f.read(1024) + while len(buf) > 0: + sha.update(buf) + buf = f.read(1024) + size = f.tell() + f.seek(0) + minio.put_object(s3_bucket, f"{prefix}/{fn}", f, size, + content_type="application/octet-stream") + artifact = Artifact() + artifact.user_id = repo.owner_id + artifact.repo_id = repo.id + artifact.commit = commit + artifact.filename = fn + artifact.checksum = f"sha256:{sha.hexdigest()}" + artifact.size = size + db.session.add(artifact) + return artifact + class 
GitRepoApi(SimpleRepoApi): def __init__(self): super().__init__(repos_path, @@ -34,6 +104,11 @@ class GitRepoApi(SimpleRepoApi): from gitsrht.webhooks import RepoWebhook RepoWebhook.Subscription.query.filter( RepoWebhook.Subscription.repo_id == repo.id).delete() + # TODO: Should we delete these asynchronously? + for artifact in (Artifact.query + .filter(Artifact.user_id == repo.owner_id) + .filter(Artifact.repo_id == repo.id)): + delete_artifact(artifact) super().do_delete_repo(repo) def do_clone_repo(self, source, repo): diff --git a/gitsrht/templates/ref.html b/gitsrht/templates/ref.html index adf61d2..729405b 100644 --- a/gitsrht/templates/ref.html +++ b/gitsrht/templates/ref.html @@ -5,36 +5,92 @@ {% endblock %} {% block content %}
+

+ {{tag.name}} + + {{commit_time(tag) | date}} + +

+
+ + {% if object_storage_enabled %} + {% if any(artifacts) %} +
+ {% for artifact in artifacts %} +
+ {{ artifact.filename }} {{icon('caret-right')}}
+ {{artifact.checksum}} + {% if repo.owner == current_user %} + {{csrf_token()}} + + {% endif %} +
+ {% endfor %} +
+ {% endif %} +
+ {{csrf_token()}} +
+ + + {{valid.summary("file")}} +
+ +
+ {% endif %} +
-

- {{tag.name}} - - {{commit_time(tag) | date}} - -

{% if tag.message %}
{{tag.message}}
{% endif %}
-
{% endblock %} diff --git a/gitsrht/types/__init__.py b/gitsrht/types/__init__.py index dc5f311..0ab477e 100644 --- a/gitsrht/types/__init__.py +++ b/gitsrht/types/__init__.py @@ -19,4 +19,5 @@ class Redirect(Base, BaseRedirectMixin): class Repository(Base, BaseRepositoryMixin): pass +from gitsrht.types.artifact import Artifact from gitsrht.types.sshkey import SSHKey diff --git a/gitsrht/types/artifact.py b/gitsrht/types/artifact.py new file mode 100644 index 0000000..a1efc63 --- /dev/null +++ b/gitsrht/types/artifact.py @@ -0,0 +1,19 @@ +import sqlalchemy as sa +import sqlalchemy_utils as sau +from srht.database import Base + +class Artifact(Base): + __tablename__ = 'artifacts' + id = sa.Column(sa.Integer, primary_key=True) + created = sa.Column(sa.DateTime, nullable=False) + user_id = sa.Column(sa.Integer, sa.ForeignKey('user.id'), nullable=False) + user = sa.orm.relationship('User') + repo_id = sa.Column(sa.Integer, sa.ForeignKey('repository.id'), nullable=False) + repo = sa.orm.relationship('Repository') + commit = sa.Column(sa.Unicode, nullable=False) + filename = sa.Column(sa.Unicode, nullable=False) + checksum = sa.Column(sa.Unicode, nullable=False) + size = sa.Column(sa.Integer, nullable=False) + + def __repr__(self): + return ''.format(self.id, self.fingerprint) -- 2.38.4