From 7329ab67a9453863ff6d885c78145f5863f853a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?=
Date: Thu, 20 Aug 2020 23:02:19 +0200
Subject: [PATCH] Periodically vacuum dangling artifacts from the object store

---
 gitsrht-periodic | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/gitsrht-periodic b/gitsrht-periodic
index afd8204..d2b9457 100755
--- a/gitsrht-periodic
+++ b/gitsrht-periodic
@@ -1,17 +1,20 @@
 #!/usr/bin/env python3
+import os
+import sys
 import math
 import random
 import sqlalchemy as sa
 import subprocess
+import gitsrht.repos as gr
 from srht.config import cfg
 from srht.database import DbSession
-from gitsrht.repos import GitRepoApi
-from gitsrht.types import Repository, RepoVisibility
+from gitsrht.types import Artifact, User, Repository, RepoVisibility
+from minio import Minio
 from datetime import datetime, timedelta
 
 db = DbSession(cfg("git.sr.ht", "connection-string"))
 db.init()
-repo_api = GitRepoApi()
+repo_api = gr.GitRepoApi()
 
 def cleanup_autocreated():
     due = datetime.utcnow() - timedelta(minutes=20)
@@ -25,7 +28,7 @@ def cleanup_autocreated():
         db.session.delete(r)
     db.session.commit()
 
-def gc():
+def gc_git():
     repo_count = Repository.query.count()
 
     # *srht-periodic scripts are run every twenty minutes,
@@ -41,5 +44,42 @@ def gc():
         subprocess.run(["git", "-C", r.path, "gc", "--quiet"],
             stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
+def gc_s3():
+    """Delete stored objects under the artifacts prefix that no Artifact row names.
+
+    Returns early when object storage is disabled, and otherwise proceeds on
+    only about one call in 504. Raises Exception if remove_objects reports errors.
+    """
+    if not gr.object_storage_enabled:
+        return
+    # Once a weekish: a 1-in-504 draw per 20-minute run; randrange needs an int bound
+    if random.randrange(0, 7 * 24 * 60 // 20) != 0:
+        return
+    prefix = os.path.join(gr.s3_prefix, "artifacts")
+    minio = Minio(gr.s3_upstream, access_key=gr.s3_access_key,
+            secret_key=gr.s3_secret_key, secure=True)
+
+    objs = set(obj.object_name for obj
+            in minio.list_objects(gr.s3_bucket, prefix, recursive=True))
+    artifacts = Artifact.query.all()
+
+    users = {u.id: u for u in (User.query.filter(User.id.in_(
+            set(a.user_id for a in artifacts)))).all()}
+
+    repos = {r.id: r for r in (Repository.query.filter(Repository.id.in_(
+            set(a.repo_id for a in artifacts)))).all()}
+
+    for art in artifacts:
+        artifact_path = os.path.join(prefix, users[art.user_id].canonical_name,
+                repos[art.repo_id].name, art.filename)
+        objs.discard(artifact_path)
+
+    if not objs:
+        return
+    errs = list(minio.remove_objects(gr.s3_bucket, objs))
+    if errs:
+        raise Exception(f"While removing dangling artifacts {objs}, got errors: {errs}")
+
 cleanup_autocreated()
-gc()
+gc_git()
+gc_s3()
-- 
2.38.4