@@ 1,17 1,20 @@
#!/usr/bin/env python3
+import os
+import sys
import math
import random
import sqlalchemy as sa
import subprocess
+import gitsrht.repos as gr
from srht.config import cfg
from srht.database import DbSession
-from gitsrht.repos import GitRepoApi
-from gitsrht.types import Repository, RepoVisibility
+from gitsrht.types import Artifact, User, Repository, RepoVisibility
+from minio import Minio
from datetime import datetime, timedelta
db = DbSession(cfg("git.sr.ht", "connection-string"))
db.init()
-repo_api = GitRepoApi()
+repo_api = gr.GitRepoApi()
def cleanup_autocreated():
due = datetime.utcnow() - timedelta(minutes=20)
@@ 25,7 28,7 @@ def cleanup_autocreated():
db.session.delete(r)
db.session.commit()
-def gc():
+def gc_git():
repo_count = Repository.query.count()
# *srht-periodic scripts are run every twenty minutes,
@@ 41,5 44,37 @@ def gc():
subprocess.run(["git", "-C", r.path, "gc", "--quiet"],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+def gc_s3():
+    """Remove dangling artifact objects from object storage.
+
+    Lists every object stored under the "artifacts" prefix of the configured
+    bucket, discards the names that correspond to an Artifact row, and
+    deletes whatever is left over.
+
+    Returns early when object storage is disabled, and otherwise only does
+    any work on roughly one invocation per week.
+
+    Raises:
+        Exception: if the storage client reports errors for any removal.
+    """
+    if not gr.object_storage_enabled:
+        return
+    # Once a weekish: *srht-periodic scripts run every twenty minutes, so
+    # act on about one run in (minutes per week / 20). Floor division keeps
+    # the bound an int; randrange() rejects float arguments on Python >= 3.12.
+    if random.randrange(0, 7 * 24 * 60 // 20) != 0:
+        return
+    prefix = os.path.join(gr.s3_prefix, "artifacts")
+    minio = Minio(gr.s3_upstream, access_key=gr.s3_access_key,
+            secret_key=gr.s3_secret_key, secure=True)
+
+    # Start from every object name found under the prefix...
+    objs = set(obj.object_name for obj
+            in minio.list_objects(gr.s3_bucket, prefix, recursive=True))
+    artifacts = Artifact.query.all()
+
+    # Load the referenced users and repos with one query each, keyed by id,
+    # so the loop below does plain dict lookups.
+    users = {u.id: u for u in (User.query.filter(User.id.in_(
+        set(a.user_id for a in artifacts)))).all()}
+
+    repos = {r.id: r for r in (Repository.query.filter(Repository.id.in_(
+        set(a.repo_id for a in artifacts)))).all()}
+
+    # ...and drop each name that a known artifact accounts for.
+    for art in artifacts:
+        artifact_path = os.path.join(prefix, users[art.user_id].canonical_name,
+                repos[art.repo_id].name, art.filename)
+        objs.discard(artifact_path)
+
+    if not objs:
+        return
+    # Materialise the result so every reported error is collected before
+    # deciding whether to fail.
+    errs = list(minio.remove_objects(gr.s3_bucket, objs))
+    if errs:
+        raise Exception(f"While removing dangling artifacts {objs}, got errors: {errs}")
+
cleanup_autocreated()
-gc()
+gc_git()
+gc_s3()