~edwargix/git.sr.ht

7329ab67a9453863ff6d885c78145f5863f853a0 — наб 4 years ago 657601b
Periodically vacuum dangling artifacts from the object store
1 files changed, 40 insertions(+), 5 deletions(-)

M gitsrht-periodic
M gitsrht-periodic => gitsrht-periodic +40 -5
@@ 1,17 1,20 @@
#!/usr/bin/env python3
import os
import sys
import math
import random
import sqlalchemy as sa
import subprocess
import gitsrht.repos as gr
from srht.config import cfg
from srht.database import DbSession
from gitsrht.repos import GitRepoApi
from gitsrht.types import Repository, RepoVisibility
from gitsrht.types import Artifact, User, Repository, RepoVisibility
from minio import Minio
from datetime import datetime, timedelta

# Open the database session using the connection string configured under the
# "git.sr.ht" section, then initialise it.
db = DbSession(cfg("git.sr.ht", "connection-string"))
db.init()
# NOTE(review): both spellings of the constructor call appear in this listing
# (gitsrht.repos is imported both directly and as `gr`); the second assignment
# rebinds repo_api, so it is the one in effect afterwards.
repo_api = GitRepoApi()
repo_api = gr.GitRepoApi()

def cleanup_autocreated():
    due = datetime.utcnow() - timedelta(minutes=20)


@@ 25,7 28,7 @@ def cleanup_autocreated():
      db.session.delete(r)
    db.session.commit()

def gc():
def gc_git():
    repo_count = Repository.query.count()

    # *srht-periodic scripts are run every twenty minutes,


@@ 41,5 44,37 @@ def gc():
        subprocess.run(["git", "-C", r.path, "gc", "--quiet"],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

def gc_s3():
    """Remove objects under the artifacts prefix that no Artifact row accounts for.

    Returns immediately when object storage is disabled, and otherwise only
    proceeds on a random one-in-504 draw. When it does proceed, it lists every
    object below ``<s3_prefix>/artifacts``, discards the paths that correspond
    to an Artifact row, and asks the object store to remove whatever is left.

    Raises:
        Exception: if the object store reports any error while removing the
            dangling objects.
    """
    if not gr.object_storage_enabled:
        return
    # Once a weekish: the periodic scripts run every twenty minutes, so a week
    # holds 7 * 24 * 60 // 20 == 504 runs. Integer division is required here:
    # random.randrange() rejects a float bound (TypeError on Python 3.12+).
    if random.randrange(0, 7 * 24 * 60 // 20) != 0:
        return
    prefix = os.path.join(gr.s3_prefix, "artifacts")
    minio = Minio(gr.s3_upstream, access_key=gr.s3_access_key,
            secret_key=gr.s3_secret_key, secure=True)

    # Start from every object stored under the prefix...
    objs = {obj.object_name for obj
        in minio.list_objects(gr.s3_bucket, prefix, recursive=True)}
    artifacts = Artifact.query.all()

    # Fetch the owning users and repositories in one query each, keyed by id,
    # so the loop below does not issue a lookup per artifact.
    users = {u.id: u for u in User.query.filter(User.id.in_(
        {a.user_id for a in artifacts})).all()}

    repos = {r.id: r for r in Repository.query.filter(Repository.id.in_(
        {a.repo_id for a in artifacts})).all()}

    # ...and strike off each path that a database row accounts for.
    for art in artifacts:
        artifact_path = os.path.join(prefix, users[art.user_id].canonical_name,
                repos[art.repo_id].name, art.filename)
        objs.discard(artifact_path)

    # Whatever is left has no matching Artifact row.
    if not objs:
        return
    # remove_objects() is consumed through list() so that every reported
    # error is collected before deciding whether to raise.
    # NOTE(review): this passes bare object names; newer minio-py releases
    # expect DeleteObject instances here -- confirm against the deployed version.
    errs = list(minio.remove_objects(gr.s3_bucket, objs))
    if errs:
        raise Exception(f"While removing dangling artifacts {objs}, got errors: {errs}")

# Run the periodic maintenance tasks in order.
# NOTE(review): this listing carries both `def gc():` and `def gc_git():`
# for the same function, so `gc()` below looks like the pre-rename call
# left next to its replacement `gc_git()` -- confirm before running.
cleanup_autocreated()
gc()
gc_git()
gc_s3()