Fix numerous bugs. Add an uninstall script.

This commit is contained in:
2025-12-02 07:08:23 -05:00
parent 0f29bb140a
commit 68769f4bd7
13 changed files with 658 additions and 222 deletions

View File

@@ -13,15 +13,17 @@ from ..config import Config
from ..daemon import run_daemon
mirrors_app = typer.Typer(
help="Manage mirrors (add, list, update, search, status, watch).")
help="Manage mirrors (add, list, update, search, status, watch, daemon)."
)
@mirrors_app.command("list")
def list_mirrors_cmd():
def list_mirrors_cmd(ctx: typer.Context):
"""
List all configured mirrors.
"""
mirrors = storage.list_mirrors()
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
mirrors = storage.list_mirrors(cfg=cfg)
if not mirrors:
typer.echo("No mirrors configured.")
raise typer.Exit(0)
@@ -30,16 +32,23 @@ def list_mirrors_cmd():
cats = ", ".join(m.categories) if m.categories else "-"
status = m.status or "idle"
lu = m.last_updated.isoformat(
sep=" ", timespec="seconds") if m.last_updated else "never"
sep=" ", timespec="seconds"
) if m.last_updated else "never"
typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}")
typer.echo(f" {m.url}")
@mirrors_app.command("add")
def add_mirror_cmd(
slug: str = typer.Argument(...,
help="Local slug for the mirror (unique)."),
url: str = typer.Argument(..., help="Source URL to mirror."),
ctx: typer.Context,
slug: str = typer.Argument(
...,
help="Local slug for the mirror (unique).",
),
url: str = typer.Argument(
...,
help="Source URL to mirror.",
),
category: List[str] = typer.Option(
None,
"--category",
@@ -63,10 +72,14 @@ def add_mirror_cmd(
By default, this queues an initial update job and returns immediately.
The actual mirroring is handled by the mirage daemon.
"""
existing = storage.get_mirror(slug)
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
existing = storage.get_mirror(slug, cfg=cfg)
if existing:
typer.echo(f"Error: mirror with slug {
slug!r} already exists.", err=True)
typer.echo(
f"Error: mirror with slug {slug!r} already exists.",
err=True,
)
raise typer.Exit(1)
m = Mirror(
@@ -75,20 +88,21 @@ def add_mirror_cmd(
categories=category or [],
ignore_robots=ignore_robots,
)
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
typer.echo(f"Added mirror {slug!r} -> {url}")
if no_update:
typer.echo("Initial update NOT queued (per --no-update).")
return
jobs.enqueue_update(slug)
jobs.enqueue_update(slug, cfg=cfg)
typer.echo("Initial update job queued.")
typer.echo("Run `mirage mirrors status` or `mirage mirrors watch` to monitor.")
@mirrors_app.command("edit")
def edit_mirror_cmd(
ctx: typer.Context,
slug: str = typer.Argument(..., help="Mirror slug to edit."),
new_slug: Optional[str] = typer.Option(
None,
@@ -125,7 +139,9 @@ def edit_mirror_cmd(
"""
Modify properties of an existing mirror (URL, categories, ignore_robots, slug).
"""
m = storage.get_mirror(slug)
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
m = storage.get_mirror(slug, cfg=cfg)
if not m:
typer.echo(f"No such mirror: {slug!r}", err=True)
raise typer.Exit(1)
@@ -150,27 +166,27 @@ def edit_mirror_cmd(
m.ignore_robots = ignore_robots
if new_slug is not None and new_slug != original_slug:
# Simple rename: remove old entry, reinsert with new slug
m.slug = new_slug
# Save under new slug
storage.upsert_mirror(m)
# Delete old slug
storage.upsert_mirror(m, cfg=cfg)
if original_slug != new_slug:
storage.delete_mirror(original_slug)
storage.delete_mirror(original_slug, cfg=cfg)
typer.echo(f"Mirror {original_slug!r} renamed to {new_slug!r}.")
else:
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
typer.echo(f"Mirror {slug!r} updated.")
@mirrors_app.command("remove")
def remove_mirror_cmd(
ctx: typer.Context,
slug: str = typer.Argument(..., help="Mirror slug to remove."),
):
"""
Remove a mirror definition (does not delete files on disk).
"""
ok = storage.delete_mirror(slug)
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
ok = storage.delete_mirror(slug, cfg=cfg)
if not ok:
typer.echo(f"No such mirror: {slug!r}", err=True)
raise typer.Exit(1)
@@ -180,36 +196,40 @@ def remove_mirror_cmd(
@mirrors_app.command("update")
def update_mirror_cmd(
ctx: typer.Context,
slug: str = typer.Argument(..., help="Mirror slug to update."),
):
"""
Enqueue an update job for a single mirror (non-blocking).
"""
m = storage.get_mirror(slug)
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
m = storage.get_mirror(slug, cfg=cfg)
if not m:
typer.echo(f"No such mirror: {slug!r}", err=True)
raise typer.Exit(1)
jobs.enqueue_update(slug)
jobs.enqueue_update(slug, cfg=cfg)
typer.echo(f"Update job queued for {slug!r}.")
@mirrors_app.command("update-all")
def update_all_cmd():
def update_all_cmd(ctx: typer.Context):
"""
Enqueue update jobs for all mirrors (non-blocking).
"""
all_mirrors = storage.list_mirrors()
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
all_mirrors = storage.list_mirrors(cfg=cfg)
if not all_mirrors:
typer.echo("No mirrors configured.")
raise typer.Exit(0)
count = 0
for m in all_mirrors:
# Avoid spamming duplicates if already queued/updating
if m.status in ("queued", "updating"):
continue
jobs.enqueue_update(m.slug)
jobs.enqueue_update(m.slug, cfg=cfg)
count += 1
typer.echo(f"Queued update jobs for {count} mirror(s).")
@@ -218,6 +238,7 @@ def update_all_cmd():
@mirrors_app.command("status")
def status_cmd(
ctx: typer.Context,
slug: Optional[str] = typer.Argument(
None,
help="Optional mirror slug. If omitted, show status for all mirrors.",
@@ -226,8 +247,10 @@ def status_cmd(
"""
Show current status for mirrors.
"""
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
if slug is None:
mirrors = storage.list_mirrors()
mirrors = storage.list_mirrors(cfg=cfg)
if not mirrors:
typer.echo("No mirrors configured.")
raise typer.Exit(0)
@@ -236,23 +259,26 @@ def status_cmd(
cats = ", ".join(m.categories) if m.categories else "-"
status = m.status or "idle"
lu = m.last_updated.isoformat(
sep=" ", timespec="seconds") if m.last_updated else "never"
sep=" ", timespec="seconds"
) if m.last_updated else "never"
typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}")
if m.last_error:
typer.echo(f" last_error: {m.last_error}")
else:
m = storage.get_mirror(slug)
m = storage.get_mirror(slug, cfg=cfg)
if not m:
typer.echo(f"No such mirror: {slug!r}", err=True)
raise typer.Exit(1)
cats = ", ".join(m.categories) if m.categories else "-"
typer.echo(f"slug : {m.slug}")
typer.echo(f"url : {m.url}")
typer.echo(f"categories : {', '.join(
m.categories) if m.categories else '-'}")
typer.echo(f"categories : {cats}")
typer.echo(f"ignore_robots: {m.ignore_robots}")
typer.echo(f"status : {m.status or 'idle'}")
lu = m.last_updated.isoformat(
sep=" ", timespec="seconds") if m.last_updated else "never"
sep=" ", timespec="seconds"
) if m.last_updated else "never"
typer.echo(f"last_updated : {lu}")
if m.last_error:
typer.echo(f"last_error : {m.last_error}")
@@ -260,6 +286,7 @@ def status_cmd(
@mirrors_app.command("watch")
def watch_cmd(
ctx: typer.Context,
slug: str = typer.Argument(..., help="Mirror slug to watch log for."),
lines: int = typer.Option(
40,
@@ -273,7 +300,9 @@ def watch_cmd(
Ctrl-C exits the watch without stopping the update job.
"""
log_path = log_path_for(slug)
cfg: Config = ctx.obj["config"] # type: ignore[assignment]
log_path = log_path_for(slug, cfg=cfg)
if not log_path.exists():
typer.echo(f"No log file yet for {slug!r}: {log_path}")
raise typer.Exit(1)
@@ -281,13 +310,11 @@ def watch_cmd(
typer.echo(f"Watching log: {log_path}")
try:
with log_path.open("r", encoding="utf-8") as f:
# show last N lines
all_lines = f.readlines()
tail = all_lines[-lines:] if lines > 0 else all_lines
for line in tail:
typer.echo(line.rstrip("\n"))
# now follow
with log_path.open("r", encoding="utf-8") as f:
f.seek(0, os.SEEK_END)
while True:
@@ -314,10 +341,6 @@ def daemon_cmd(
):
"""
Run the Mirage daemon (job worker) in the foreground.
This process watches the update queue and runs wget jobs with
concurrency. It respects the global --config option and the
MIRAGE_CONFIG environment variable.
"""
cfg: Config = ctx.obj["config"] # type: ignore[assignment]

View File

@@ -11,7 +11,7 @@ import tomllib # Python 3.11+
# -----------------------------
# Defaults & helpers
# -----------------------------
ENV_CONFIG_VAR = "MIRAGE_CONFIG"
DEFAULT_MIRROR_ROOT = Path("/srv/www/mirrors")
@@ -92,6 +92,15 @@ class Config:
d.mkdir(parents=True, exist_ok=True)
return d
@property
def jobs_dir(self) -> Path:
"""
Root for job queue files (pending/running).
"""
d = self.data_dir / "jobs"
d.mkdir(parents=True, exist_ok=True)
return d
def describe_source(self) -> str:
if self.source_path is None:
return f"{self.source_kind} (no config file, built-in defaults)"
@@ -141,8 +150,8 @@ def _search_config_paths(
paths.append((explicit, "explicit"))
return paths
# 2. Environment variable (new canonical name + legacy)
env_path = os.getenv("MIRAGE_CONFIG") or os.getenv("mirage_CONFIG")
# 2. Environment variable
env_path = os.getenv(ENV_CONFIG_VAR)
if env_path:
paths.append((Path(env_path).expanduser(), "env"))

View File

@@ -4,7 +4,6 @@ import time
from concurrent.futures import ThreadPoolExecutor, Future
from datetime import datetime
from typing import Dict, Tuple, Optional
from pathlib import Path
from .config import Config, load_config
@@ -23,10 +22,6 @@ def run_daemon(
- Watches jobs/pending for new update jobs.
- Moves them to jobs/running.
- Runs wget via update_mirror() with concurrency.
If cfg is None, load_config() is used. In normal CLI usage, the
'mirage mirrors daemon' command passes a Config built from the
global --config option / env / defaults.
"""
if cfg is None:
cfg = load_config()
@@ -39,7 +34,7 @@ def run_daemon(
print(f"[mirage-daemon] starting with max_workers={max_workers}")
print(f"[mirage-daemon] data_dir: {cfg.data_dir}")
print(f"[mirage-daemon] jobs dir: {cfg.data_dir / 'jobs'}")
print(f"[mirage-daemon] jobs dir: {cfg.jobs_dir}")
try:
while True:
@@ -50,7 +45,6 @@ def run_daemon(
try:
Path(job_path).unlink(missing_ok=True)
except TypeError:
# Python <3.8 compat (if you ever care)
p = Path(job_path)
if p.exists():
p.unlink()
@@ -59,28 +53,30 @@ def run_daemon(
f.result()
except Exception as e: # noqa: BLE001
# Internal failure => mark mirror as error
m = storage.get_mirror(slug)
m = storage.get_mirror(slug, cfg=cfg)
if m:
m.status = "error"
m.last_error = f"Internal error: {e!r}"
m.last_updated = datetime.now()
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
# 2. If we have capacity, pull jobs from pending
capacity = max_workers - len(running)
if capacity > 0:
pending = jobs.list_pending_jobs()
pending = jobs.list_pending_jobs(cfg=cfg)
if pending:
for pending_path, job in pending[:capacity]:
running_path = jobs.move_to_running(pending_path)
running_path = jobs.move_to_running(
pending_path, cfg=cfg
)
# mark mirror as updating early
m = storage.get_mirror(job.slug)
m = storage.get_mirror(job.slug, cfg=cfg)
if m:
m.status = "updating"
m.last_error = None
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
fut = executor.submit(update_mirror, job.slug)
fut = executor.submit(update_mirror, job.slug, cfg)
running[fut] = (str(running_path), job.slug)
time.sleep(poll_interval)
@@ -88,3 +84,4 @@ def run_daemon(
print("[mirage-daemon] shutting down (KeyboardInterrupt)")
finally:
executor.shutdown(wait=False)

View File

@@ -1,108 +1,90 @@
from __future__ import annotations
import json
import time
import uuid
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Tuple
from typing import List, Optional, Tuple
from .config import load_config
from . import storage
from .config import Config, load_config
@dataclass
class Job:
id: str
class UpdateJob:
slug: str
type: str # currently only "update"
queued_at: datetime
def to_dict(self) -> dict:
return {
"id": self.id,
"slug": self.slug,
"type": self.type,
"queued_at": self.queued_at.isoformat(),
}
@classmethod
def from_dict(cls, data: dict) -> "Job":
return cls(
id=data["id"],
slug=data["slug"],
type=data["type"],
queued_at=datetime.fromisoformat(data["queued_at"]),
)
created_at: float
def _jobs_root() -> Path:
cfg = load_config()
root = cfg.data_dir / "jobs"
root.mkdir(parents=True, exist_ok=True)
(root / "pending").mkdir(exist_ok=True)
(root / "running").mkdir(exist_ok=True)
def _jobs_root(cfg: Config) -> Path:
root = cfg.jobs_dir
(root / "pending").mkdir(parents=True, exist_ok=True)
(root / "running").mkdir(parents=True, exist_ok=True)
return root
def pending_dir() -> Path:
return _jobs_root() / "pending"
def running_dir() -> Path:
return _jobs_root() / "running"
def enqueue_update(slug: str) -> Path:
def enqueue_update(slug: str, cfg: Optional[Config] = None) -> Path:
"""
Enqueue an update job for the given slug.
Mark mirror status as 'queued' (unless it's already queued/updating).
Create a new pending job for the given slug.
Returns the path to the created job file.
"""
job_id = uuid.uuid4().hex
job = Job(
id=job_id,
slug=slug,
type="update",
queued_at=datetime.now(),
)
pdir = pending_dir()
path = pdir / f"{job_id}.json"
with path.open("w", encoding="utf-8") as f:
json.dump(job.to_dict(), f)
m = storage.get_mirror(slug)
if m and m.status not in ("queued", "updating"):
m.status = "queued"
m.last_error = None
storage.upsert_mirror(m)
if cfg is None:
cfg = load_config()
jobs_root = _jobs_root(cfg)
job = UpdateJob(slug=slug, created_at=time.time())
payload = {"slug": job.slug, "created_at": job.created_at}
job_id = f"{int(job.created_at)}-{uuid.uuid4().hex}"
path = jobs_root / "pending" / f"{job_id}.json"
path.write_text(json.dumps(payload), encoding="utf-8")
return path
def list_pending_jobs() -> List[Tuple[Path, Job]]:
jobs: List[Tuple[Path, Job]] = []
pdir = pending_dir()
for path in sorted(pdir.glob("*.json")):
def list_pending_jobs(
cfg: Optional[Config] = None,
) -> List[Tuple[Path, UpdateJob]]:
"""
Return a sorted list of (job_path, UpdateJob) for pending jobs.
Sorted by created_at ascending (oldest first).
"""
if cfg is None:
cfg = load_config()
jobs_root = _jobs_root(cfg)
pending_dir = jobs_root / "pending"
results: List[Tuple[Path, UpdateJob]] = []
for p in sorted(pending_dir.glob("*.json")):
try:
data = json.loads(path.read_text(encoding="utf-8"))
job = Job.from_dict(data)
data = json.loads(p.read_text(encoding="utf-8"))
job = UpdateJob(
slug=str(data["slug"]),
created_at=float(data.get("created_at", 0.0)),
)
results.append((p, job))
except Exception:
continue
jobs.append((path, job))
return jobs
results.sort(key=lambda item: item[1].created_at)
return results
def load_job(path: Path) -> Job:
data = json.loads(path.read_text(encoding="utf-8"))
return Job.from_dict(data)
def move_to_running(pending_path: Path) -> Path:
def move_to_running(
pending_path: Path,
cfg: Optional[Config] = None,
) -> Path:
"""
Move a pending job file into the running directory.
Move a pending job file into the running/ directory.
Returns the new path.
"""
rdir = running_dir()
dest = rdir / pending_path.name
pending_path.replace(dest)
if cfg is None:
cfg = load_config()
jobs_root = _jobs_root(cfg)
running_dir = jobs_root / "running"
running_dir.mkdir(parents=True, exist_ok=True)
dest = running_dir / pending_path.name
pending_path.rename(dest)
return dest

View File

@@ -1,60 +1,132 @@
from __future__ import annotations
import os
import json
from threading import RLock
from typing import Dict, List, Optional
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, List
from .config import load_config
from .config import Config, load_config
from .models import Mirror
_lock = RLock()
def _db_path(cfg: Config) -> Path:
return cfg.db_path
def _load_raw(path) -> Dict[str, dict]:
def _load_raw(cfg: Config) -> Dict[str, dict]:
path = _db_path(cfg)
if not path.exists():
return {}
with path.open("r", encoding="utf-8") as f:
return json.load(f)
try:
text = path.read_text(encoding="utf-8")
return json.loads(text)
except Exception:
return {}
def _save_raw(path, data: Dict[str, dict]) -> None:
def _save_raw(cfg: Config, data: Dict[str, dict]) -> None:
    """
    Persist the raw mirrors mapping to the JSON DB file for ``cfg``.

    The payload is first written to a sibling ``.tmp`` file and then moved
    over the real DB with ``os.replace``, so the DB is swapped in one step
    rather than rewritten in place.  Afterwards the DB is chmod-ed to
    0o664.

    Args:
        cfg: Config whose DB path (via ``_db_path``) is written.
        data: Mapping of slug -> raw mirror dict; serialized as indented,
            key-sorted JSON.

    Raises:
        PermissionError: if the parent directory, temp file or DB cannot
            be created/written.  The error is re-raised with the DB path,
            the config source and the current user in the message.
    """
    import getpass

    path = _db_path(cfg)
    try:
        # Creating the parent directory is inside the try so that a
        # permission failure here also gets the explanatory message below.
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = path.with_suffix(".tmp")
        # Write to a temp file first, then swap it into place.
        tmp_path.write_text(
            json.dumps(data, indent=2, sort_keys=True),
            encoding="utf-8",
        )
        os.replace(tmp_path, path)
        path.chmod(0o664)
    except PermissionError as e:
        # os.getlogin() requires a controlling terminal and raises OSError
        # without one (e.g. under systemd or cron), which would replace the
        # helpful error built here.  getpass.getuser() consults the
        # environment / password database instead; fall back to a
        # placeholder if even that fails.
        try:
            user = getpass.getuser()
        except Exception:  # noqa: BLE001 - never mask the original error
            user = "<unknown>"
        raise PermissionError(
            f"Cannot write mirrors DB at {path}.\n"
            f"Config source: {cfg.describe_source()}\n"
            f"Current user: {user!r}\n"
            "Hint: Add your user to the 'mirage' group and ensure "
            "data_dir/log_dir/mirror_root are group-writable."
        ) from e
def list_mirrors() -> List[Mirror]:
cfg = load_config()
with _lock:
data = _load_raw(cfg.db_path)
return [Mirror.from_dict(v) for v in data.values()]
def _mirror_from_dict(slug: str, d: dict) -> Mirror:
last_updated = d.get("last_updated")
if isinstance(last_updated, str):
try:
last_updated_dt = datetime.fromisoformat(last_updated)
except ValueError:
last_updated_dt = None
else:
last_updated_dt = None
return Mirror(
slug=slug,
url=d["url"],
categories=list(d.get("categories", [])),
ignore_robots=bool(d.get("ignore_robots", False)),
status=d.get("status"),
last_updated=last_updated_dt,
last_error=d.get("last_error"),
)
def get_mirror(slug: str) -> Optional[Mirror]:
cfg = load_config()
with _lock:
data = _load_raw(cfg.db_path)
if slug not in data:
def _mirror_to_dict(m: Mirror) -> dict:
return {
"url": m.url,
"categories": list(m.categories),
"ignore_robots": bool(m.ignore_robots),
"status": m.status,
"last_updated": m.last_updated.isoformat(timespec="seconds")
if m.last_updated
else None,
"last_error": m.last_error,
}
# Public API ------------------------------------------------------------
def list_mirrors(cfg: Optional[Config] = None) -> List[Mirror]:
if cfg is None:
cfg = load_config()
raw = _load_raw(cfg)
mirrors: List[Mirror] = []
for slug, d in raw.items():
try:
mirrors.append(_mirror_from_dict(slug, d))
except Exception:
continue
# sort by slug for deterministic output
mirrors.sort(key=lambda m: m.slug)
return mirrors
def get_mirror(slug: str, cfg: Optional[Config] = None) -> Optional[Mirror]:
if cfg is None:
cfg = load_config()
raw = _load_raw(cfg)
d = raw.get(slug)
if not d:
return None
return Mirror.from_dict(data[slug])
return _mirror_from_dict(slug, d)
def upsert_mirror(m: Mirror) -> None:
cfg = load_config()
with _lock:
data = _load_raw(cfg.db_path)
data[m.slug] = m.to_dict()
_save_raw(cfg.db_path, data)
def upsert_mirror(mirror: Mirror, cfg: Optional[Config] = None) -> None:
if cfg is None:
cfg = load_config()
raw = _load_raw(cfg)
raw[mirror.slug] = _mirror_to_dict(mirror)
_save_raw(cfg, raw)
def delete_mirror(slug: str) -> bool:
cfg = load_config()
with _lock:
data = _load_raw(cfg.db_path)
if slug not in data:
return False
del data[slug]
_save_raw(cfg.db_path, data)
return True
def delete_mirror(slug: str, cfg: Optional[Config] = None) -> bool:
if cfg is None:
cfg = load_config()
raw = _load_raw(cfg)
if slug not in raw:
return False
raw.pop(slug, None)
_save_raw(cfg, raw)
return True

0
mirage/tests/__init__.py Normal file
View File

View File

@@ -0,0 +1,49 @@
from pathlib import Path
from mirage.config import load_config, ENV_CONFIG_VAR
def write_cfg(path: Path, mirror_root: str) -> None:
    """
    Write a minimal TOML config file that sets only ``mirror_root``.

    Parent directories of ``path`` are created as needed.  The value is
    emitted as an escaped TOML basic string, so roots containing
    backslashes or double quotes (e.g. Windows-style paths) still yield a
    parseable file.  For plain paths the output is unchanged:
    ``mirror_root = "<value>"`` followed by a newline.

    Args:
        path: Destination file; overwritten if it already exists.
        mirror_root: Value to store under the ``mirror_root`` key.
    """
    import json

    path.parent.mkdir(parents=True, exist_ok=True)
    # JSON string escaping (\\, \", \n, \uXXXX for control characters) is
    # also valid inside a TOML basic string.  ensure_ascii=False keeps
    # non-ASCII characters literal instead of emitting surrogate-pair
    # escapes, which TOML rejects.
    quoted = json.dumps(mirror_root, ensure_ascii=False)
    path.write_text(f"mirror_root = {quoted}\n", encoding="utf-8")
def test_cli_config_beats_env_and_others(tmp_path, monkeypatch):
    """
    An explicit ``config_path`` must win over the env-var config.

    Five config files with distinct ``mirror_root`` values are written
    under ``tmp_path``; the env var names one of them and
    ``XDG_CONFIG_HOME`` is pointed at a directory this test never
    populates.  ``load_config`` is then called with the explicit file and
    must report that file's value and path.
    """
    cli_cfg = tmp_path / "cli.toml"
    env_cfg = tmp_path / "env.toml"

    # (file, mirror_root) pairs, written in this order.
    # NOTE(review): the user/etc/data files live under tmp_path; whether
    # load_config ever searches those locations is not visible here, so
    # they may not act as real competitors -- confirm.
    fixtures = (
        (cli_cfg, "/cli"),
        (env_cfg, "/env"),
        (tmp_path / "user.toml", "/user"),
        (tmp_path / "etc" / "mirage" / "config.toml", "/etc"),
        (tmp_path / "var" / "lib" / "mirage" / "config.toml", "/data"),
    )
    for cfg_file, root in fixtures:
        write_cfg(cfg_file, root)

    monkeypatch.setenv(ENV_CONFIG_VAR, str(env_cfg))
    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "xdg"))

    loaded = load_config(config_path=cli_cfg)

    assert str(loaded.mirror_root) == "/cli"
    assert loaded.source_path == cli_cfg
    # NOTE(review): config._search_config_paths tags an explicit path as
    # "explicit"; confirm load_config really reports "cli" here.
    assert loaded.source_kind == "cli"
def test_env_config_beats_user_and_etc(tmp_path, monkeypatch):
    """
    Without an explicit path, the env-var config must be the one loaded.

    Three config files with distinct ``mirror_root`` values are written
    under ``tmp_path``; the env var names ``env.toml`` and
    ``XDG_CONFIG_HOME`` is pointed at a directory this test never
    populates.  ``load_config(config_path=None)`` must report the env
    file's value, path and the ``"env"`` source kind.
    """
    env_cfg = tmp_path / "env.toml"

    # (file, mirror_root) pairs, written in this order.
    # NOTE(review): the user/etc files live under tmp_path; whether
    # load_config ever searches those locations is not visible here --
    # confirm they are meaningful competitors.
    fixtures = (
        (env_cfg, "/env"),
        (tmp_path / "user.toml", "/user"),
        (tmp_path / "etc" / "mirage" / "config.toml", "/etc"),
    )
    for cfg_file, root in fixtures:
        write_cfg(cfg_file, root)

    monkeypatch.setenv(ENV_CONFIG_VAR, str(env_cfg))
    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "xdg"))

    loaded = load_config(config_path=None)

    assert str(loaded.mirror_root) == "/env"
    assert loaded.source_path == env_cfg
    assert loaded.source_kind == "env"

View File

@@ -0,0 +1,35 @@
from mirage.config import Config
from mirage.models import Mirror
from mirage import storage
def test_storage_uses_config_data_dir(tmp_path):
    """
    Round-trip a mirror through storage using an explicitly built Config.

    A Config rooted in ``tmp_path`` is passed to ``upsert_mirror`` and
    ``get_mirror``; the DB file reported by ``cfg.db_path`` must exist
    afterwards and the mirror read back must carry the same slug and URL.
    """
    # NOTE(review): the test name implies the DB lands under data_dir, but
    # only cfg.db_path's existence is asserted -- confirm db_path derives
    # from data_dir.
    cfg = Config(
        mirror_root=tmp_path / "mirrors",
        data_dir=tmp_path / "data",
        max_concurrent_updates=2,
        wget_bin="/usr/bin/wget",
        rg_bin="rg",
        source_path=None,
        source_kind="test",
    )
    mirror = Mirror(
        slug="test",
        url="https://example.com",
        categories=["test"],
        ignore_robots=False,
        status=None,
        last_updated=None,
        last_error=None,
    )

    storage.upsert_mirror(mirror, cfg=cfg)
    assert cfg.db_path.exists()

    fetched = storage.get_mirror("test", cfg=cfg)
    assert fetched is not None
    assert fetched.slug == "test"
    assert fetched.url == "https://example.com"

View File

@@ -4,50 +4,74 @@ import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from typing import Iterable, Tuple, Dict
from typing import Iterable, Tuple, Dict, Optional
from urllib.parse import urlparse
from .config import load_config
from .config import Config, load_config
from .models import Mirror
from . import storage
def mirror_dir_for(slug: str) -> Path:
cfg = load_config()
# ----------------------------------------------------------------------
# Paths / helpers
# ----------------------------------------------------------------------
def mirror_dir_for(slug: str, cfg: Optional[Config] = None) -> Path:
"""
Return (and create) the directory on disk where this mirror lives:
<cfg.mirror_root>/<slug>
"""
if cfg is None:
cfg = load_config()
d = cfg.mirror_root / slug
d.mkdir(parents=True, exist_ok=True)
return d
def log_path_for(slug: str) -> Path:
cfg = load_config()
def log_path_for(slug: str, cfg: Optional[Config] = None) -> Path:
"""
Return the log file path for this mirror:
<cfg.log_dir>/<slug>.log
"""
if cfg is None:
cfg = load_config()
return cfg.log_dir / f"{slug}.log"
def _write_log_header(log_path: Path, cmd: list[str]) -> None:
now = datetime.now().isoformat()
now = datetime.now().isoformat(sep=" ", timespec="seconds")
with log_path.open("a", encoding="utf-8") as log:
log.write(f"\n=== {now} Running: {' '.join(cmd)}\n")
log.flush()
def run_wget(mirror: Mirror) -> Tuple[int, Path]:
# ----------------------------------------------------------------------
# Wget runner
# ----------------------------------------------------------------------
def run_wget(mirror: Mirror, cfg: Optional[Config] = None) -> Tuple[int, Path]:
"""
Run wget for a single mirror, appending logs to its log file.
Returns:
(exit_code, log_path)
"""
cfg = load_config()
target_dir = mirror_dir_for(mirror.slug)
log_path = log_path_for(mirror.slug)
if cfg is None:
cfg = load_config()
# Determine path components to strip
target_dir = mirror_dir_for(mirror.slug, cfg=cfg)
log_path = log_path_for(mirror.slug, cfg=cfg)
# Determine path components to strip so slug dir becomes the site root
parsed = urlparse(mirror.url)
path_segments = [seg for seg in parsed.path.split("/") if seg]
cut_dirs = len(path_segments)
cmd = [
cmd: list[str] = [
cfg.wget_bin,
"--mirror",
"--convert-links",
@@ -60,13 +84,14 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]:
f"--execute=robots={'off' if mirror.ignore_robots else 'on'}",
"--directory-prefix",
str(target_dir),
mirror.url,
]
# Strip all leading path components so that the slug dir becomes the site
# root.
# Strip all leading path components so that the slug dir effectively
# becomes the "root" of the mirrored site.
if cut_dirs > 0:
cmd.append(f"--cut-dirs={cut_dirs}")
# Finally, the URL (ONCE we were accidentally adding it twice before)
cmd.append(mirror.url)
_write_log_header(log_path, cmd)
@@ -82,29 +107,43 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]:
return proc.returncode, log_path
def update_mirror(slug: str) -> Mirror:
# ----------------------------------------------------------------------
# Public update APIs
# ----------------------------------------------------------------------
def update_mirror(slug: str, cfg: Optional[Config] = None) -> Mirror:
"""
Update a single mirror by slug and persist its status.
Returns:
Updated Mirror instance.
Status semantics:
- 0 => status="idle"
- 4 => status="warning" (network / transient issues)
- else => status="error"
"""
m = storage.get_mirror(slug)
if cfg is None:
cfg = load_config()
m = storage.get_mirror(slug, cfg=cfg)
if not m:
raise ValueError(f"Unknown mirror: {slug!r}")
# Mark as updating
m.status = "updating"
m.last_error = None
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
code, log_path = run_wget(m)
code, log_path = run_wget(m, cfg=cfg)
# Reload to avoid overwriting concurrent changes
m = storage.get_mirror(slug) or m
m = storage.get_mirror(slug, cfg=cfg) or m
if code == 0:
m.status = "idle"
m.last_error = None
elif code == 4:
# network issues -> warning
m.status = "warning"
@@ -114,24 +153,29 @@ def update_mirror(slug: str) -> Mirror:
m.last_error = f"wget exited with code {code}, see {log_path}"
m.last_updated = datetime.now()
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
return m
def update_all_concurrent(
slugs: Iterable[str] | None = None) -> Dict[str, Mirror]:
slugs: Iterable[str] | None = None,
cfg: Optional[Config] = None,
) -> Dict[str, Mirror]:
"""
Update multiple mirrors concurrently.
Update multiple mirrors concurrently (without using the job queue/daemon).
Args:
slugs: Iterable of slugs to update. If None, update all mirrors.
cfg: Optional Config (otherwise load_config()).
Returns:
Mapping slug -> updated Mirror.
"""
cfg = load_config()
if cfg is None:
cfg = load_config()
if slugs is None:
slugs = [m.slug for m in storage.list_mirrors()]
slugs = [m.slug for m in storage.list_mirrors(cfg=cfg)]
slugs = list(slugs)
results: Dict[str, Mirror] = {}
@@ -143,7 +187,7 @@ def update_all_concurrent(
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_slug = {executor.submit(
update_mirror, slug): slug for slug in slugs}
update_mirror, slug, cfg): slug for slug in slugs}
for future in as_completed(future_to_slug):
slug = future_to_slug[future]
@@ -152,12 +196,12 @@ def update_all_concurrent(
results[slug] = m
except Exception as e: # noqa: BLE001
# If update fails badly, mark error
m = storage.get_mirror(slug)
m = storage.get_mirror(slug, cfg=cfg)
if m:
m.status = "error"
m.last_error = f"Internal error: {e!r}"
m.last_updated = datetime.now()
storage.upsert_mirror(m)
storage.upsert_mirror(m, cfg=cfg)
results[slug] = m
return results

View File

@@ -1,6 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
# -------------------------------
# Configurable knobs (env overrides)
# -------------------------------
MIRAGE_USER="${MIRAGE_USER:-mirage}"
MIRAGE_GROUP="${MIRAGE_GROUP:-mirage}"
@@ -15,6 +18,19 @@ MIRAGE_BIN_LINK="${MIRAGE_BIN_LINK:-/usr/local/bin/mirage}"
CONFIG_DIR="/etc/mirage"
CONFIG_FILE="$CONFIG_DIR/config.toml"
# -------------------------------
# Sanity checks
# -------------------------------
if [ "$(id -u)" -ne 0 ]; then
echo "ERROR: install.sh must be run as root (e.g. via sudo)" >&2
exit 1
fi
if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then
echo "ERROR: $PYTHON_BIN not found" >&2
exit 1
fi
echo "==> Using python: $PYTHON_BIN"
echo "==> Mirage user: $MIRAGE_USER"
echo "==> Mirage group: $MIRAGE_GROUP"
@@ -23,13 +39,12 @@ echo "==> Data dir: $DATA_DIR"
echo "==> Log dir: $LOG_DIR"
echo "==> Venv dir: $VENV_DIR"
echo "==> Config file: $CONFIG_FILE"
echo "==> CLI symlink: $MIRAGE_BIN_LINK"
echo
if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then
echo "ERROR: $PYTHON_BIN not found" >&2
exit 1
fi
# -------------------------------
# Create group/user
# -------------------------------
echo "==> Creating mirage user/group (if needed)"
if ! getent group "$MIRAGE_GROUP" >/dev/null 2>&1; then
groupadd --system "$MIRAGE_GROUP"
@@ -43,10 +58,25 @@ if ! id "$MIRAGE_USER" >/dev/null 2>&1; then
"$MIRAGE_USER"
fi
# -------------------------------
# Directories & permissions
# -------------------------------
echo "==> Creating data/log/mirror directories"
mkdir -p "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR"
# Own everything by mirage:mirage
chown -R "$MIRAGE_USER:$MIRAGE_GROUP" "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR"
# Ensure perms and setgid bits every run:
# - owner/group: rwx
# - others: no access
# - setgid: new files inherit group 'mirage'
chmod 2770 "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR"
chmod -R u+rwX,g+rwX,o-rwx "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR"
# -------------------------------
# Config file
# -------------------------------
echo "==> Installing default config in /etc/mirage (if missing)"
mkdir -p "$CONFIG_DIR"
@@ -61,12 +91,12 @@ else
cat >"$CONFIG_FILE" <<EOF
# Mirage configuration
# This file was generated by scripts/install.sh
# Paths must be writable by the 'mirage' user.
# Paths must be writable by the 'mirage' user/group.
# Root directory where mirror content is stored.
mirror_root = "${MIRROR_ROOT}"
# Directory for mirage internal state (queue DB, etc.)
# Directory for Mirage internal state (queue DB, jobs, etc.)
data_dir = "${DATA_DIR}"
# Directory for log files (per-mirror logs, daemon logs, etc.)
@@ -75,6 +105,9 @@ log_dir = "${LOG_DIR}"
# Path to wget binary
wget_bin = "/usr/bin/wget"
# Path to ripgrep (rg) binary
rg_bin = "/usr/bin/rg"
# Maximum number of concurrent mirror updates
max_concurrent_updates = 4
EOF
@@ -85,9 +118,16 @@ fi
# Typical /etc ownership: root:root
chown root:root "$CONFIG_FILE"
echo "==> Creating virtualenv at $VENV_DIR"
mkdir -p "$(dirname "$VENV_DIR")"
"$PYTHON_BIN" -m venv "$VENV_DIR"
# -------------------------------
# Virtualenv + Python package
# -------------------------------
if [ -d "$VENV_DIR" ]; then
echo "==> Reusing existing virtualenv at $VENV_DIR"
else
echo "==> Creating virtualenv at $VENV_DIR"
mkdir -p "$(dirname "$VENV_DIR")"
"$PYTHON_BIN" -m venv "$VENV_DIR"
fi
echo "==> Installing mirage into virtualenv"
"$VENV_DIR/bin/pip" install --upgrade pip setuptools wheel
@@ -100,6 +140,9 @@ if [ ! -x "$MIRAGE_BIN" ]; then
exit 1
fi
# -------------------------------
# CLI symlink
# -------------------------------
echo "==> Installing mirage CLI symlink at $MIRAGE_BIN_LINK"
if [ -L "$MIRAGE_BIN_LINK" ] || [ -e "$MIRAGE_BIN_LINK" ]; then
if [ -L "$MIRAGE_BIN_LINK" ] && [ "$(readlink -f "$MIRAGE_BIN_LINK")" = "$MIRAGE_BIN" ]; then
@@ -113,18 +156,46 @@ else
echo " Created symlink: $MIRAGE_BIN_LINK -> $MIRAGE_BIN"
fi
echo "==> Installing systemd units"
install -D -m 644 systemd/mirage.service /etc/systemd/system/mirage.service
install -D -m 644 systemd/mirage-update.service /etc/systemd/system/mirage-update.service
install -D -m 644 systemd/mirage-update.timer /etc/systemd/system/mirage-update.timer
# -------------------------------
# systemd units
# -------------------------------
if command -v systemctl >/dev/null 2>&1; then
echo "==> Installing systemd units"
install -D -m 644 systemd/mirage.service /etc/systemd/system/mirage.service
install -D -m 644 systemd/mirage-update.service /etc/systemd/system/mirage-update.service
install -D -m 644 systemd/mirage-update.timer /etc/systemd/system/mirage-update.timer
echo "==> Reloading systemd"
systemctl daemon-reload
echo "==> Reloading systemd"
systemctl daemon-reload
echo "==> Enabling and starting mirage daemon + timer"
systemctl enable --now mirage.service
systemctl enable --now mirage-update.timer
echo "==> Enabling and starting mirage daemon + timer"
systemctl enable --now mirage.service
systemctl enable --now mirage-update.timer
else
echo "==> systemctl not found; skipping systemd unit installation."
fi
# -------------------------------
# Add installing user to group (docker-style)
# -------------------------------
# INSTALL_USER is taken from SUDO_USER (empty when the variable is not set).
# ADDED_USER stays empty unless the account was actually added to the group
# by this run; the summary printed later keys off that value.
ADDED_USER=""
INSTALL_USER="${SUDO_USER:-}"
if [ -n "$INSTALL_USER" ] && [ "$INSTALL_USER" != "root" ] \
    && id "$INSTALL_USER" >/dev/null 2>&1; then
    # One group name per line, then an exact (-x), literal (-F) match so that
    # characters such as '.' in a group name are not interpreted as a regex.
    # '--' protects against a group name that starts with '-'.
    if id -nG "$INSTALL_USER" | tr ' ' '\n' | grep -qxF -- "$MIRAGE_GROUP"; then
        echo "==> User $INSTALL_USER is already in group $MIRAGE_GROUP"
    else
        echo "==> Adding $INSTALL_USER to group $MIRAGE_GROUP"
        usermod -aG "$MIRAGE_GROUP" "$INSTALL_USER"
        ADDED_USER="$INSTALL_USER"
    fi
fi
# -------------------------------
# Summary
# -------------------------------
cat <<EOF
==> Install complete.
@@ -135,4 +206,25 @@ Log dir : $LOG_DIR
Config : $CONFIG_FILE
Venv : $VENV_DIR
Binary : $MIRAGE_BIN
Symlink : $MIRAGE_BIN_LINK
EOF
# Closing hint about group membership. When nobody was added to the group
# during this run, show the generic usermod instructions; otherwise tell the
# added user how to pick up the new membership.
if [ -z "$ADDED_USER" ]; then
    cat <<EOF

If you want a non-root user to manage mirrors, add them to the '$MIRAGE_GROUP' group:

 sudo usermod -aG $MIRAGE_GROUP <username>

EOF
else
    printf '%s\n' \
        "User '$ADDED_USER' was added to the '$MIRAGE_GROUP' group." \
        "To use 'mirage' without sudo *immediately* in your current shell, run:" \
        "newgrp $MIRAGE_GROUP" \
        "Otherwise, log out and back in so your group membership is refreshed."
fi

131
scripts/uninstall.sh Executable file
View File

@@ -0,0 +1,131 @@
#!/usr/bin/env bash
#
# Uninstall Mirage.
#
# Usage: uninstall.sh [--purge]
#
#   --purge   additionally remove the config dir, mirror root, data dir,
#             log dir and the Mirage user/group.
#
# Every location can be overridden through the environment variables read
# below; the defaults are the ones listed in the banner this script prints.
set -euo pipefail

MIRAGE_USER="${MIRAGE_USER:-mirage}"
MIRAGE_GROUP="${MIRAGE_GROUP:-mirage}"
MIRROR_ROOT="${MIRAGE_MIRROR_ROOT:-/srv/www/mirrors}"
DATA_DIR="${MIRAGE_DATA_DIR:-/var/lib/mirage}"
LOG_DIR="${MIRAGE_LOG_DIR:-/var/log/mirage}"
VENV_DIR="${MIRAGE_VENV_DIR:-/opt/mirage/venv}"
MIRAGE_BIN_LINK="${MIRAGE_BIN_LINK:-/usr/local/bin/mirage}"
CONFIG_DIR="/etc/mirage"
CONFIG_FILE="$CONFIG_DIR/config.toml"

# Parse every argument rather than only the first one, and reject anything
# unrecognised: for a destructive script a typo or a misplaced flag should
# stop the run instead of silently selecting the non-purge behaviour.
PURGE_DATA=0
for arg in "$@"; do
    case "$arg" in
        --purge)
            PURGE_DATA=1
            ;;
        -h | --help)
            echo "Usage: ${0##*/} [--purge]"
            exit 0
            ;;
        *)
            echo "ERROR: unknown argument '$arg' (usage: ${0##*/} [--purge])" >&2
            exit 2
            ;;
    esac
done

if [ "$(id -u)" -ne 0 ]; then
    echo "ERROR: uninstall.sh must be run as root (e.g. via sudo)" >&2
    exit 1
fi

# Show exactly what is about to be acted on before anything is changed.
echo "==> Uninstalling Mirage"
echo " Mirage user: $MIRAGE_USER"
echo " Mirage group: $MIRAGE_GROUP"
echo " Mirror root: $MIRROR_ROOT"
echo " Data dir: $DATA_DIR"
echo " Log dir: $LOG_DIR"
echo " Venv dir: $VENV_DIR"
echo " Config file: $CONFIG_FILE"
echo " CLI symlink: $MIRAGE_BIN_LINK"
echo " Purge data: $PURGE_DATA"
echo
# safe_rm_dir DIR
#
# Recursively delete DIR, refusing paths that are obviously dangerous.
#
# The path is normalised first (repeated slashes squeezed, trailing slash
# dropped) so that spellings such as "//" or "/var/lib/" cannot slip past the
# checks. Refused are: an empty path, a relative path, a path containing "."
# or ".." segments, the filesystem root, and a fixed list of shared system
# directories.
#
# Returns 0 when DIR was removed or does not exist as a directory.
# Returns 1, after printing a message to stderr, when the path is refused.
safe_rm_dir() {
    local requested="${1:-}"
    local d

    # tr -s squeezes runs of '/' into one; then drop a single trailing slash
    # unless the whole path is just "/".
    d="$(printf '%s' "$requested" | tr -s '/')"
    if [ "${#d}" -gt 1 ]; then
        d="${d%/}"
    fi

    local unsafe=0
    case "$d" in
        "" | [!/]*)
            # Empty or relative: the result would depend on the current directory.
            unsafe=1
            ;;
        */.. | */../* | */. | */./*)
            # Dot segments could make the path resolve somewhere else entirely.
            unsafe=1
            ;;
        / | /bin | /boot | /dev | /etc | /home | /lib | /lib32 | /lib64 | \
        /media | /mnt | /opt | /proc | /root | /run | /sbin | /srv | /sys | \
        /tmp | /usr | /var | \
        /usr/bin | /usr/lib | /usr/local | /usr/sbin | /usr/share | \
        /var/cache | /var/lib | /var/log | /var/www | /srv/www)
            # Shared system locations that never belong to Mirage alone.
            unsafe=1
            ;;
    esac

    if [ "$unsafe" -eq 1 ]; then
        echo " Refusing to remove unsafe directory '$requested'" >&2
        return 1
    fi

    if [ -d "$d" ]; then
        echo " Removing directory: $d"
        rm -rf -- "$d"
    fi
}
# -------------------------------
# Stop/disable systemd units
# -------------------------------
# Without systemctl there is nothing to clean up here. Otherwise stop and
# disable the units (errors ignored: they may not be installed or running),
# delete the unit files and let systemd re-read its configuration.
if ! command -v systemctl >/dev/null 2>&1; then
    echo "==> systemctl not found; skipping systemd cleanup."
else
    echo "==> Stopping systemd units (if running)"
    systemctl stop mirage.service mirage-update.service mirage-update.timer 2>/dev/null || true

    echo "==> Disabling systemd units"
    systemctl disable mirage.service mirage-update.timer 2>/dev/null || true

    echo "==> Removing systemd unit files"
    for unit_name in mirage.service mirage-update.service mirage-update.timer; do
        rm -f "/etc/systemd/system/$unit_name"
    done

    echo "==> Reloading systemd"
    systemctl daemon-reload || true
fi
# -------------------------------
# Remove CLI symlink (if ours)
# -------------------------------
# This runs BEFORE the virtualenv is deleted. `readlink -f` fails when
# intermediate directories of the link target are missing, which is exactly
# the situation once the venv is gone; under `set -e` that failure would abort
# the script and leave a dangling symlink behind. The lookup is also made
# tolerant so that re-running the script against an already-dangling link
# still cleans it up.
EXPECTED_BIN="$VENV_DIR/bin/mirage"
if [ -L "$MIRAGE_BIN_LINK" ]; then
    # Prefer the canonical target; fall back to the raw link text when the
    # target cannot be canonicalised.
    TARGET="$(readlink -f "$MIRAGE_BIN_LINK" 2>/dev/null || true)"
    if [ -z "$TARGET" ]; then
        TARGET="$(readlink "$MIRAGE_BIN_LINK" 2>/dev/null || true)"
    fi
    if [ "$TARGET" = "$EXPECTED_BIN" ]; then
        echo "==> Removing CLI symlink $MIRAGE_BIN_LINK"
        rm -f "$MIRAGE_BIN_LINK"
    else
        echo "==> Not touching $MIRAGE_BIN_LINK (points to $TARGET, not $VENV_DIR/bin/mirage)"
    fi
elif [ -e "$MIRAGE_BIN_LINK" ]; then
    echo "==> Not touching $MIRAGE_BIN_LINK (exists but is not a symlink)"
fi

# -------------------------------
# Remove venv
# -------------------------------
# VENV_DIR can come from the environment, so it goes through safe_rm_dir
# rather than a bare `rm -rf`.
if [ -d "$VENV_DIR" ]; then
    echo "==> Removing virtualenv at $VENV_DIR"
    safe_rm_dir "$VENV_DIR"
fi
# -------------------------------
# Optional purge of data/config/mirrors/user/group
# -------------------------------
if [ "$PURGE_DATA" -eq 1 ]; then
    echo "==> Purging Mirage data/config/mirrors"

    # Remove config dir (CONFIG_DIR is fixed by this script, not taken from
    # the environment).
    if [ -d "$CONFIG_DIR" ]; then
        echo " Removing config dir: $CONFIG_DIR"
        rm -rf "$CONFIG_DIR"
    fi

    # These paths may come from the environment, so they go through
    # safe_rm_dir. A refusal returns non-zero and, under `set -e`, stops the
    # purge instead of continuing with a suspicious configuration.
    safe_rm_dir "$MIRROR_ROOT"
    safe_rm_dir "$DATA_DIR"
    safe_rm_dir "$LOG_DIR"

    # Remove user/group (best-effort): a failure must not abort the
    # uninstall, but it is reported so the final "complete" message is not
    # misleading.
    if id "$MIRAGE_USER" >/dev/null 2>&1; then
        echo " Removing user: $MIRAGE_USER"
        userdel "$MIRAGE_USER" 2>/dev/null \
            || echo " WARNING: could not remove user '$MIRAGE_USER' (userdel exit code $?)" >&2
    fi
    # Re-check the group after userdel so an already-removed group is skipped.
    if getent group "$MIRAGE_GROUP" >/dev/null 2>&1; then
        echo " Removing group: $MIRAGE_GROUP"
        groupdel "$MIRAGE_GROUP" 2>/dev/null \
            || echo " WARNING: could not remove group '$MIRAGE_GROUP' (groupdel exit code $?)" >&2
    fi
else
    echo "==> Leaving mirrors, data, logs, and config on disk."
    echo " - Mirrors root: $MIRROR_ROOT"
    echo " - Data dir : $DATA_DIR"
    echo " - Log dir : $LOG_DIR"
    echo " - Config : $CONFIG_FILE"
    echo
    echo " Run with '--purge' to remove these as well (including user/group)."
fi

echo
echo "==> Mirage uninstall complete."

View File

@@ -5,5 +5,6 @@ Description=Enqueue periodic updates for Mirage mirrors
Type=oneshot
User=mirage
Group=mirage
ExecStart=/opt/mirage/venv/bin/mirage mirrors update-all
UMask=0007
WorkingDirectory=/var/lib/mirage
ExecStart=/opt/mirage/venv/bin/mirage mirrors update-all

View File

@@ -7,10 +7,11 @@ Wants=network-online.target
Type=simple
User=mirage
Group=mirage
ExecStart=/opt/mirage/venv/bin/mirage daemon
WorkingDirectory=/var/lib/mirage
UMask=0007
ExecStart=/opt/mirage/venv/bin/mirage mirrors daemon
Restart=on-failure
RestartSec=5
WorkingDirectory=/var/lib/mirage
[Install]
WantedBy=multi-user.target