diff --git a/mirage/commands/mirrors.py b/mirage/commands/mirrors.py index 95c4793..ea4aafd 100644 --- a/mirage/commands/mirrors.py +++ b/mirage/commands/mirrors.py @@ -13,15 +13,17 @@ from ..config import Config from ..daemon import run_daemon mirrors_app = typer.Typer( - help="Manage mirrors (add, list, update, search, status, watch).") + help="Manage mirrors (add, list, update, search, status, watch, daemon)." +) @mirrors_app.command("list") -def list_mirrors_cmd(): +def list_mirrors_cmd(ctx: typer.Context): """ List all configured mirrors. """ - mirrors = storage.list_mirrors() + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + mirrors = storage.list_mirrors(cfg=cfg) if not mirrors: typer.echo("No mirrors configured.") raise typer.Exit(0) @@ -30,16 +32,23 @@ def list_mirrors_cmd(): cats = ", ".join(m.categories) if m.categories else "-" status = m.status or "idle" lu = m.last_updated.isoformat( - sep=" ", timespec="seconds") if m.last_updated else "never" + sep=" ", timespec="seconds" + ) if m.last_updated else "never" typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}") typer.echo(f" {m.url}") @mirrors_app.command("add") def add_mirror_cmd( - slug: str = typer.Argument(..., - help="Local slug for the mirror (unique)."), - url: str = typer.Argument(..., help="Source URL to mirror."), + ctx: typer.Context, + slug: str = typer.Argument( + ..., + help="Local slug for the mirror (unique).", + ), + url: str = typer.Argument( + ..., + help="Source URL to mirror.", + ), category: List[str] = typer.Option( None, "--category", @@ -63,10 +72,14 @@ def add_mirror_cmd( By default, this queues an initial update job and returns immediately. The actual mirroring is handled by the mirage daemon. 
""" - existing = storage.get_mirror(slug) + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + existing = storage.get_mirror(slug, cfg=cfg) if existing: - typer.echo(f"Error: mirror with slug { - slug!r} already exists.", err=True) + typer.echo( + f"Error: mirror with slug {slug!r} already exists.", + err=True, + ) raise typer.Exit(1) m = Mirror( @@ -75,20 +88,21 @@ def add_mirror_cmd( categories=category or [], ignore_robots=ignore_robots, ) - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) typer.echo(f"Added mirror {slug!r} -> {url}") if no_update: typer.echo("Initial update NOT queued (per --no-update).") return - jobs.enqueue_update(slug) + jobs.enqueue_update(slug, cfg=cfg) typer.echo("Initial update job queued.") typer.echo("Run `mirage mirrors status` or `mirage mirrors watch` to monitor.") @mirrors_app.command("edit") def edit_mirror_cmd( + ctx: typer.Context, slug: str = typer.Argument(..., help="Mirror slug to edit."), new_slug: Optional[str] = typer.Option( None, @@ -125,7 +139,9 @@ def edit_mirror_cmd( """ Modify properties of an existing mirror (URL, categories, ignore_robots, slug). 
""" - m = storage.get_mirror(slug) + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + m = storage.get_mirror(slug, cfg=cfg) if not m: typer.echo(f"No such mirror: {slug!r}", err=True) raise typer.Exit(1) @@ -150,27 +166,27 @@ def edit_mirror_cmd( m.ignore_robots = ignore_robots if new_slug is not None and new_slug != original_slug: - # Simple rename: remove old entry, reinsert with new slug m.slug = new_slug - # Save under new slug - storage.upsert_mirror(m) - # Delete old slug + storage.upsert_mirror(m, cfg=cfg) if original_slug != new_slug: - storage.delete_mirror(original_slug) + storage.delete_mirror(original_slug, cfg=cfg) typer.echo(f"Mirror {original_slug!r} renamed to {new_slug!r}.") else: - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) typer.echo(f"Mirror {slug!r} updated.") @mirrors_app.command("remove") def remove_mirror_cmd( + ctx: typer.Context, slug: str = typer.Argument(..., help="Mirror slug to remove."), ): """ Remove a mirror definition (does not delete files on disk). """ - ok = storage.delete_mirror(slug) + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + ok = storage.delete_mirror(slug, cfg=cfg) if not ok: typer.echo(f"No such mirror: {slug!r}", err=True) raise typer.Exit(1) @@ -180,36 +196,40 @@ def remove_mirror_cmd( @mirrors_app.command("update") def update_mirror_cmd( + ctx: typer.Context, slug: str = typer.Argument(..., help="Mirror slug to update."), ): """ Enqueue an update job for a single mirror (non-blocking). """ - m = storage.get_mirror(slug) + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + m = storage.get_mirror(slug, cfg=cfg) if not m: typer.echo(f"No such mirror: {slug!r}", err=True) raise typer.Exit(1) - jobs.enqueue_update(slug) + jobs.enqueue_update(slug, cfg=cfg) typer.echo(f"Update job queued for {slug!r}.") @mirrors_app.command("update-all") -def update_all_cmd(): +def update_all_cmd(ctx: typer.Context): """ Enqueue update jobs for all mirrors (non-blocking). 
""" - all_mirrors = storage.list_mirrors() + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + all_mirrors = storage.list_mirrors(cfg=cfg) if not all_mirrors: typer.echo("No mirrors configured.") raise typer.Exit(0) count = 0 for m in all_mirrors: - # Avoid spamming duplicates if already queued/updating if m.status in ("queued", "updating"): continue - jobs.enqueue_update(m.slug) + jobs.enqueue_update(m.slug, cfg=cfg) count += 1 typer.echo(f"Queued update jobs for {count} mirror(s).") @@ -218,6 +238,7 @@ def update_all_cmd(): @mirrors_app.command("status") def status_cmd( + ctx: typer.Context, slug: Optional[str] = typer.Argument( None, help="Optional mirror slug. If omitted, show status for all mirrors.", @@ -226,8 +247,10 @@ def status_cmd( """ Show current status for mirrors. """ + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + if slug is None: - mirrors = storage.list_mirrors() + mirrors = storage.list_mirrors(cfg=cfg) if not mirrors: typer.echo("No mirrors configured.") raise typer.Exit(0) @@ -236,23 +259,26 @@ def status_cmd( cats = ", ".join(m.categories) if m.categories else "-" status = m.status or "idle" lu = m.last_updated.isoformat( - sep=" ", timespec="seconds") if m.last_updated else "never" + sep=" ", timespec="seconds" + ) if m.last_updated else "never" typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}") if m.last_error: typer.echo(f" last_error: {m.last_error}") else: - m = storage.get_mirror(slug) + m = storage.get_mirror(slug, cfg=cfg) if not m: typer.echo(f"No such mirror: {slug!r}", err=True) raise typer.Exit(1) + + cats = ", ".join(m.categories) if m.categories else "-" typer.echo(f"slug : {m.slug}") typer.echo(f"url : {m.url}") - typer.echo(f"categories : {', '.join( - m.categories) if m.categories else '-'}") + typer.echo(f"categories : {cats}") typer.echo(f"ignore_robots: {m.ignore_robots}") typer.echo(f"status : {m.status or 'idle'}") lu = m.last_updated.isoformat( - sep=" ", timespec="seconds") if 
m.last_updated else "never" + sep=" ", timespec="seconds" + ) if m.last_updated else "never" typer.echo(f"last_updated : {lu}") if m.last_error: typer.echo(f"last_error : {m.last_error}") @@ -260,6 +286,7 @@ def status_cmd( @mirrors_app.command("watch") def watch_cmd( + ctx: typer.Context, slug: str = typer.Argument(..., help="Mirror slug to watch log for."), lines: int = typer.Option( 40, @@ -273,7 +300,9 @@ def watch_cmd( Ctrl-C exits the watch without stopping the update job. """ - log_path = log_path_for(slug) + cfg: Config = ctx.obj["config"] # type: ignore[assignment] + + log_path = log_path_for(slug, cfg=cfg) if not log_path.exists(): typer.echo(f"No log file yet for {slug!r}: {log_path}") raise typer.Exit(1) @@ -281,13 +310,11 @@ def watch_cmd( typer.echo(f"Watching log: {log_path}") try: with log_path.open("r", encoding="utf-8") as f: - # show last N lines all_lines = f.readlines() tail = all_lines[-lines:] if lines > 0 else all_lines for line in tail: typer.echo(line.rstrip("\n")) - # now follow with log_path.open("r", encoding="utf-8") as f: f.seek(0, os.SEEK_END) while True: @@ -314,10 +341,6 @@ def daemon_cmd( ): """ Run the Mirage daemon (job worker) in the foreground. - - This process watches the update queue and runs wget jobs with - concurrency. It respects the global --config option and the - MIRAGE_CONFIG environment variable. """ cfg: Config = ctx.obj["config"] # type: ignore[assignment] diff --git a/mirage/config.py b/mirage/config.py index 7ed6f29..59746e9 100644 --- a/mirage/config.py +++ b/mirage/config.py @@ -11,7 +11,7 @@ import tomllib # Python 3.11+ # ----------------------------- # Defaults & helpers # ----------------------------- - +ENV_CONFIG_VAR = "MIRAGE_CONFIG" DEFAULT_MIRROR_ROOT = Path("/srv/www/mirrors") @@ -92,6 +92,15 @@ class Config: d.mkdir(parents=True, exist_ok=True) return d + @property + def jobs_dir(self) -> Path: + """ + Root for job queue files (pending/running). 
+ """ + d = self.data_dir / "jobs" + d.mkdir(parents=True, exist_ok=True) + return d + def describe_source(self) -> str: if self.source_path is None: return f"{self.source_kind} (no config file, built-in defaults)" @@ -141,8 +150,8 @@ def _search_config_paths( paths.append((explicit, "explicit")) return paths - # 2. Environment variable (new canonical name + legacy) - env_path = os.getenv("MIRAGE_CONFIG") or os.getenv("mirage_CONFIG") + # 2. Environment variable + env_path = os.getenv(ENV_CONFIG_VAR) if env_path: paths.append((Path(env_path).expanduser(), "env")) diff --git a/mirage/daemon.py b/mirage/daemon.py index 01a0e2e..ab835ec 100644 --- a/mirage/daemon.py +++ b/mirage/daemon.py @@ -4,7 +4,6 @@ import time from concurrent.futures import ThreadPoolExecutor, Future from datetime import datetime from typing import Dict, Tuple, Optional - from pathlib import Path from .config import Config, load_config @@ -23,10 +22,6 @@ def run_daemon( - Watches jobs/pending for new update jobs. - Moves them to jobs/running. - Runs wget via update_mirror() with concurrency. - - If cfg is None, load_config() is used. In normal CLI usage, the - 'mirage mirrors daemon' command passes a Config built from the - global --config option / env / defaults. 
""" if cfg is None: cfg = load_config() @@ -39,7 +34,7 @@ def run_daemon( print(f"[mirage-daemon] starting with max_workers={max_workers}") print(f"[mirage-daemon] data_dir: {cfg.data_dir}") - print(f"[mirage-daemon] jobs dir: {cfg.data_dir / 'jobs'}") + print(f"[mirage-daemon] jobs dir: {cfg.jobs_dir}") try: while True: @@ -50,7 +45,6 @@ def run_daemon( try: Path(job_path).unlink(missing_ok=True) except TypeError: - # Python <3.8 compat (if you ever care) p = Path(job_path) if p.exists(): p.unlink() @@ -59,28 +53,30 @@ def run_daemon( f.result() except Exception as e: # noqa: BLE001 # Internal failure => mark mirror as error - m = storage.get_mirror(slug) + m = storage.get_mirror(slug, cfg=cfg) if m: m.status = "error" m.last_error = f"Internal error: {e!r}" m.last_updated = datetime.now() - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) # 2. If we have capacity, pull jobs from pending capacity = max_workers - len(running) if capacity > 0: - pending = jobs.list_pending_jobs() + pending = jobs.list_pending_jobs(cfg=cfg) if pending: for pending_path, job in pending[:capacity]: - running_path = jobs.move_to_running(pending_path) + running_path = jobs.move_to_running( + pending_path, cfg=cfg + ) # mark mirror as updating early - m = storage.get_mirror(job.slug) + m = storage.get_mirror(job.slug, cfg=cfg) if m: m.status = "updating" m.last_error = None - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) - fut = executor.submit(update_mirror, job.slug) + fut = executor.submit(update_mirror, job.slug, cfg) running[fut] = (str(running_path), job.slug) time.sleep(poll_interval) @@ -88,3 +84,4 @@ def run_daemon( print("[mirage-daemon] shutting down (KeyboardInterrupt)") finally: executor.shutdown(wait=False) + diff --git a/mirage/jobs.py b/mirage/jobs.py index fb1a61f..6b0dbca 100644 --- a/mirage/jobs.py +++ b/mirage/jobs.py @@ -1,108 +1,90 @@ from __future__ import annotations import json +import time import uuid from dataclasses import dataclass 
-from datetime import datetime from pathlib import Path -from typing import List, Tuple +from typing import List, Optional, Tuple -from .config import load_config -from . import storage +from .config import Config, load_config @dataclass -class Job: - id: str +class UpdateJob: slug: str - type: str # currently only "update" - queued_at: datetime - - def to_dict(self) -> dict: - return { - "id": self.id, - "slug": self.slug, - "type": self.type, - "queued_at": self.queued_at.isoformat(), - } - - @classmethod - def from_dict(cls, data: dict) -> "Job": - return cls( - id=data["id"], - slug=data["slug"], - type=data["type"], - queued_at=datetime.fromisoformat(data["queued_at"]), - ) + created_at: float -def _jobs_root() -> Path: - cfg = load_config() - root = cfg.data_dir / "jobs" - root.mkdir(parents=True, exist_ok=True) - (root / "pending").mkdir(exist_ok=True) - (root / "running").mkdir(exist_ok=True) +def _jobs_root(cfg: Config) -> Path: + root = cfg.jobs_dir + (root / "pending").mkdir(parents=True, exist_ok=True) + (root / "running").mkdir(parents=True, exist_ok=True) return root -def pending_dir() -> Path: - return _jobs_root() / "pending" - - -def running_dir() -> Path: - return _jobs_root() / "running" - - -def enqueue_update(slug: str) -> Path: +def enqueue_update(slug: str, cfg: Optional[Config] = None) -> Path: """ - Enqueue an update job for the given slug. - Mark mirror status as 'queued' (unless it's already queued/updating). + Create a new pending job for the given slug. + Returns the path to the created job file. 
""" - job_id = uuid.uuid4().hex - job = Job( - id=job_id, - slug=slug, - type="update", - queued_at=datetime.now(), - ) - pdir = pending_dir() - path = pdir / f"{job_id}.json" - with path.open("w", encoding="utf-8") as f: - json.dump(job.to_dict(), f) - - m = storage.get_mirror(slug) - if m and m.status not in ("queued", "updating"): - m.status = "queued" - m.last_error = None - storage.upsert_mirror(m) + if cfg is None: + cfg = load_config() + jobs_root = _jobs_root(cfg) + job = UpdateJob(slug=slug, created_at=time.time()) + payload = {"slug": job.slug, "created_at": job.created_at} + job_id = f"{int(job.created_at)}-{uuid.uuid4().hex}" + path = jobs_root / "pending" / f"{job_id}.json" + path.write_text(json.dumps(payload), encoding="utf-8") return path -def list_pending_jobs() -> List[Tuple[Path, Job]]: - jobs: List[Tuple[Path, Job]] = [] - pdir = pending_dir() - for path in sorted(pdir.glob("*.json")): +def list_pending_jobs( + cfg: Optional[Config] = None, +) -> List[Tuple[Path, UpdateJob]]: + """ + Return a sorted list of (job_path, UpdateJob) for pending jobs. + Sorted by created_at ascending (oldest first). 
+ """ + if cfg is None: + cfg = load_config() + + jobs_root = _jobs_root(cfg) + pending_dir = jobs_root / "pending" + results: List[Tuple[Path, UpdateJob]] = [] + + for p in sorted(pending_dir.glob("*.json")): try: - data = json.loads(path.read_text(encoding="utf-8")) - job = Job.from_dict(data) + data = json.loads(p.read_text(encoding="utf-8")) + job = UpdateJob( + slug=str(data["slug"]), + created_at=float(data.get("created_at", 0.0)), + ) + results.append((p, job)) except Exception: continue - jobs.append((path, job)) - return jobs + + results.sort(key=lambda item: item[1].created_at) + return results -def load_job(path: Path) -> Job: - data = json.loads(path.read_text(encoding="utf-8")) - return Job.from_dict(data) - - -def move_to_running(pending_path: Path) -> Path: +def move_to_running( + pending_path: Path, + cfg: Optional[Config] = None, +) -> Path: """ - Move a pending job file into the running directory. + Move a pending job file into the running/ directory. + Returns the new path. 
""" - rdir = running_dir() - dest = rdir / pending_path.name - pending_path.replace(dest) + if cfg is None: + cfg = load_config() + + jobs_root = _jobs_root(cfg) + running_dir = jobs_root / "running" + running_dir.mkdir(parents=True, exist_ok=True) + + dest = running_dir / pending_path.name + pending_path.rename(dest) return dest diff --git a/mirage/storage.py b/mirage/storage.py index 7e34543..d8e2a6b 100644 --- a/mirage/storage.py +++ b/mirage/storage.py @@ -1,60 +1,132 @@ from __future__ import annotations +import os import json -from threading import RLock -from typing import Dict, List, Optional +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional, List -from .config import load_config +from .config import Config, load_config from .models import Mirror -_lock = RLock() + +def _db_path(cfg: Config) -> Path: + return cfg.db_path -def _load_raw(path) -> Dict[str, dict]: +def _load_raw(cfg: Config) -> Dict[str, dict]: + path = _db_path(cfg) if not path.exists(): return {} - with path.open("r", encoding="utf-8") as f: - return json.load(f) + try: + text = path.read_text(encoding="utf-8") + return json.loads(text) + except Exception: + return {} -def _save_raw(path, data: Dict[str, dict]) -> None: + +def _save_raw(cfg: Config, data: Dict[str, dict]) -> None: + path = _db_path(cfg) path.parent.mkdir(parents=True, exist_ok=True) - tmp = path.with_suffix(".tmp") - with tmp.open("w", encoding="utf-8") as f: - json.dump(data, f, indent=2, sort_keys=True) - tmp.replace(path) + try: + tmp_path = path.with_suffix(".tmp") + + # Write to a temp file first for atomicity + tmp_path.write_text( + json.dumps(data, indent=2, sort_keys=True), + encoding="utf-8", + ) + + # Atomically replace + os.replace(tmp_path, path) + path.chmod(0o664) + except PermissionError as e: + raise PermissionError( + f"Cannot write mirrors DB at {path}.\n" + f"Config source: {cfg.describe_source()}\n" + f"Current user: {os.getlogin()!r}\n" + "Hint: Add your user to 
the 'mirage' group and ensure " + "data_dir/log_dir/mirror_root are group-writable." + ) from e -def list_mirrors() -> List[Mirror]: - cfg = load_config() - with _lock: - data = _load_raw(cfg.db_path) - return [Mirror.from_dict(v) for v in data.values()] +def _mirror_from_dict(slug: str, d: dict) -> Mirror: + last_updated = d.get("last_updated") + if isinstance(last_updated, str): + try: + last_updated_dt = datetime.fromisoformat(last_updated) + except ValueError: + last_updated_dt = None + else: + last_updated_dt = None + + return Mirror( + slug=slug, + url=d["url"], + categories=list(d.get("categories", [])), + ignore_robots=bool(d.get("ignore_robots", False)), + status=d.get("status"), + last_updated=last_updated_dt, + last_error=d.get("last_error"), + ) -def get_mirror(slug: str) -> Optional[Mirror]: - cfg = load_config() - with _lock: - data = _load_raw(cfg.db_path) - if slug not in data: +def _mirror_to_dict(m: Mirror) -> dict: + return { + "url": m.url, + "categories": list(m.categories), + "ignore_robots": bool(m.ignore_robots), + "status": m.status, + "last_updated": m.last_updated.isoformat(timespec="seconds") + if m.last_updated + else None, + "last_error": m.last_error, + } + + +# Public API ------------------------------------------------------------ + + +def list_mirrors(cfg: Optional[Config] = None) -> List[Mirror]: + if cfg is None: + cfg = load_config() + raw = _load_raw(cfg) + mirrors: List[Mirror] = [] + for slug, d in raw.items(): + try: + mirrors.append(_mirror_from_dict(slug, d)) + except Exception: + continue + # sort by slug for deterministic output + mirrors.sort(key=lambda m: m.slug) + return mirrors + + +def get_mirror(slug: str, cfg: Optional[Config] = None) -> Optional[Mirror]: + if cfg is None: + cfg = load_config() + raw = _load_raw(cfg) + d = raw.get(slug) + if not d: return None - return Mirror.from_dict(data[slug]) + return _mirror_from_dict(slug, d) -def upsert_mirror(m: Mirror) -> None: - cfg = load_config() - with _lock: - data 
= _load_raw(cfg.db_path) - data[m.slug] = m.to_dict() - _save_raw(cfg.db_path, data) +def upsert_mirror(mirror: Mirror, cfg: Optional[Config] = None) -> None: + if cfg is None: + cfg = load_config() + raw = _load_raw(cfg) + raw[mirror.slug] = _mirror_to_dict(mirror) + _save_raw(cfg, raw) -def delete_mirror(slug: str) -> bool: - cfg = load_config() - with _lock: - data = _load_raw(cfg.db_path) - if slug not in data: - return False - del data[slug] - _save_raw(cfg.db_path, data) - return True +def delete_mirror(slug: str, cfg: Optional[Config] = None) -> bool: + if cfg is None: + cfg = load_config() + raw = _load_raw(cfg) + if slug not in raw: + return False + raw.pop(slug, None) + _save_raw(cfg, raw) + return True diff --git a/mirage/tests/__init__.py b/mirage/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mirage/tests/test_config_precedence.py b/mirage/tests/test_config_precedence.py new file mode 100644 index 0000000..0e96425 --- /dev/null +++ b/mirage/tests/test_config_precedence.py @@ -0,0 +1,49 @@ +from pathlib import Path + +from mirage.config import load_config, ENV_CONFIG_VAR + + +def write_cfg(path: Path, mirror_root: str): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f'mirror_root = "{mirror_root}"\n', encoding="utf-8") + + +def test_cli_config_beats_env_and_others(tmp_path, monkeypatch): + etc = tmp_path / "etc" / "mirage" + data = tmp_path / "var" / "lib" / "mirage" + cli_cfg = tmp_path / "cli.toml" + env_cfg = tmp_path / "env.toml" + user_cfg = tmp_path / "user.toml" + + write_cfg(cli_cfg, "/cli") + write_cfg(env_cfg, "/env") + write_cfg(user_cfg, "/user") + write_cfg(etc / "config.toml", "/etc") + write_cfg(data / "config.toml", "/data") + + monkeypatch.setenv(ENV_CONFIG_VAR, str(env_cfg)) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "xdg")) + + cfg = load_config(config_path=cli_cfg) + assert str(cfg.mirror_root) == "/cli" + assert cfg.source_path == cli_cfg + assert cfg.source_kind == "cli" + 
+ +def test_env_config_beats_user_and_etc(tmp_path, monkeypatch): + env_cfg = tmp_path / "env.toml" + user_cfg = tmp_path / "user.toml" + etc_cfg = tmp_path / "etc" / "mirage" / "config.toml" + + write_cfg(env_cfg, "/env") + write_cfg(user_cfg, "/user") + write_cfg(etc_cfg, "/etc") + + monkeypatch.setenv(ENV_CONFIG_VAR, str(env_cfg)) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "xdg")) + + cfg = load_config(config_path=None) + assert str(cfg.mirror_root) == "/env" + assert cfg.source_path == env_cfg + assert cfg.source_kind == "env" + diff --git a/mirage/tests/test_storage_paths.py b/mirage/tests/test_storage_paths.py new file mode 100644 index 0000000..9557a3b --- /dev/null +++ b/mirage/tests/test_storage_paths.py @@ -0,0 +1,35 @@ +from mirage.config import Config +from mirage.models import Mirror +from mirage import storage + + +def test_storage_uses_config_data_dir(tmp_path): + data_dir = tmp_path / "data" + cfg = Config( + mirror_root=tmp_path / "mirrors", + data_dir=data_dir, + max_concurrent_updates=2, + wget_bin="/usr/bin/wget", + rg_bin="rg", + source_path=None, + source_kind="test", + ) + + m = Mirror( + slug="test", + url="https://example.com", + categories=["test"], + ignore_robots=False, + status=None, + last_updated=None, + last_error=None, + ) + + storage.upsert_mirror(m, cfg=cfg) + db_path = cfg.db_path + assert db_path.exists() + + loaded = storage.get_mirror("test", cfg=cfg) + assert loaded is not None + assert loaded.slug == "test" + assert loaded.url == "https://example.com" diff --git a/mirage/updater.py b/mirage/updater.py index f8fd4c9..709eb82 100644 --- a/mirage/updater.py +++ b/mirage/updater.py @@ -4,50 +4,74 @@ import subprocess from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from pathlib import Path -from typing import Iterable, Tuple, Dict +from typing import Iterable, Tuple, Dict, Optional from urllib.parse import urlparse -from .config import load_config +from .config import 
Config, load_config from .models import Mirror from . import storage -def mirror_dir_for(slug: str) -> Path: - cfg = load_config() +# ---------------------------------------------------------------------- +# Paths / helpers +# ---------------------------------------------------------------------- + + +def mirror_dir_for(slug: str, cfg: Optional[Config] = None) -> Path: + """ + Return (and create) the directory on disk where this mirror lives: + + / + """ + if cfg is None: + cfg = load_config() d = cfg.mirror_root / slug d.mkdir(parents=True, exist_ok=True) return d -def log_path_for(slug: str) -> Path: - cfg = load_config() +def log_path_for(slug: str, cfg: Optional[Config] = None) -> Path: + """ + Return the log file path for this mirror: + + /.log + """ + if cfg is None: + cfg = load_config() return cfg.log_dir / f"{slug}.log" def _write_log_header(log_path: Path, cmd: list[str]) -> None: - now = datetime.now().isoformat() + now = datetime.now().isoformat(sep=" ", timespec="seconds") with log_path.open("a", encoding="utf-8") as log: log.write(f"\n=== {now} Running: {' '.join(cmd)}\n") log.flush() -def run_wget(mirror: Mirror) -> Tuple[int, Path]: +# ---------------------------------------------------------------------- +# Wget runner +# ---------------------------------------------------------------------- + + +def run_wget(mirror: Mirror, cfg: Optional[Config] = None) -> Tuple[int, Path]: """ Run wget for a single mirror, appending logs to its log file. 
Returns: (exit_code, log_path) """ - cfg = load_config() - target_dir = mirror_dir_for(mirror.slug) - log_path = log_path_for(mirror.slug) + if cfg is None: + cfg = load_config() - # Determine path components to strip + target_dir = mirror_dir_for(mirror.slug, cfg=cfg) + log_path = log_path_for(mirror.slug, cfg=cfg) + + # Determine path components to strip so slug dir becomes the site root parsed = urlparse(mirror.url) path_segments = [seg for seg in parsed.path.split("/") if seg] cut_dirs = len(path_segments) - cmd = [ + cmd: list[str] = [ cfg.wget_bin, "--mirror", "--convert-links", @@ -60,13 +84,14 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]: f"--execute=robots={'off' if mirror.ignore_robots else 'on'}", "--directory-prefix", str(target_dir), - mirror.url, ] - # Strip all leading path components so that the slug dir becomes the site - # root. + # Strip all leading path components so that the slug dir effectively + # becomes the "root" of the mirrored site. if cut_dirs > 0: cmd.append(f"--cut-dirs={cut_dirs}") + + # Finally, the URL (ONCE – we were accidentally adding it twice before) cmd.append(mirror.url) _write_log_header(log_path, cmd) @@ -82,29 +107,43 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]: return proc.returncode, log_path -def update_mirror(slug: str) -> Mirror: +# ---------------------------------------------------------------------- +# Public update APIs +# ---------------------------------------------------------------------- + + +def update_mirror(slug: str, cfg: Optional[Config] = None) -> Mirror: """ Update a single mirror by slug and persist its status. Returns: Updated Mirror instance. 
+ + Status semantics: + - 0 => status="idle" + - 4 => status="warning" (network / transient issues) + - else => status="error" """ - m = storage.get_mirror(slug) + if cfg is None: + cfg = load_config() + + m = storage.get_mirror(slug, cfg=cfg) if not m: raise ValueError(f"Unknown mirror: {slug!r}") # Mark as updating m.status = "updating" m.last_error = None - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) - code, log_path = run_wget(m) + code, log_path = run_wget(m, cfg=cfg) # Reload to avoid overwriting concurrent changes - m = storage.get_mirror(slug) or m + m = storage.get_mirror(slug, cfg=cfg) or m if code == 0: m.status = "idle" + m.last_error = None elif code == 4: # network issues -> warning m.status = "warning" @@ -114,24 +153,29 @@ def update_mirror(slug: str) -> Mirror: m.last_error = f"wget exited with code {code}, see {log_path}" m.last_updated = datetime.now() - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) return m def update_all_concurrent( - slugs: Iterable[str] | None = None) -> Dict[str, Mirror]: + slugs: Iterable[str] | None = None, + cfg: Optional[Config] = None, +) -> Dict[str, Mirror]: """ - Update multiple mirrors concurrently. + Update multiple mirrors concurrently (without using the job queue/daemon). Args: slugs: Iterable of slugs to update. If None, update all mirrors. + cfg: Optional Config (otherwise load_config()). Returns: Mapping slug -> updated Mirror. 
""" - cfg = load_config() + if cfg is None: + cfg = load_config() + if slugs is None: - slugs = [m.slug for m in storage.list_mirrors()] + slugs = [m.slug for m in storage.list_mirrors(cfg=cfg)] slugs = list(slugs) results: Dict[str, Mirror] = {} @@ -143,7 +187,7 @@ def update_all_concurrent( with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_slug = {executor.submit( - update_mirror, slug): slug for slug in slugs} + update_mirror, slug, cfg): slug for slug in slugs} for future in as_completed(future_to_slug): slug = future_to_slug[future] @@ -152,12 +196,12 @@ def update_all_concurrent( results[slug] = m except Exception as e: # noqa: BLE001 # If update fails badly, mark error - m = storage.get_mirror(slug) + m = storage.get_mirror(slug, cfg=cfg) if m: m.status = "error" m.last_error = f"Internal error: {e!r}" m.last_updated = datetime.now() - storage.upsert_mirror(m) + storage.upsert_mirror(m, cfg=cfg) results[slug] = m return results diff --git a/scripts/install.sh b/scripts/install.sh index 943ea38..77f938e 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash set -euo pipefail +# ------------------------------- +# Configurable knobs (env overrides) +# ------------------------------- MIRAGE_USER="${MIRAGE_USER:-mirage}" MIRAGE_GROUP="${MIRAGE_GROUP:-mirage}" @@ -15,6 +18,19 @@ MIRAGE_BIN_LINK="${MIRAGE_BIN_LINK:-/usr/local/bin/mirage}" CONFIG_DIR="/etc/mirage" CONFIG_FILE="$CONFIG_DIR/config.toml" +# ------------------------------- +# Sanity checks +# ------------------------------- +if [ "$(id -u)" -ne 0 ]; then + echo "ERROR: install.sh must be run as root (e.g. via sudo)" >&2 + exit 1 +fi + +if ! 
command -v "$PYTHON_BIN" >/dev/null 2>&1; then + echo "ERROR: $PYTHON_BIN not found" >&2 + exit 1 +fi + echo "==> Using python: $PYTHON_BIN" echo "==> Mirage user: $MIRAGE_USER" echo "==> Mirage group: $MIRAGE_GROUP" @@ -23,13 +39,12 @@ echo "==> Data dir: $DATA_DIR" echo "==> Log dir: $LOG_DIR" echo "==> Venv dir: $VENV_DIR" echo "==> Config file: $CONFIG_FILE" +echo "==> CLI symlink: $MIRAGE_BIN_LINK" echo -if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then - echo "ERROR: $PYTHON_BIN not found" >&2 - exit 1 -fi - +# ------------------------------- +# Create group/user +# ------------------------------- echo "==> Creating mirage user/group (if needed)" if ! getent group "$MIRAGE_GROUP" >/dev/null 2>&1; then groupadd --system "$MIRAGE_GROUP" @@ -43,10 +58,25 @@ if ! id "$MIRAGE_USER" >/dev/null 2>&1; then "$MIRAGE_USER" fi +# ------------------------------- +# Directories & permissions +# ------------------------------- echo "==> Creating data/log/mirror directories" mkdir -p "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR" + +# Own everything by mirage:mirage chown -R "$MIRAGE_USER:$MIRAGE_GROUP" "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR" +# Ensure perms and setgid bits every run: +# - owner/group: rwx +# - others: no access +# - setgid: new files inherit group 'mirage' +chmod 2770 "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR" +chmod -R u+rwX,g+rwX,o-rwx "$MIRROR_ROOT" "$DATA_DIR" "$LOG_DIR" + +# ------------------------------- +# Config file +# ------------------------------- echo "==> Installing default config in /etc/mirage (if missing)" mkdir -p "$CONFIG_DIR" @@ -61,12 +91,12 @@ else cat >"$CONFIG_FILE" < Creating virtualenv at $VENV_DIR" -mkdir -p "$(dirname "$VENV_DIR")" -"$PYTHON_BIN" -m venv "$VENV_DIR" +# ------------------------------- +# Virtualenv + Python package +# ------------------------------- +if [ -d "$VENV_DIR" ]; then + echo "==> Reusing existing virtualenv at $VENV_DIR" +else + echo "==> Creating virtualenv at $VENV_DIR" + mkdir -p "$(dirname "$VENV_DIR")" + 
"$PYTHON_BIN" -m venv "$VENV_DIR" +fi echo "==> Installing mirage into virtualenv" "$VENV_DIR/bin/pip" install --upgrade pip setuptools wheel @@ -100,6 +140,9 @@ if [ ! -x "$MIRAGE_BIN" ]; then exit 1 fi +# ------------------------------- +# CLI symlink +# ------------------------------- echo "==> Installing mirage CLI symlink at $MIRAGE_BIN_LINK" if [ -L "$MIRAGE_BIN_LINK" ] || [ -e "$MIRAGE_BIN_LINK" ]; then if [ -L "$MIRAGE_BIN_LINK" ] && [ "$(readlink -f "$MIRAGE_BIN_LINK")" = "$MIRAGE_BIN" ]; then @@ -113,18 +156,46 @@ else echo " Created symlink: $MIRAGE_BIN_LINK -> $MIRAGE_BIN" fi -echo "==> Installing systemd units" -install -D -m 644 systemd/mirage.service /etc/systemd/system/mirage.service -install -D -m 644 systemd/mirage-update.service /etc/systemd/system/mirage-update.service -install -D -m 644 systemd/mirage-update.timer /etc/systemd/system/mirage-update.timer +# ------------------------------- +# systemd units +# ------------------------------- +if command -v systemctl >/dev/null 2>&1; then + echo "==> Installing systemd units" + install -D -m 644 systemd/mirage.service /etc/systemd/system/mirage.service + install -D -m 644 systemd/mirage-update.service /etc/systemd/system/mirage-update.service + install -D -m 644 systemd/mirage-update.timer /etc/systemd/system/mirage-update.timer -echo "==> Reloading systemd" -systemctl daemon-reload + echo "==> Reloading systemd" + systemctl daemon-reload -echo "==> Enabling and starting mirage daemon + timer" -systemctl enable --now mirage.service -systemctl enable --now mirage-update.timer + echo "==> Enabling and starting mirage daemon + timer" + systemctl enable --now mirage.service + systemctl enable --now mirage-update.timer +else + echo "==> systemctl not found; skipping systemd unit installation." 
+fi +# ------------------------------- +# Add installing user to group (docker-style) +# ------------------------------- +ADDED_USER="" +INSTALL_USER="${SUDO_USER:-}" + +if [ -n "$INSTALL_USER" ] && [ "$INSTALL_USER" != "root" ]; then + if id "$INSTALL_USER" >/dev/null 2>&1; then + if id -nG "$INSTALL_USER" | tr ' ' '\n' | grep -qx "$MIRAGE_GROUP"; then + echo "==> User $INSTALL_USER is already in group $MIRAGE_GROUP" + else + echo "==> Adding $INSTALL_USER to group $MIRAGE_GROUP" + usermod -aG "$MIRAGE_GROUP" "$INSTALL_USER" + ADDED_USER="$INSTALL_USER" + fi + fi +fi + +# ------------------------------- +# Summary +# ------------------------------- cat < Install complete. @@ -135,4 +206,25 @@ Log dir : $LOG_DIR Config : $CONFIG_FILE Venv : $VENV_DIR Binary : $MIRAGE_BIN +Symlink : $MIRAGE_BIN_LINK EOF + +if [ -n "$ADDED_USER" ]; then + cat <" + echo +fi + diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh new file mode 100755 index 0000000..84fd300 --- /dev/null +++ b/scripts/uninstall.sh @@ -0,0 +1,131 @@ +#!/usr/bin/env bash +set -euo pipefail + +MIRAGE_USER="${MIRAGE_USER:-mirage}" +MIRAGE_GROUP="${MIRAGE_GROUP:-mirage}" + +MIRROR_ROOT="${MIRAGE_MIRROR_ROOT:-/srv/www/mirrors}" +DATA_DIR="${MIRAGE_DATA_DIR:-/var/lib/mirage}" +LOG_DIR="${MIRAGE_LOG_DIR:-/var/log/mirage}" +VENV_DIR="${MIRAGE_VENV_DIR:-/opt/mirage/venv}" + +MIRAGE_BIN_LINK="${MIRAGE_BIN_LINK:-/usr/local/bin/mirage}" +CONFIG_DIR="/etc/mirage" +CONFIG_FILE="$CONFIG_DIR/config.toml" + +PURGE_DATA=0 + +if [ "${1:-}" = "--purge" ]; then + PURGE_DATA=1 +fi + +if [ "$(id -u)" -ne 0 ]; then + echo "ERROR: uninstall.sh must be run as root (e.g. 
via sudo)" >&2 + exit 1 +fi + +echo "==> Uninstalling Mirage" +echo " Mirage user: $MIRAGE_USER" +echo " Mirage group: $MIRAGE_GROUP" +echo " Mirror root: $MIRROR_ROOT" +echo " Data dir: $DATA_DIR" +echo " Log dir: $LOG_DIR" +echo " Venv dir: $VENV_DIR" +echo " Config file: $CONFIG_FILE" +echo " CLI symlink: $MIRAGE_BIN_LINK" +echo " Purge data: $PURGE_DATA" +echo + +safe_rm_dir() { + local d="$1" + if [ -z "$d" ] || [ "$d" = "/" ]; then + echo " Refusing to remove unsafe directory '$d'" >&2 + return 1 + fi + if [ -d "$d" ]; then + echo " Removing directory: $d" + rm -rf "$d" + fi +} + +# ------------------------------- +# Stop/disable systemd units +# ------------------------------- +if command -v systemctl >/dev/null 2>&1; then + echo "==> Stopping systemd units (if running)" + systemctl stop mirage.service mirage-update.service mirage-update.timer 2>/dev/null || true + + echo "==> Disabling systemd units" + systemctl disable mirage.service mirage-update.timer 2>/dev/null || true + + echo "==> Removing systemd unit files" + rm -f /etc/systemd/system/mirage.service + rm -f /etc/systemd/system/mirage-update.service + rm -f /etc/systemd/system/mirage-update.timer + + echo "==> Reloading systemd" + systemctl daemon-reload || true +else + echo "==> systemctl not found; skipping systemd cleanup." 
+fi
+
+# -------------------------------
+# Remove CLI symlink (if ours)
+# -------------------------------
+# This step must run BEFORE the virtualenv is deleted. `readlink -f` needs
+# every path component except the last one to exist, so once $VENV_DIR is
+# gone it fails on our own (now dangling) link. Under `set -e` that failing
+# assignment would abort the whole uninstall, leaving the symlink behind and
+# skipping the --purge section entirely.
+if [ -L "$MIRAGE_BIN_LINK" ]; then
+  # Prefer the canonical path. For a dangling link (e.g. the venv was already
+  # removed by an earlier, interrupted uninstall) fall back to the raw link
+  # text. The trailing `|| true` keeps a failed lookup from tripping `set -e`;
+  # TARGET is then empty and the link is left untouched.
+  TARGET="$(readlink -f "$MIRAGE_BIN_LINK" 2>/dev/null || readlink "$MIRAGE_BIN_LINK" 2>/dev/null || true)"
+  if [ "$TARGET" = "$VENV_DIR/bin/mirage" ]; then
+    echo "==> Removing CLI symlink $MIRAGE_BIN_LINK"
+    rm -f "$MIRAGE_BIN_LINK"
+  else
+    echo "==> Not touching $MIRAGE_BIN_LINK (points to $TARGET, not $VENV_DIR/bin/mirage)"
+  fi
+elif [ -e "$MIRAGE_BIN_LINK" ]; then
+  # A regular file or directory at that path was not created by us.
+  echo "==> Not touching $MIRAGE_BIN_LINK (exists but is not a symlink)"
+fi
+
+# -------------------------------
+# Remove venv
+# -------------------------------
+if [ -d "$VENV_DIR" ]; then
+  echo "==> Removing virtualenv at $VENV_DIR"
+  rm -rf "$VENV_DIR"
+fi
+
+# -------------------------------
+# Optional purge of data/config/mirrors/user/group
+# -------------------------------
+# Only runs when the script was invoked with --purge (PURGE_DATA=1);
+# otherwise everything listed in the else-branch is left on disk.
+if [ "$PURGE_DATA" -eq 1 ]; then
+  echo "==> Purging Mirage data/config/mirrors"
+
+  # Remove config dir
+  if [ -d "$CONFIG_DIR" ]; then
+    echo " Removing config dir: $CONFIG_DIR"
+    rm -rf "$CONFIG_DIR"
+  fi
+
+  # These paths can be overridden via environment variables, so they go
+  # through safe_rm_dir, which refuses an empty path or "/".
+  safe_rm_dir "$MIRROR_ROOT"
+  safe_rm_dir "$DATA_DIR"
+  safe_rm_dir "$LOG_DIR"
+
+  # Remove user/group (best-effort): failures are deliberately ignored.
+  if id "$MIRAGE_USER" >/dev/null 2>&1; then
+    echo " Removing user: $MIRAGE_USER"
+    userdel "$MIRAGE_USER" 2>/dev/null || true
+  fi
+  if getent group "$MIRAGE_GROUP" >/dev/null 2>&1; then
+    echo " Removing group: $MIRAGE_GROUP"
+    groupdel "$MIRAGE_GROUP" 2>/dev/null || true
+  fi
+else
+  echo "==> Leaving mirrors, data, logs, and config on disk."
+  echo " - Mirrors root: $MIRROR_ROOT"
+  echo " - Data dir : $DATA_DIR"
+  echo " - Log dir : $LOG_DIR"
+  echo " - Config : $CONFIG_FILE"
+  echo
+  echo " Run with '--purge' to remove these as well (including user/group)."
+fi
+
+echo
+echo "==> Mirage uninstall complete."
diff --git a/systemd/mirage-update.service b/systemd/mirage-update.service index bcc5197..16d6bb5 100644 --- a/systemd/mirage-update.service +++ b/systemd/mirage-update.service @@ -5,5 +5,6 @@ Description=Enqueue periodic updates for Mirage mirrors Type=oneshot User=mirage Group=mirage -ExecStart=/opt/mirage/venv/bin/mirage mirrors update-all +UMask=0007 WorkingDirectory=/var/lib/mirage +ExecStart=/opt/mirage/venv/bin/mirage mirrors update-all diff --git a/systemd/mirage.service b/systemd/mirage.service index f3bcae9..5b0d087 100644 --- a/systemd/mirage.service +++ b/systemd/mirage.service @@ -7,10 +7,11 @@ Wants=network-online.target Type=simple User=mirage Group=mirage -ExecStart=/opt/mirage/venv/bin/mirage daemon +WorkingDirectory=/var/lib/mirage +UMask=0007 +ExecStart=/opt/mirage/venv/bin/mirage mirrors daemon Restart=on-failure RestartSec=5 -WorkingDirectory=/var/lib/mirage [Install] WantedBy=multi-user.target