Use proper config handling

This commit is contained in:
2025-12-02 06:04:49 -05:00
parent f4e595d90a
commit 0f29bb140a
9 changed files with 382 additions and 121 deletions

View File

@@ -1,44 +1,46 @@
from __future__ import annotations
from pathlib import Path
from typing import Optional
import typer
from .commands import mirrors_app, misc_app
from .daemon import run_daemon
from .config import load_config
from .commands import mirrors_app, admin_app
app = typer.Typer(
help=(
"Mirage - mirror management that's too good to be true.\n\n"
"Manage local mirrors of websites for offline use.\n"
"Use `mirage mirrors ...` to add/list/update/search.\n"
"Run `mirage daemon` (e.g. under systemd) to process update jobs."
)
help="Mirage offline website mirror manager.",
rich_markup_mode="rich",
)
app.add_typer(mirrors_app, name="mirrors")
app.add_typer(misc_app, name="misc")
# Attach sub-command groups
app.add_typer(mirrors_app, name="mirrors",
help="Manage mirrors (add, list, update, daemon, etc.).")
app.add_typer(admin_app, name="admin",
help="Admin / debug utilities (config, paths, etc.).")
@app.command("daemon")
def daemon_cmd(
poll_interval: float = typer.Option(
1.0,
"--poll-interval",
help="Seconds between job queue polls.",
@app.callback()
def main(
ctx: typer.Context,
config: Optional[Path] = typer.Option(
None,
"--config",
"-c",
dir_okay=False,
file_okay=True,
exists=False, # we handle missing ourselves
readable=True,
help="Explicit path to mirage config file (overrides env and defaults).",
),
):
"""
Run the Mirage mirror daemon.
Mirage command-line interface.
This is intended to be managed by systemd:
- `ExecStart=/usr/bin/mirage daemon`
You can override which config file is used with [bold]--config[/bold]
or via the [bold]MIRAGE_CONFIG[/bold] environment variable.
"""
run_daemon(poll_interval=poll_interval)
def app_main():
app()
if __name__ == "__main__":
app_main()
cfg = load_config(config_path=config)
# Stash config in Typer's context so all subcommands can get at it.
ctx.obj = {"config": cfg}

View File

@@ -1,4 +1,4 @@
from .mirrors import mirrors_app
from .misc import misc_app
from .admin import admin_app
__all__ = ["mirrors_app", "misc_app"]
__all__ = ["mirrors_app", "admin_app"]

63
mirage/commands/admin.py Normal file
View File

@@ -0,0 +1,63 @@
from __future__ import annotations
from pathlib import Path
from typing import Optional
import typer
from ..config import Config, list_config_candidates
admin_app = typer.Typer(help="Administrative and debugging commands.")
@admin_app.command("config")
def show_config(ctx: typer.Context):
    """
    Show the effective configuration values and where they came from.
    """
    # NOTE: the docstring above doubles as the command's --help text.
    settings: Config = ctx.obj["config"]  # type: ignore[assignment]

    typer.echo(f"Config source : {settings.describe_source()}")
    typer.echo("")

    # (label, attribute) pairs. Each attribute is looked up inside the loop,
    # immediately before it is printed, so values are read in display order.
    rows = (
        ("mirror_root ", "mirror_root"),
        ("data_dir ", "data_dir"),
        ("log_dir ", "log_dir"),
        ("wget_bin ", "wget_bin"),
        ("rg_bin ", "rg_bin"),
        ("max_concurrent", "max_concurrent_updates"),
    )
    for label, attr in rows:
        typer.echo(f"{label}: {getattr(settings, attr)}")
@admin_app.command("config-path")
def config_path(ctx: typer.Context):
    """
    Print the path to the config file Mirage actually loaded.
    """
    loaded: Config = ctx.obj["config"]  # type: ignore[assignment]

    # A source_path of None means no file was read at all.
    if loaded.source_path is not None:
        typer.echo(f"{loaded.source_path} ({loaded.source_kind})")
        return

    typer.echo("No config file used (built-in defaults only).")
@admin_app.command("config-search-paths")
def config_search_paths(
    config: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        dir_okay=False,
        file_okay=True,
        exists=False,
        readable=False,
        help="Show search order as if this explicit path was passed.",
    )
):
    """
    List all locations Mirage will search for a config file, in order.
    """
    # One output line per candidate, in lookup order, tagged with its kind
    # and whether a regular file is currently present at that location.
    for location, origin in list_config_candidates(explicit=config):
        if location.is_file():
            presence = "exists"
        else:
            presence = "missing"
        typer.echo(f"[{origin:8}] {location} ({presence})")

View File

@@ -9,6 +9,8 @@ import typer
from .. import storage, jobs
from ..models import Mirror
from ..updater import log_path_for
from ..config import Config
from ..daemon import run_daemon
mirrors_app = typer.Typer(
help="Manage mirrors (add, list, update, search, status, watch).")
@@ -298,3 +300,35 @@ def watch_cmd(
typer.echo(line.rstrip("\n"))
except KeyboardInterrupt:
typer.echo("\n[watch] Detaching from log.")
@mirrors_app.command("daemon")
def daemon_cmd(
    ctx: typer.Context,
    poll_interval: float = typer.Option(
        1.0,
        "--poll-interval",
        "-p",
        help="Seconds between job queue polls.",
    ),
):
    """
    Run the Mirage daemon (job worker) in the foreground.
    This process watches the update queue and runs wget jobs with
    concurrency. It respects the global --config option and the
    MIRAGE_CONFIG environment variable.
    """
    # The Config is taken from the Typer context object rather than loaded
    # again here.
    settings: Config = ctx.obj["config"]  # type: ignore[assignment]

    # Startup banner: where the config came from, then the key settings.
    origin = settings.source_path or '<defaults>'
    typer.echo(
        f"[daemon] Using config from: {origin} ({settings.source_kind})"
    )
    banner_rows = (
        ("mirror_root", "mirror_root"),
        ("data_dir", "data_dir"),
        ("log_dir", "log_dir"),
        ("max_workers", "max_concurrent_updates"),
    )
    for label, attr in banner_rows:
        typer.echo(f"[daemon] {label} = {getattr(settings, attr)}")
    typer.echo()

    run_daemon(cfg=settings, poll_interval=poll_interval)

View File

@@ -1,33 +0,0 @@
from __future__ import annotations
import typer
# type: ignore[attr-defined]
from ..config import load_config, _default_config_path
misc_app = typer.Typer(help="Miscellaneous commands (config, info).")
@misc_app.command("config-path")
def config_path_cmd():
    """
    Show where the active config file is located (or would be created).
    """
    # Only the default location is reported here; the lookup that selects
    # the file actually in use lives in load_config().
    typer.echo(str(_default_config_path()))
@misc_app.command("config-show")
def config_show_cmd():
    """
    Print the current configuration values.
    """
    settings = load_config()

    # Each printed label is identical to the attribute it displays, so one
    # tuple of names drives both. Attributes are read one at a time, in
    # display order.
    field_names = (
        "mirror_root",
        "data_dir",
        "log_dir",
        "db_path",
        "max_concurrent_updates",
        "wget_bin",
        "rg_bin",
    )
    for name in field_names:
        typer.echo(f"{name} = {getattr(settings, name)}")

View File

@@ -1,30 +1,83 @@
from __future__ import annotations
import os
import tomllib # Python 3.11+; on 3.10 use 'tomli' instead
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from typing import Optional, List, Tuple
import tomllib # Python 3.11+
# -----------------------------
# Defaults & helpers
# -----------------------------
DEFAULT_MIRROR_ROOT = Path("/srv/www/mirrors")
DEFAULT_DATA_DIR = Path("~/.local/share/mirrorctl").expanduser()
def default_data_dir() -> Path:
    """
    Pick the default data directory.

    Resolution order:
    1. ``MIRAGE_DATA_DIR`` from the environment, if set and non-empty
       (``~`` is expanded).
    2. ``/var/lib/mirage`` when running as root on a POSIX system.
    3. ``~/.local/share/mirage`` otherwise.
    """
    override = os.getenv("MIRAGE_DATA_DIR")
    if override:
        return Path(override).expanduser()

    # geteuid() does not exist on every platform, hence the hasattr guard.
    running_as_root = (
        os.name == "posix" and hasattr(os, "geteuid") and os.geteuid() == 0
    )
    if not running_as_root:
        return Path("~/.local/share/mirage").expanduser()
    return Path("/var/lib/mirage")
def default_log_dir(data_dir: Path) -> Path:
    """
    Pick the default log directory.

    Resolution order:
    1. ``MIRAGE_LOG_DIR`` from the environment, if set and non-empty
       (``~`` is expanded).
    2. ``/var/log/mirage`` when running as root on a POSIX system.
    3. ``<data_dir>/logs`` otherwise.
    """
    override = os.getenv("MIRAGE_LOG_DIR")
    if override:
        return Path(override).expanduser()

    # geteuid() does not exist on every platform, hence the hasattr guard.
    running_as_root = (
        os.name == "posix" and hasattr(os, "geteuid") and os.geteuid() == 0
    )
    if not running_as_root:
        return data_dir / "logs"
    return Path("/var/log/mirage")
DEFAULT_DATA_DIR = default_data_dir()
DEFAULT_LOG_DIR = default_log_dir(DEFAULT_DATA_DIR)
DEFAULT_MAX_CONCURRENT_UPDATES = 4
DEFAULT_WGET_BIN = "wget"
DEFAULT_RG_BIN = "rg"
DEFAULT_WGET_BIN = "/usr/bin/wget"
DEFAULT_RG_BIN = "/usr/bin/rg"
@dataclass
class Config:
mirror_root: Path
data_dir: Path
_log_dir: Path
max_concurrent_updates: int
wget_bin: str
rg_bin: str
# metadata about where this config came from
source_path: Optional[Path] = None # actual file used, if any
# "explicit", "env", "user", "system", "state", "local", ...
source_kind: str = "default"
@property
def log_dir(self) -> Path:
d = self.data_dir / "logs"
"""
Ensure and return the log directory.
"""
d = self._log_dir
d.mkdir(parents=True, exist_ok=True)
return d
@@ -35,112 +88,213 @@ class Config:
@property
def config_dir(self) -> Path:
# For future use (e.g. storing per-mirror configs)
d = self.data_dir / "config"
d.mkdir(parents=True, exist_ok=True)
return d
def describe_source(self) -> str:
if self.source_path is None:
return f"{self.source_kind} (no config file, built-in defaults)"
return f"{self.source_kind}: {self.source_path}"
def default_config() -> Config:
    """
    Build a Config populated entirely from the built-in defaults.

    Returns:
        A Config using the module-level DEFAULT_* values, with
        ``source_path=None`` and ``source_kind="default"`` to record that
        no config file was involved.
    """
    # Each field is passed exactly once; a repeated keyword argument is a
    # SyntaxError.
    return Config(
        mirror_root=DEFAULT_MIRROR_ROOT,
        data_dir=DEFAULT_DATA_DIR,
        _log_dir=DEFAULT_LOG_DIR,
        max_concurrent_updates=DEFAULT_MAX_CONCURRENT_UPDATES,
        wget_bin=DEFAULT_WGET_BIN,
        rg_bin=DEFAULT_RG_BIN,
        source_path=None,
        source_kind="default",
    )
def _default_config_path() -> Path:
# -----------------------------
# Config file search
# -----------------------------
def _default_user_config_path() -> Path:
xdg = os.getenv("XDG_CONFIG_HOME")
if xdg:
base = Path(xdg)
base = Path(xdg).expanduser()
else:
base = Path("~/.config").expanduser()
return base / "mirrorctl" / "config.toml"
return base / "mirage" / "config.toml"
def _search_config_paths() -> list[Path]:
env = os.getenv("MIRRORCTL_CONFIG")
paths: list[Path] = []
if env:
paths.append(Path(env))
def _search_config_paths(
explicit: Optional[Path] = None,
) -> List[Tuple[Path, str]]:
"""
Return a list of (path, kind) in the order we will look for configs.
# user config
paths.append(_default_config_path())
kind is one of: "explicit", "env", "local", "user", "system", "state"
"""
paths: List[Tuple[Path, str]] = []
# system config
paths.append(Path("/etc/mirrorctl/config.toml"))
# 1. CLI override always wins, and if provided we ONLY use it.
if explicit is not None:
paths.append((explicit, "explicit"))
return paths
# 2. Environment variable (new canonical name + legacy)
env_path = os.getenv("MIRAGE_CONFIG") or os.getenv("mirage_CONFIG")
if env_path:
paths.append((Path(env_path).expanduser(), "env"))
# 3. Project-local configs (useful for dev / per-repo configs)
cwd = Path.cwd()
paths.extend(
[
(cwd / "mirage.toml", "local"),
(cwd / "config.toml", "local"),
(cwd / "etc" / "mirage" / "config.toml", "local"),
]
)
# 4. User config (~/.config/mirage/config.toml)
user_cfg = _default_user_config_path()
paths.append((user_cfg, "user"))
# 5. System config
paths.append((Path("/etc/mirage/config.toml"), "system"))
# 6. State config (for container-ish setups)
paths.append((Path("/var/lib/mirage/config.toml"), "state"))
return paths
def list_config_candidates(explicit: Optional[Path] = None) -> List[Tuple[Path, str]]:
    """
    Public entry point for listing config candidates.

    Lets the CLI display every ``(path, kind)`` location that would be
    consulted, in order, without reaching into the private search helper.
    """
    candidates = _search_config_paths(explicit=explicit)
    return candidates
def _ensure_default_config_file(path: Path, cfg: Config) -> None:
"""
Create a minimal config file at 'path' if it does not already exist.
"""
if path.exists():
return
path.parent.mkdir(parents=True, exist_ok=True)
content = f"""# mirrorctl configuration
# Directory where mirrors will be stored
path.parent.mkdir(parents=True, exist_ok=True)
content = f"""# Mirage configuration
# This file was generated automatically.
# Paths must be writable by the 'mirage' user (or your current user).
# Root directory where mirror content is stored.
mirror_root = "{cfg.mirror_root}"
# Directory for mirrorctl metadata (db, logs, etc.)
# Directory for Mirage internal state (queue DB, etc.).
data_dir = "{cfg.data_dir}"
# Max parallel mirror updates
# Directory for log files.
log_dir = "{cfg.log_dir}"
# Maximum number of concurrent mirror updates.
max_concurrent_updates = {cfg.max_concurrent_updates}
# Path to wget binary
# Path to wget binary.
wget_bin = "{cfg.wget_bin}"
# Path to ripgrep (rg) binary
# Path to ripgrep (rg) binary.
rg_bin = "{cfg.rg_bin}"
"""
path.write_text(content, encoding="utf-8")
def load_config() -> Config:
def load_config(
config_path: Optional[Path] = None,
*,
create_if_missing: bool = True,
) -> Config:
"""
Load configuration from MIRRORCTL_CONFIG, XDG config, or /etc.
If no config exists, create a default one in
~/.config/mirrorctl/config.toml.
"""
cfg = default_config()
Load configuration with the following precedence:
1. Explicit path (CLI --config)
2. $MIRAGE_CONFIG (or legacy $mirage_CONFIG)
3. Local files: ./mirage.toml, ./config.toml, ./etc/mirage/config.toml
4. User config: ~/.config/mirage/config.toml
5. System: /etc/mirage/config.toml
6. State: /var/lib/mirage/config.toml
If no file is found and create_if_missing=True, a default user config
is written to ~/.config/mirage/config.toml and then loaded.
Returns a Config object with .source_path and .source_kind populated.
"""
base_cfg = default_config()
candidates = _search_config_paths(config_path)
paths = _search_config_paths()
used_path: Optional[Path] = None
used_kind: str = "default"
for p in paths:
for p, kind in candidates:
if p.is_file():
used_path = p
used_kind = kind
break
if used_path is None:
# create default user config and read it back
user_path = _default_config_path()
_ensure_default_config_file(user_path, cfg)
used_path = user_path
if create_if_missing:
# Create a default user config (non-root) if nothing exists.
# For root/daemon installs, install.sh should have placed /etc/mirage/config.toml.
user_path = _default_user_config_path()
_ensure_default_config_file(user_path, base_cfg)
used_path = user_path
used_kind = "generated-user"
else:
# No config anywhere; stick with defaults.
return base_cfg
data = {}
try:
raw = used_path.read_bytes()
data = tomllib.loads(raw.decode("utf-8"))
except Exception:
# Fall back to defaults if config is unreadable
data = {}
# If unreadable, keep defaults but mark the source.
cfg = base_cfg
cfg.source_path = used_path
cfg.source_kind = f"{used_kind}-unreadable"
return cfg
# Apply overrides from file
mirror_root = Path(data.get("mirror_root", cfg.mirror_root))
data_dir = Path(data.get("data_dir", cfg.data_dir))
max_concurrent = int(
data.get("max_concurrent_updates", cfg.max_concurrent_updates))
wget_bin = str(data.get("wget_bin", cfg.wget_bin))
rg_bin = str(data.get("rg_bin", cfg.rg_bin))
mirror_root = Path(
data.get("mirror_root", base_cfg.mirror_root)).expanduser()
data_dir = Path(data.get("data_dir", base_cfg.data_dir)).expanduser()
return Config(
# log_dir: fall back to default_log_dir(data_dir) if not specified.
log_dir_raw = data.get("log_dir")
if log_dir_raw is not None:
log_dir = Path(log_dir_raw).expanduser()
else:
log_dir = default_log_dir(data_dir)
max_concurrent = int(
data.get("max_concurrent_updates", base_cfg.max_concurrent_updates)
)
wget_bin = str(data.get("wget_bin", base_cfg.wget_bin))
rg_bin = str(data.get("rg_bin", base_cfg.rg_bin))
cfg = Config(
mirror_root=mirror_root,
data_dir=data_dir,
_log_dir=log_dir,
max_concurrent_updates=max_concurrent,
wget_bin=wget_bin,
rg_bin=rg_bin,
source_path=used_path,
source_kind=used_kind,
)
# Ensure dirs exist
cfg.data_dir.mkdir(parents=True, exist_ok=True)
cfg.log_dir # triggers creation
return cfg

View File

@@ -3,23 +3,34 @@ from __future__ import annotations
import time
from concurrent.futures import ThreadPoolExecutor, Future
from datetime import datetime
from typing import Dict, Tuple
from typing import Dict, Tuple, Optional
from .config import load_config
from pathlib import Path
from .config import Config, load_config
from . import jobs
from . import storage
from .updater import update_mirror
def run_daemon(poll_interval: float = 1.0) -> None:
def run_daemon(
cfg: Optional[Config] = None,
poll_interval: float = 1.0,
) -> None:
"""
Simple job-processing daemon.
- Watches jobs/pending for new update jobs.
- Moves them to jobs/running.
- Runs wget via update_mirror() with concurrency.
If cfg is None, load_config() is used. In normal CLI usage, the
'mirage mirrors daemon' command passes a Config built from the
global --config option / env / defaults.
"""
cfg = load_config()
if cfg is None:
cfg = load_config()
max_workers = max(1, cfg.max_concurrent_updates)
executor = ThreadPoolExecutor(max_workers=max_workers)
@@ -27,6 +38,7 @@ def run_daemon(poll_interval: float = 1.0) -> None:
running: Dict[Future, Tuple[str, str]] = {}
print(f"[mirage-daemon] starting with max_workers={max_workers}")
print(f"[mirage-daemon] data_dir: {cfg.data_dir}")
print(f"[mirage-daemon] jobs dir: {cfg.data_dir / 'jobs'}")
try:
@@ -35,13 +47,13 @@ def run_daemon(poll_interval: float = 1.0) -> None:
finished = [f for f in running if f.done()]
for f in finished:
job_path, slug = running.pop(f)
# Remove job file from running
from pathlib import Path
try:
# type: ignore[arg-type]
Path(job_path).unlink(missing_ok=True)
except TypeError:
Path(job_path).unlink(missing_ok=True)
# Python <3.8 compat (if you ever care)
p = Path(job_path)
if p.exists():
p.unlink()
try:
f.result()

View File

@@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from typing import Iterable, Tuple, Dict
from urllib.parse import urlparse
from .config import load_config
from .models import Mirror
@@ -41,6 +42,11 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]:
target_dir = mirror_dir_for(mirror.slug)
log_path = log_path_for(mirror.slug)
# Determine path components to strip
parsed = urlparse(mirror.url)
path_segments = [seg for seg in parsed.path.split("/") if seg]
cut_dirs = len(path_segments)
cmd = [
cfg.wget_bin,
"--mirror",
@@ -48,12 +54,21 @@ def run_wget(mirror: Mirror) -> Tuple[int, Path]:
"--page-requisites",
"--no-parent",
"--adjust-extension",
"--no-host-directories",
"--wait=0.5",
"--random-wait",
f"--execute=robots={'off' if mirror.ignore_robots else 'on'}",
"--directory-prefix",
str(target_dir),
mirror.url,
]
# Strip all leading path components so that the slug dir becomes the site
# root.
if cut_dirs > 0:
cmd.append(f"--cut-dirs={cut_dirs}")
cmd.append(mirror.url)
_write_log_header(log_path, cmd)
with log_path.open("a", encoding="utf-8") as log:

View File

@@ -11,6 +11,7 @@ VENV_DIR="${MIRAGE_VENV_DIR:-/opt/mirage/venv}"
PYTHON_BIN="${PYTHON_BIN:-python3}"
MIRAGE_BIN_LINK="${MIRAGE_BIN_LINK:-/usr/local/bin/mirage}"
CONFIG_DIR="/etc/mirage"
CONFIG_FILE="$CONFIG_DIR/config.toml"
@@ -99,6 +100,19 @@ if [ ! -x "$MIRAGE_BIN" ]; then
exit 1
fi
# Expose the CLI at $MIRAGE_BIN_LINK without overwriting anything that is
# already present at that path.
echo "==> Installing mirage CLI symlink at $MIRAGE_BIN_LINK"
# -L is checked alongside -e so that a dangling symlink is also treated as
# "already exists" (-e follows links and is false for a broken one).
if [ -L "$MIRAGE_BIN_LINK" ] || [ -e "$MIRAGE_BIN_LINK" ]; then
# Something is there: accept it only if it is a symlink resolving to $MIRAGE_BIN.
if [ -L "$MIRAGE_BIN_LINK" ] && [ "$(readlink -f "$MIRAGE_BIN_LINK")" = "$MIRAGE_BIN" ]; then
echo " Symlink already points to $MIRAGE_BIN (leaving it)"
else
# Foreign file or link: warn and leave it for the admin to resolve.
echo " WARNING: $MIRAGE_BIN_LINK already exists and is not a symlink to $MIRAGE_BIN"
echo " Not overwriting it. Adjust or remove manually if you want mirage there."
fi
else
# Nothing at the target path: create the link.
ln -s "$MIRAGE_BIN" "$MIRAGE_BIN_LINK"
echo " Created symlink: $MIRAGE_BIN_LINK -> $MIRAGE_BIN"
fi
echo "==> Installing systemd units"
install -D -m 644 systemd/mirage.service /etc/systemd/system/mirage.service
install -D -m 644 systemd/mirage-update.service /etc/systemd/system/mirage-update.service