diff --git a/.gitignore b/.gitignore
index ad6539f..0ee5b63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,219 @@
index.html*
/mirrors
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+# Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# Redis
+*.rdb
+*.aof
+*.pid
+
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+
+# ActiveMQ
+activemq-data/
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Streamlit
+.streamlit/secrets.toml
diff --git a/app.py b/app.py
new file mode 100755
index 0000000..90523f8
--- /dev/null
+++ b/app.py
@@ -0,0 +1,523 @@
+#!/usr/bin/env python3
+from flask import Flask, request, redirect, url_for, jsonify, render_template_string, abort
+import threading
+from mirror_manager import (
+ load_mirrors,
+ add_mirror,
+ update_mirror,
+ LOG_ROOT,
+)
+
+app = Flask(__name__)
+
+# --- background update helper ---
+
+
+def _run_update_in_background(slug: str):
+    """Start ``update_mirror(slug)`` on a daemon thread and return at once."""
+    threading.Thread(
+        target=update_mirror,
+        args=(slug,),
+        daemon=True,
+    ).start()
+
+# --- templates ---
+
+
+# Jinja template for the index page. It is rendered with render_template_string
+# and receives `mirrors` (one dict per mirror: slug, category, url, status,
+# last_updated), `categories` and `error`.
+# NOTE(review): this copy contains only the text nodes and Jinja tags; the
+# surrounding HTML markup appears to be missing -- confirm against the real file.
+INDEX_TEMPLATE = r"""
+
+
+
+
+ Mirror Manager
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | Slug |
+ Category |
+ URL |
+ Last updated |
+ Status |
+ |
+
+
+
+ {% for m in mirrors %}
+
+
+
+ {{ m.slug }}
+
+ |
+ {{ m.category }} |
+ {{ m.url }} |
+
+ {% if m.last_updated %}
+ {{ m.last_updated }}
+ {% else %}
+ never
+ {% endif %}
+ |
+
+ {% set st = m.status or 'idle' %}
+
+ {{ st }}
+ |
+
+ log
+ ·
+
+ |
+
+ {% endfor %}
+ {% if mirrors|length == 0 %}
+ | No mirrors yet. Add one on the right. |
+ {% endif %}
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+# --- routes ---
+
+
+# Characters accepted in a new slug. The slug becomes a directory name, a log
+# file name and a URL path segment, so path separators and whitespace are out.
+_SLUG_CHARS = frozenset(
+    "abcdefghijklmnopqrstuvwxyz"
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "0123456789-_."
+)
+
+
+def _display_timestamp(raw):
+    """Return ``raw`` with "T" -> " " and "Z" -> " UTC", or None when it is empty."""
+    if not raw:
+        return None
+    return raw.replace("T", " ").replace("Z", " UTC")
+
+
+def _mirror_rows(mirrors):
+    """Build the per-mirror dicts that INDEX_TEMPLATE iterates over."""
+    return [
+        {
+            "slug": m["slug"],
+            "category": m["category"],
+            "url": m["url"],
+            "status": m.get("status") or "idle",
+            "last_updated_raw": m.get("last_updated"),
+            "last_updated": _display_timestamp(m.get("last_updated")),
+        }
+        for m in mirrors
+    ]
+
+
+def _render_index(error=None):
+    """Render the index page from the stored mirrors, optionally with an error."""
+    mirrors = load_mirrors()
+    return render_template_string(
+        INDEX_TEMPLATE,
+        mirrors=_mirror_rows(mirrors),
+        categories=sorted({m["category"] for m in mirrors}),
+        error=error,
+    )
+
+
+def _validate_mirror_form(slug: str, category: str, url: str) -> str | None:
+    """Return a user-facing error message for invalid form input, else None."""
+    if not slug or not category or not url:
+        return "Slug, category, and URL are required."
+    if " " in slug:
+        return "Slug cannot contain spaces."
+    # A leading dot would allow "." / ".." and hidden directories.
+    if slug.startswith(".") or not set(slug) <= _SLUG_CHARS:
+        return ("Slug may only contain letters, digits, '-', '_' and '.', "
+                "and cannot start with '.'.")
+    # The URL is later handed to wget as a command-line argument; a leading
+    # dash would be parsed as an option instead of a URL.
+    if url.startswith("-"):
+        return "URL cannot start with '-'."
+    return None
+
+
+@app.route("/", methods=["GET"])
+def index():
+    """Show the list of mirrors."""
+    return _render_index()
+
+
+@app.route("/add", methods=["POST"])
+def add_mirror_route():
+    """Register a mirror from the submitted form and start its first update.
+
+    Reads ``slug``, ``category``, ``url`` and ``ignore_robots`` from the form.
+    Invalid input, or a failure in ``add_mirror``, re-renders the index page
+    with the error message and status 400. On success the update runs in the
+    background and the client is redirected to the index.
+    """
+    slug = (request.form.get("slug") or "").strip()
+    category = (request.form.get("category") or "").strip()
+    url = (request.form.get("url") or "").strip()
+    ignore_robots = bool(request.form.get("ignore_robots"))
+
+    error = _validate_mirror_form(slug, category, url)
+    if error:
+        return _render_index(error=error), 400
+
+    try:
+        add_mirror(slug, category, url, ignore_robots=ignore_robots)
+    except Exception as e:
+        # Request boundary: whatever add_mirror raises (e.g. a duplicate slug)
+        # is shown to the user instead of producing a bare server error.
+        return _render_index(error=str(e)), 400
+
+    # kick off background update
+    _run_update_in_background(slug)
+    return redirect(url_for("index"))
+
+
+@app.route("/update/<slug>", methods=["POST"])
+def trigger_update(slug):
+    """Start a background update of one mirror and redirect to the index.
+
+    The ``<slug>`` URL variable supplies the ``slug`` argument. Responds with
+    404 when no mirror with that slug is stored, instead of letting the
+    background thread fail on it unseen.
+    """
+    if not any(m["slug"] == slug for m in load_mirrors()):
+        abort(404)
+    # fire-and-forget; UI will see status flip to 'updating'
+    _run_update_in_background(slug)
+    return redirect(url_for("index"))
+
+
+@app.route("/status", methods=["GET"])
+def status():
+    """Return the state of every mirror as JSON under the ``mirrors`` key."""
+
+    def _describe(entry):
+        # One JSON object per mirror; the display form is "" when never updated.
+        stamp = entry.get("last_updated")
+        if stamp:
+            shown = stamp.replace("T", " ").replace("Z", " UTC")
+        else:
+            shown = ""
+        return {
+            "slug": entry["slug"],
+            "category": entry["category"],
+            "url": entry["url"],
+            "status": entry.get("status") or "idle",
+            "last_updated": stamp,
+            "last_updated_display": shown,
+        }
+
+    return jsonify({"mirrors": [_describe(entry) for entry in load_mirrors()]})
+
+
+@app.route("/logs/<slug>")
+def view_log(slug):
+    """Show the log file of one mirror as an HTML ``<pre>`` block.
+
+    The ``<slug>`` URL variable supplies the ``slug`` argument. Responds with
+    404 when ``LOG_ROOT/<slug>.log`` does not exist.
+    """
+    # Local import keeps this fix self-contained in the view.
+    from html import escape
+
+    log_path = LOG_ROOT / f"{slug}.log"
+    if not log_path.exists():
+        abort(404)
+    text = log_path.read_text(encoding="utf-8", errors="replace")
+    # The log holds output from fetching remote sites, so escape it before
+    # embedding it in HTML.
+    return "<pre>" + escape(text) + "</pre>"
+
+
+if __name__ == "__main__":
+    # Run directly: listen on the loopback interface only, debugger off.
+    app.run(debug=False, port=5000, host="127.0.0.1")
diff --git a/data/mirrors.json b/data/mirrors.json
new file mode 100644
index 0000000..bced335
--- /dev/null
+++ b/data/mirrors.json
@@ -0,0 +1,12 @@
+[
+ {
+ "slug": "wgpu-tutorial",
+ "category": "rust",
+ "url": "https://sotrh.github.io/learn-wgpu/",
+ "ignore_robots": false,
+ "created_at": "2025-12-02T07:15:12Z",
+ "last_updated": null,
+ "status": "error",
+ "last_error": "wget exited with 4"
+ }
+]
\ No newline at end of file
diff --git a/mirror_manager.py b/mirror_manager.py
new file mode 100755
index 0000000..6c13b60
--- /dev/null
+++ b/mirror_manager.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""
+Manage the various mirrors for the mirror website.
+"""
+
+import json
+import subprocess
+import datetime as dt
+from pathlib import Path
+
+# All on-disk state lives below one base directory.
+BASE = Path("/srv/www")
+DATA_FILE = BASE.joinpath("data", "mirrors.json")
+MIRROR_ROOT = BASE.joinpath("mirrors")
+LOG_ROOT = BASE.joinpath("logs")
+
+# Create the working directories at import time so the functions below can
+# assume they exist.
+for _needed_dir in (MIRROR_ROOT, LOG_ROOT, DATA_FILE.parent):
+    _needed_dir.mkdir(parents=True, exist_ok=True)
+
+
+def _now_iso() -> str:
+    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (whole seconds)."""
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
+    # "now" and drop tzinfo so isoformat() adds no "+00:00" before the "Z".
+    now = dt.datetime.now(dt.timezone.utc)
+    return now.replace(microsecond=0, tzinfo=None).isoformat() + "Z"
+
+
+def load_mirrors() -> list[dict]:
+    """Read the mirror list from DATA_FILE; a missing file yields an empty list."""
+    if DATA_FILE.exists():
+        with DATA_FILE.open("r", encoding="utf-8") as handle:
+            return json.load(handle)
+    return []
+
+
+def save_mirrors(mirrors: list[dict]) -> None:
+    """Write ``mirrors`` to DATA_FILE as indented JSON.
+
+    The JSON is written to a sibling ``.tmp`` file first, which then replaces
+    DATA_FILE in one step.
+    """
+    scratch = DATA_FILE.with_suffix(".tmp")
+    with open(scratch, "w", encoding="utf-8") as handle:
+        json.dump(mirrors, handle, indent=2)
+    scratch.replace(DATA_FILE)
+
+
+def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
+    """Return the first entry of ``mirrors`` whose "slug" equals ``slug``, else None."""
+    return next((entry for entry in mirrors if entry["slug"] == slug), None)
+
+
+def add_mirror(slug: str,
+               category: str,
+               url: str,
+               ignore_robots: bool = False) -> dict:
+    """Append a new mirror entry to the stored list and return it.
+
+    The entry starts with status "queued" and no ``last_updated``; nothing is
+    downloaded here. Raises ValueError when ``slug`` is already in use.
+    """
+    known = load_mirrors()
+    if get_mirror(known, slug) is not None:
+        raise ValueError(f"Mirror with slug '{slug}' already exists!")
+
+    entry = dict(
+        slug=slug,
+        category=category,
+        url=url,
+        ignore_robots=bool(ignore_robots),
+        created_at=_now_iso(),
+        last_updated=None,
+        status="queued",
+        last_error=None,
+    )
+    known.append(entry)
+    save_mirrors(known)
+    return entry
+
+
+def _merge_mirror_fields(slug: str, fields: dict) -> None:
+    """Merge ``fields`` into ``slug``'s entry of a freshly loaded list and save it."""
+    if not fields:
+        return
+    mirrors = load_mirrors()
+    entry = get_mirror(mirrors, slug)
+    if entry is None:
+        # The mirror is no longer in the data file; nothing to record.
+        return
+    entry.update(fields)
+    save_mirrors(mirrors)
+
+
+def update_mirror(slug: str) -> None:
+    """Run wget mirror for a single slug (blocking).
+
+    Marks the entry "updating", runs wget into ``MIRROR_ROOT/<slug>`` with its
+    output appended to ``LOG_ROOT/<slug>.log``, then records the result:
+    status "idle" plus a new ``last_updated`` on success, or status "error"
+    plus ``last_error`` on failure. Failures of wget itself are recorded, not
+    raised.
+
+    Raises:
+        ValueError: if no mirror with this slug is stored.
+    """
+    mirrors = load_mirrors()
+    m = get_mirror(mirrors, slug)
+    if m is None:
+        raise ValueError(f"No such mirror: {slug}")
+
+    m["status"] = "updating"
+    m["last_error"] = None
+    save_mirrors(mirrors)
+
+    target_dir = MIRROR_ROOT / slug
+    target_dir.mkdir(parents=True, exist_ok=True)
+    log_file = LOG_ROOT / f"{slug}.log"
+
+    robots_setting = "off" if m.get("ignore_robots") else "on"
+
+    # Polite wget:
+    # --mirror implies -r -N -l inf --no-remove-listing
+    cmd = [
+        "wget",
+        "--mirror",
+        "--convert-links",
+        "--adjust-extension",
+        "--page-requisites",
+        "--no-parent",
+        "--wait=0.70",
+        "--random-wait",
+        # "--limit-rate=50m",
+        # -e runs a wgetrc-style command. Without the option flag the string
+        # is taken as one more URL to download.
+        "-e",
+        f"robots={robots_setting}",
+        "-P",
+        str(target_dir),
+        # End of options: the stored URL is never parsed as a wget flag.
+        "--",
+        m["url"],
+    ]
+
+    # Only the fields decided here are written back at the end, onto a freshly
+    # loaded list, so entries added or changed while wget ran are kept.
+    # NOTE(review): load/modify/save is still not locked against other writers.
+    outcome: dict = {}
+    try:
+        with log_file.open("a", encoding="utf-8") as lf:
+            lf.write(f"\n=== {_now_iso()} : "
+                     f"Starting mirror of {m['url']} ===\n")
+            # Flush our header before wget writes to the same file descriptor.
+            lf.flush()
+            subprocess.run(
+                cmd,
+                stdout=lf,
+                stderr=subprocess.STDOUT,
+                check=True,
+            )
+            lf.write(f"=== {_now_iso()} : Completed mirror of {m['url']} ===\n")
+            lf.flush()
+            outcome = {
+                "last_updated": _now_iso(),
+                "status": "idle",
+                "last_error": None,
+            }
+    except subprocess.CalledProcessError as e:
+        outcome = {
+            "status": "error",
+            "last_error": f"wget exited with {e.returncode}",
+        }
+        with log_file.open("a", encoding="utf-8") as lf:
+            lf.write(f"*** ERROR: wget failed with code {e.returncode}\n")
+    except Exception as e:
+        # E.g. wget missing or the log file not writable: record it on the entry.
+        outcome = {
+            "status": "error",
+            "last_error": f"{type(e).__name__}: {e}",
+        }
+        with log_file.open("a", encoding="utf-8") as lf:
+            lf.write(f"*** ERROR: {type(e).__name__}: {e}\n")
+    finally:
+        _merge_mirror_fields(slug, outcome)
+
+
+def update_all_mirrors() -> None:
+    """Update every stored mirror in turn, in stored order."""
+    for slug in (entry["slug"] for entry in load_mirrors()):
+        update_mirror(slug)
diff --git a/mirrors.txt b/mirrors.txt
deleted file mode 100644
index f65be1b..0000000
--- a/mirrors.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-# Slug URL
-wgpu-tutorial https://sotrh.github.io/learn-wgpu/
diff --git a/update_mirrors.py b/update_mirrors.py
new file mode 100755
index 0000000..f723cb6
--- /dev/null
+++ b/update_mirrors.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+import sys
+from mirror_manager import update_all_mirrors, update_mirror
+
+
+def main():
+    """Update the mirror named by the single CLI argument, otherwise all mirrors."""
+    args = sys.argv[1:]
+    if len(args) == 1:
+        update_mirror(args[0])
+        return
+    update_all_mirrors()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/update_mirrors.sh b/update_mirrors.sh
deleted file mode 100755
index 9ccbe09..0000000
--- a/update_mirrors.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-BASE="/srv/www"
-URL_LIST="$BASE/mirrors.txt"
-OUTDIR="$BASE/mirrors"
-
-mkdir -p "$OUTDIR"
-
-# If a slug is passed as an argument, only update that one.
-ONLY_SLUG="${1:-}"
-
-while read -r slug url; do
- # skip empty lines & comments
- [ -z "${slug:-}" ] && continue
- [[ "$slug" =~ ^# ]] && continue
-
- if [ -n "$ONLY_SLUG" ] && [ "$slug" != "$ONLY_SLUG" ]; then
- continue
- fi
-
- echo "=== Mirroring $slug ($url) ==="
-
- # Each mirror in its own directory
- TARGET_DIR="$OUTDIR/$slug"
- mkdir -p "$TARGET_DIR"
- cd "$TARGET_DIR"
-
- # Mirror site
- wget \
- --mirror \
- --convert-links \
- --adjust-extension \
- --page-requisites \
- --no-parent \
- "$url"
-
- echo "=== Done $slug ==="
-done < "$URL_LIST"
-
-# Regenerate index page
-cd "$BASE"
-python3 "$BASE/generate_index.py"