Complete project rework into mirage client/server without web
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,8 +1,3 @@
|
||||
index.html*
|
||||
/mirrors
|
||||
/node_modules
|
||||
/data
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env bash
# add_mirror.sh URL [slug]
#
# Register a URL in mirrors.txt (one "slug url" pair per line) and then
# trigger an immediate mirror update for that slug.
set -euo pipefail

BASE="/srv/www"
URL_LIST="$BASE/mirrors.txt"

if [ $# -lt 1 ]; then
    echo "Usage: $0 URL [slug]"
    exit 1
fi

url="$1"
if [ $# -ge 2 ]; then
    slug="$2"
else
    # crude slugify: strip scheme, replace non alnum with underscores
    slug="$(echo "$url" | sed 's#https\?://##; s#[^a-zA-Z0-9._-]#_#g')"
fi

# Check if URL already exists.
# Compare the second whitespace-separated field *literally* instead of
# grepping with the URL as a regex: dots, '?' and '+' in a URL are regex
# metacharacters and would cause false matches (e.g. "example.com" also
# matching "exampleXcom").
if [ -f "$URL_LIST" ] && awk -v u="$url" '$2 == u { found=1 } END { exit !found }' "$URL_LIST"; then
    echo "URL already in list. Not adding again."
else
    echo "$slug $url" >> "$URL_LIST"
    echo "Added: $slug $url"
fi

# Run update for just this slug
"$BASE/update_mirrors.sh" "$slug"
|
||||
639
app.py
639
app.py
@@ -1,639 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from mirror_manager import (
|
||||
load_mirrors,
|
||||
add_mirror,
|
||||
update_mirror,
|
||||
MIRROR_ROOT,
|
||||
LOG_ROOT,
|
||||
)
|
||||
import re
|
||||
import html
|
||||
import subprocess
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from flask import (
|
||||
Flask,
|
||||
request,
|
||||
redirect,
|
||||
url_for,
|
||||
jsonify,
|
||||
send_from_directory,
|
||||
render_template_string
|
||||
)
|
||||
|
||||
|
||||
BASE = Path("/srv/www")
|
||||
STATIC_DIR = BASE / "static"
|
||||
STATIC_DIR.mkdir(exist_ok=True)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def _run_update_in_background(slug: str):
    """Run update_mirror(slug) on a daemon thread and return immediately."""
    worker = threading.Thread(
        target=update_mirror,
        args=(slug,),
        daemon=True,
    )
    worker.start()
|
||||
|
||||
|
||||
# -------------------- TEMPLATES --------------------
|
||||
INDEX_TEMPLATE = r"""
|
||||
<!doctype html>
|
||||
<html class="h-full">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Mirror Manager</title>
|
||||
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
|
||||
</head>
|
||||
<body class="h-full bg-slate-950 text-slate-100">
|
||||
<div class="min-h-full">
|
||||
<header class="border-b border-slate-800 bg-slate-950/80 backdrop-blur">
|
||||
<div class="max-w-5xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
|
||||
<div>
|
||||
<h1 class="text-xl font-semibold tracking-tight">Mirror Manager</h1>
|
||||
<p class="text-xs text-slate-400">Local offline mirrors of external sites, grouped by category.</p>
|
||||
</div>
|
||||
<div class="flex items-center gap-2 text-xs text-slate-400">
|
||||
<span class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 bg-slate-900/70">
|
||||
<span class="w-2 h-2 rounded-full bg-emerald-400"></span>
|
||||
Running locally
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main class="max-w-5xl mx-auto px-4 py-4 space-y-4">
|
||||
<!-- Mirrors list -->
|
||||
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
|
||||
<div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
|
||||
<div class="flex flex-wrap items-center gap-2">
|
||||
<span class="text-xs text-slate-400">Categories:</span>
|
||||
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
|
||||
All ({{ mirrors|length }})
|
||||
</button>
|
||||
{% for cat in categories %}
|
||||
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
|
||||
{{ cat }}
|
||||
</button>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div class="flex gap-2">
|
||||
<input
|
||||
id="search"
|
||||
class="w-full md:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500"
|
||||
placeholder="Filter by slug / URL / category…"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="overflow-x-auto border border-slate-800 rounded-xl">
|
||||
<table class="min-w-full text-sm">
|
||||
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
|
||||
<tr>
|
||||
<th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
|
||||
<th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
|
||||
<th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
|
||||
<th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
|
||||
<th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
|
||||
<th class="px-3 py-2 text-left"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="mirror-table" class="divide-y divide-slate-900/80">
|
||||
{% for m in mirrors %}
|
||||
<tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
|
||||
<td class="px-3 py-2 align-top">
|
||||
<div class="flex flex-col gap-1">
|
||||
<a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
|
||||
{{ m.slug }}
|
||||
</a>
|
||||
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
|
||||
View live log
|
||||
</a>
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-3 py-2 align-top">
|
||||
<div class="flex flex-wrap gap-1">
|
||||
{% for c in m.categories %}
|
||||
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-3 py-2 align-top max-w-xs">
|
||||
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
|
||||
</td>
|
||||
<td class="px-3 py-2 align-top text-xs text-slate-300">
|
||||
{% if m.last_updated %}
|
||||
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
|
||||
{% else %}
|
||||
<span class="text-slate-600">never</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="px-3 py-2 align-top text-xs">
|
||||
{% set st = m.status or 'idle' %}
|
||||
<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
|
||||
<span class="w-2 h-2 rounded-full
|
||||
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
|
||||
<span class="capitalize">{{ st }}</span>
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-3 py-2 align-top text-right text-[0.7rem]">
|
||||
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
|
||||
<button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
|
||||
<span>Update</span>
|
||||
</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if mirrors|length == 0 %}
|
||||
<tr>
|
||||
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
|
||||
No mirrors yet. Add one below.
|
||||
</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Add mirror -->
|
||||
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40 space-y-3">
|
||||
<h2 class="text-sm font-semibold">Add mirror</h2>
|
||||
<form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
|
||||
<div>
|
||||
<label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
|
||||
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
|
||||
</div>
|
||||
<div>
|
||||
<label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
|
||||
<input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
|
||||
</div>
|
||||
<div>
|
||||
<label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
|
||||
<input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
|
||||
</div>
|
||||
<div class="flex items-start gap-2">
|
||||
<input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
|
||||
<label for="ignore_robots" class="text-xs text-slate-400">
|
||||
Ignore robots.txt (only if you explicitly want to archive disallowed paths).
|
||||
</label>
|
||||
</div>
|
||||
{% if error %}
|
||||
<p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
|
||||
{% endif %}
|
||||
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
|
||||
Add & mirror
|
||||
</button>
|
||||
<p class="text-[0.7rem] text-slate-500">
|
||||
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
|
||||
</p>
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<!-- Content search -->
|
||||
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
|
||||
<h2 class="text-sm font-semibold mb-2">Content search</h2>
|
||||
<form id="search-form" class="space-y-2">
|
||||
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
|
||||
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
|
||||
Run ripgrep search
|
||||
</button>
|
||||
</form>
|
||||
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
|
||||
</section>
|
||||
</main>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Category + name filter
|
||||
const pills = Array.from(document.querySelectorAll('.cat-pill'));
|
||||
const rows = Array.from(document.querySelectorAll('#mirror-table tr[data-slug]'));
|
||||
const searchInput = document.getElementById('search');
|
||||
|
||||
function applyFilters() {
|
||||
const active = pills.find(p => p.classList.contains('cat-pill-active'));
|
||||
const cat = active ? active.dataset.category : 'all';
|
||||
const q = (searchInput.value || '').toLowerCase();
|
||||
|
||||
rows.forEach(row => {
|
||||
const cats = row.dataset.categories.split(',').map(s => s.trim());
|
||||
const searchStr = row.dataset.search;
|
||||
const matchesCat = (cat === 'all' || cats.includes(cat));
|
||||
const matchesSearch = (!q || searchStr.includes(q));
|
||||
row.style.display = (matchesCat && matchesSearch) ? '' : 'none';
|
||||
});
|
||||
}
|
||||
|
||||
pills.forEach(p => {
|
||||
p.addEventListener('click', () => {
|
||||
pills.forEach(x => x.classList.remove('cat-pill-active', 'border-sky-500', 'text-slate-100'));
|
||||
p.classList.add('cat-pill-active', 'border-sky-500', 'text-slate-100');
|
||||
applyFilters();
|
||||
});
|
||||
});
|
||||
|
||||
searchInput.addEventListener('input', applyFilters);
|
||||
|
||||
// Live status polling
|
||||
async function pollStatus() {
|
||||
try {
|
||||
const resp = await fetch("{{ url_for('status') }}");
|
||||
if (!resp.ok) return;
|
||||
const data = await resp.json();
|
||||
const bySlug = {};
|
||||
data.mirrors.forEach(m => bySlug[m.slug] = m);
|
||||
|
||||
rows.forEach(row => {
|
||||
const slug = row.dataset.slug;
|
||||
const m = bySlug[slug];
|
||||
if (!m) return;
|
||||
const tds = row.querySelectorAll('td');
|
||||
const lastCell = tds[3];
|
||||
const statusCell = tds[4];
|
||||
|
||||
lastCell.innerHTML = m.last_updated_display || '<span class="text-slate-600">never</span>';
|
||||
|
||||
const st = m.status || 'idle';
|
||||
statusCell.innerHTML =
|
||||
'<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">' +
|
||||
'<span class="w-2 h-2 rounded-full ' +
|
||||
(st === "idle" ? "bg-emerald-400" :
|
||||
st === "updating" ? "bg-amber-400 animate-pulse" :
|
||||
st === "warning" ? "bg-yellow-400" : "bg-rose-400") +
|
||||
'"></span>' +
|
||||
'<span class="capitalize">' + st + '</span>' +
|
||||
'</div>';
|
||||
});
|
||||
} catch (e) {}
|
||||
}
|
||||
setInterval(pollStatus, 5000);
|
||||
|
||||
// Content search via rg
|
||||
const searchForm = document.getElementById('search-form');
|
||||
const contentQuery = document.getElementById('content-query');
|
||||
const searchResults = document.getElementById('search-results');
|
||||
|
||||
searchForm.addEventListener('submit', async (e) => {
|
||||
e.preventDefault();
|
||||
const q = contentQuery.value.trim();
|
||||
if (!q) return;
|
||||
searchResults.textContent = 'Searching…';
|
||||
try {
|
||||
const resp = await fetch("{{ url_for('content_search') }}?q=" + encodeURIComponent(q));
|
||||
if (!resp.ok) {
|
||||
searchResults.textContent = 'Search failed.';
|
||||
return;
|
||||
}
|
||||
const data = await resp.json();
|
||||
if (data.results.length === 0) {
|
||||
searchResults.textContent = 'No matches.';
|
||||
return;
|
||||
}
|
||||
searchResults.innerHTML = '';
|
||||
|
||||
data.results.forEach(r => {
|
||||
const wrapper = document.createElement('div');
|
||||
wrapper.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";
|
||||
|
||||
const pathLine = document.createElement('div');
|
||||
pathLine.className = "font-mono text-[0.65rem] text-sky-300 break-all";
|
||||
|
||||
if (r.url) {
|
||||
const link = document.createElement('a');
|
||||
link.href = r.url;
|
||||
link.target = "_blank";
|
||||
link.rel = "noopener noreferrer";
|
||||
link.textContent = r.path + (r.line ? `:${r.line}` : "");
|
||||
pathLine.appendChild(link);
|
||||
} else {
|
||||
pathLine.textContent = r.path + (r.line ? `:${r.line}` : "");
|
||||
}
|
||||
|
||||
const snippetLine = document.createElement('div');
|
||||
snippetLine.className = "text-[0.7rem] text-slate-200 whitespace-pre-wrap";
|
||||
snippetLine.textContent = r.snippet || "";
|
||||
|
||||
wrapper.appendChild(pathLine);
|
||||
wrapper.appendChild(snippetLine);
|
||||
searchResults.appendChild(wrapper);
|
||||
});
|
||||
} catch (e) {
|
||||
searchResults.textContent = 'Search failed.';
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
LOG_TEMPLATE = r"""
|
||||
<!doctype html>
|
||||
<html class="h-full">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Log: {{ slug }}</title>
|
||||
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
|
||||
</head>
|
||||
<body class="h-full bg-slate-950 text-slate-100">
|
||||
<div class="max-w-5xl mx-auto px-4 py-4 space-y-2">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<div>
|
||||
<h1 class="text-sm font-semibold">Log for <span class="font-mono text-sky-400">{{ slug }}</span></h1>
|
||||
<p class="text-[0.65rem] text-slate-400">Live tail of wget output (auto-refreshing).</p>
|
||||
</div>
|
||||
<a href="/mirrors/{{ slug }}/" target="_blank" class="text-xs text-sky-400 hover:text-sky-200">Open mirror</a>
|
||||
</div>
|
||||
<div class="border border-slate-800 rounded-xl bg-slate-950/90 max-h-[75vh] overflow-y-auto">
|
||||
<pre id="log" class="text-[0.65rem] p-3 font-mono whitespace-pre-wrap"></pre>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
const logEl = document.getElementById('log');
|
||||
async function pollLog() {
|
||||
try {
|
||||
const resp = await fetch("{{ url_for('log_tail', slug=slug) }}");
|
||||
if (!resp.ok) return;
|
||||
const text = await resp.text();
|
||||
logEl.textContent = text;
|
||||
logEl.parentElement.scrollTop = logEl.parentElement.scrollHeight;
|
||||
} catch (e) {}
|
||||
}
|
||||
setInterval(pollLog, 1500);
|
||||
pollLog();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
# -------------------- ROUTES --------------------
|
||||
|
||||
|
||||
@app.route("/static/<path:filename>")
|
||||
def static_file(filename):
|
||||
return send_from_directory(STATIC_DIR, filename)
|
||||
|
||||
|
||||
@app.route("/", methods=["GET"])
|
||||
def index():
|
||||
mirrors = load_mirrors()
|
||||
cats = set()
|
||||
rows = []
|
||||
for m in mirrors:
|
||||
categories = m.get("categories") or []
|
||||
for c in categories:
|
||||
cats.add(c)
|
||||
raw = m.get("last_updated")
|
||||
disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
|
||||
rows.append({
|
||||
"slug": m["slug"],
|
||||
"categories": categories,
|
||||
"categories_joined": ", ".join(categories),
|
||||
"url": m["url"],
|
||||
"status": m.get("status") or "idle",
|
||||
"last_updated_raw": raw,
|
||||
"last_updated": disp,
|
||||
})
|
||||
return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=None)
|
||||
|
||||
|
||||
@app.route("/add", methods=["POST"])
|
||||
def add_mirror_route():
|
||||
slug = (request.form.get("slug") or "").strip()
|
||||
categories = (request.form.get("categories") or "").strip()
|
||||
url = (request.form.get("url") or "").strip()
|
||||
ignore_robots = bool(request.form.get("ignore_robots"))
|
||||
|
||||
error = None
|
||||
if not slug or not categories or not url:
|
||||
error = "Slug, categories, and URL are required."
|
||||
elif " " in slug:
|
||||
error = "Slug cannot contain spaces."
|
||||
|
||||
if error:
|
||||
# re-render with error
|
||||
mirrors = load_mirrors()
|
||||
cats = set()
|
||||
rows = []
|
||||
for m in mirrors:
|
||||
cs = m.get("categories") or []
|
||||
for c in cs:
|
||||
cats.add(c)
|
||||
raw = m.get("last_updated")
|
||||
disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
|
||||
rows.append({
|
||||
"slug": m["slug"],
|
||||
"categories": cs,
|
||||
"categories_joined": ", ".join(cs),
|
||||
"url": m["url"],
|
||||
"status": m.get("status") or "idle",
|
||||
"last_updated_raw": raw,
|
||||
"last_updated": disp,
|
||||
})
|
||||
return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=error), 400
|
||||
|
||||
try:
|
||||
add_mirror(slug, categories, url, ignore_robots=ignore_robots)
|
||||
except Exception as e:
|
||||
mirrors = load_mirrors()
|
||||
cats = set()
|
||||
rows = []
|
||||
for m in mirrors:
|
||||
cs = m.get("categories") or []
|
||||
for c in cs:
|
||||
cats.add(c)
|
||||
raw = m.get("last_updated")
|
||||
disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
|
||||
rows.append({
|
||||
"slug": m["slug"],
|
||||
"categories": cs,
|
||||
"categories_joined": ", ".join(cs),
|
||||
"url": m["url"],
|
||||
"status": m.get("status") or "idle",
|
||||
"last_updated_raw": raw,
|
||||
"last_updated": disp,
|
||||
})
|
||||
return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=str(e)), 400
|
||||
|
||||
_run_update_in_background(slug)
|
||||
return redirect(url_for("index"))
|
||||
|
||||
|
||||
@app.route("/update/<slug>", methods=["POST"])
|
||||
def trigger_update(slug):
|
||||
_run_update_in_background(slug)
|
||||
return redirect(url_for("index"))
|
||||
|
||||
|
||||
@app.route("/status", methods=["GET"])
|
||||
def status():
|
||||
mirrors = load_mirrors()
|
||||
out = []
|
||||
for m in mirrors:
|
||||
raw = m.get("last_updated")
|
||||
disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
|
||||
out.append({
|
||||
"slug": m["slug"],
|
||||
"categories": m.get("categories") or [],
|
||||
"url": m["url"],
|
||||
"status": m.get("status") or "idle",
|
||||
"last_updated": raw,
|
||||
"last_updated_display": disp or "",
|
||||
})
|
||||
return jsonify({"mirrors": out})
|
||||
|
||||
|
||||
@app.route("/logs/<slug>")
|
||||
def log_view(slug):
|
||||
log_path = LOG_ROOT / f"{slug}.log"
|
||||
if not log_path.exists():
|
||||
log_path.touch()
|
||||
return render_template_string(LOG_TEMPLATE, slug=slug)
|
||||
|
||||
|
||||
@app.route("/logs/<slug>/tail")
|
||||
def log_tail(slug):
|
||||
log_path = LOG_ROOT / f"{slug}.log"
|
||||
if not log_path.exists():
|
||||
return "", 200
|
||||
try:
|
||||
with log_path.open("rb") as f:
|
||||
f.seek(0, 2)
|
||||
size = f.tell()
|
||||
block = 65536
|
||||
if size <= block:
|
||||
f.seek(0)
|
||||
data = f.read()
|
||||
else:
|
||||
f.seek(-block, 2)
|
||||
data = f.read()
|
||||
return data.decode("utf-8", errors="replace")
|
||||
except OSError:
|
||||
return "", 200
|
||||
|
||||
|
||||
def strip_html(text: str) -> str:
    """Reduce an HTML fragment to its visible text content.

    Drops <script>/<style> blocks wholesale, removes every remaining tag,
    unescapes HTML entities, and collapses whitespace runs to single spaces.
    """
    # Remove script/style blocks first (their bodies are not visible text).
    for tag in ("script", "style"):
        text = re.sub(
            rf"<{tag}\b[^<]*(?:(?!</{tag}>)<[^<]*)*</{tag}>",
            " ",
            text,
            flags=re.IGNORECASE,
        )
    # Strip all remaining tags.
    text = re.sub(r"<[^>]+>", " ", text)
    # Unescape HTML entities (&amp; -> &, etc.).
    text = html.unescape(text)
    # Collapse whitespace.
    return re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
|
||||
def make_snippet(text: str,
                 query: str,
                 radius: int = 80,
                 max_len: int = 240) -> str:
    """Return a short excerpt of *text* centred on the first hit of *query*.

    The match is case-insensitive. When the query does not occur, the first
    *max_len* characters are returned instead. An ellipsis marks each
    truncated end.
    """
    if not text:
        return ""
    hit = text.lower().find(query.lower())
    if hit < 0:
        # No match: fall back to the head of the text.
        head = text[:max_len]
        if len(text) > max_len:
            head += "…"
        return head
    lo = max(0, hit - radius)
    hi = min(len(text), hit + len(query) + radius)
    excerpt = text[lo:hi]
    if lo > 0:
        excerpt = "…" + excerpt
    if hi < len(text):
        excerpt = excerpt + "…"
    return excerpt
|
||||
|
||||
|
||||
@app.route("/search", methods=["GET"])
|
||||
def content_search():
|
||||
q = (request.args.get("q") or "").strip()
|
||||
if not q:
|
||||
return jsonify({"results": []})
|
||||
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[
|
||||
"rg",
|
||||
"--line-number",
|
||||
"--no-heading",
|
||||
"--color", "never",
|
||||
"--max-count", "5", # per file
|
||||
"--type-add", "page:*.{html,htm,md,markdown,txt}",
|
||||
"-tpage",
|
||||
q,
|
||||
str(MIRROR_ROOT),
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return jsonify({
|
||||
"results": [{
|
||||
"path": "(error)",
|
||||
"line": 0,
|
||||
"url": "",
|
||||
"snippet": "ripgrep (rg) is not installed."
|
||||
}]
|
||||
})
|
||||
except subprocess.TimeoutExpired:
|
||||
return jsonify({
|
||||
"results": [{
|
||||
"path": "(error)",
|
||||
"line": 0,
|
||||
"url": "",
|
||||
"snippet": "rg timed out."
|
||||
}]
|
||||
})
|
||||
|
||||
results = []
|
||||
for line in proc.stdout.splitlines():
|
||||
parts = line.split(":", 2)
|
||||
if len(parts) != 3:
|
||||
continue
|
||||
path, lineno, raw_content = parts
|
||||
|
||||
# Strip HTML/JS/CSS markup from this line before making a snippet
|
||||
text_content = strip_html(raw_content)
|
||||
if not text_content:
|
||||
continue
|
||||
|
||||
snippet = make_snippet(text_content, q)
|
||||
|
||||
try:
|
||||
rel_path = str(Path(path).relative_to(MIRROR_ROOT))
|
||||
except ValueError:
|
||||
rel_path = path
|
||||
|
||||
url = "/mirrors/" + rel_path.replace("\\", "/")
|
||||
|
||||
results.append({
|
||||
"path": rel_path,
|
||||
"line": int(lineno),
|
||||
"url": url,
|
||||
"snippet": snippet,
|
||||
})
|
||||
|
||||
if len(results) >= 50:
|
||||
break
|
||||
|
||||
return jsonify({"results": results})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="127.0.0.1", port=5000, debug=False)
|
||||
@@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env python3
# Regenerate /srv/www/index.html from mirrors.txt.
# mirrors.txt format: one "slug url" pair per line; blank lines and lines
# starting with "#" are ignored.
import pathlib
import html

BASE = pathlib.Path("/srv/www")
URL_LIST = BASE / "mirrors.txt"
OUTDIR = BASE / "mirrors"
INDEX = BASE / "index.html"

entries = []

if URL_LIST.exists():
    for line in URL_LIST.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        parts = line.split(None, 1)
        if len(parts) != 2:
            # Malformed line (missing slug or url): skip it silently.
            continue
        slug, url = parts
        mirror_dir = OUTDIR / slug
        if not mirror_dir.exists():
            # not mirrored yet, but still list it
            status = " (not downloaded yet)"
        else:
            status = ""
        entries.append((slug, url, status))

# Build one <li> per entry; everything user-supplied is HTML-escaped.
items_html = []
for slug, url, status in entries:
    slug_esc = html.escape(slug)
    url_esc = html.escape(url)
    status_esc = html.escape(status)
    # Link goes to the directory; nginx autoindex or an index file will handle it
    items_html.append(
        f'<li><a href="mirrors/{slug_esc}/">{slug_esc}</a>'
        f' – <code>{url_esc}</code>{status_esc}</li>'
    )

html_doc = f"""<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>My Tutorial Mirrors</title>
<style>
body {{ font-family: sans-serif; max-width: 800px; margin: 2rem auto; }}
h1 {{ margin-bottom: 0.5rem; }}
code {{ font-size: 0.9em; }}
</style>
</head>
<body>
<h1>Nytegear Mirrors</h1>
<p>This page is generated automatically from <code>mirrors.txt</code>.</p>
<ul>
{''.join(items_html)}
</ul>
</body>
</html>
"""

INDEX.write_text(html_doc, encoding="utf-8")
|
||||
3
mirage/__init__.py
Normal file
3
mirage/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Mirage core package."""
|
||||
|
||||
__all__ = []
|
||||
44
mirage/cli.py
Normal file
44
mirage/cli.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from .commands import mirrors_app, misc_app
|
||||
from .daemon import run_daemon
|
||||
|
||||
# Top-level Typer application; subcommand groups are mounted below.
app = typer.Typer(
    help=(
        "Mirage - mirror management that's too good to be true.\n\n"
        "Manage local mirrors of websites for offline use.\n"
        "Use `mirage mirrors ...` to add/list/update/search.\n"
        "Run `mirage daemon` (e.g. under systemd) to process update jobs."
    )
)

# Mount the command groups: `mirage mirrors ...` and `mirage misc ...`.
app.add_typer(mirrors_app, name="mirrors")
app.add_typer(misc_app, name="misc")
|
||||
|
||||
|
||||
@app.command("daemon")
|
||||
def daemon_cmd(
|
||||
poll_interval: float = typer.Option(
|
||||
1.0,
|
||||
"--poll-interval",
|
||||
help="Seconds between job queue polls.",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Run the Mirage mirror daemon.
|
||||
|
||||
This is intended to be managed by systemd:
|
||||
- `ExecStart=/usr/bin/mirage daemon`
|
||||
"""
|
||||
run_daemon(poll_interval=poll_interval)
|
||||
|
||||
|
||||
def app_main():
    """Invoke the Typer app (entry-point wrapper; presumably referenced by
    the package's console-script metadata — confirm against setup config)."""
    app()


if __name__ == "__main__":
    app_main()
|
||||
|
||||
4
mirage/commands/__init__.py
Normal file
4
mirage/commands/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from .mirrors import mirrors_app
|
||||
from .misc import misc_app
|
||||
|
||||
__all__ = ["mirrors_app", "misc_app"]
|
||||
300
mirage/commands/mirrors.py
Normal file
300
mirage/commands/mirrors.py
Normal file
@@ -0,0 +1,300 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import List, Optional
|
||||
|
||||
import typer
|
||||
|
||||
from .. import storage, jobs
|
||||
from ..models import Mirror
|
||||
from ..updater import log_path_for
|
||||
|
||||
mirrors_app = typer.Typer(
|
||||
help="Manage mirrors (add, list, update, search, status, watch).")
|
||||
|
||||
|
||||
@mirrors_app.command("list")
|
||||
def list_mirrors_cmd():
|
||||
"""
|
||||
List all configured mirrors.
|
||||
"""
|
||||
mirrors = storage.list_mirrors()
|
||||
if not mirrors:
|
||||
typer.echo("No mirrors configured.")
|
||||
raise typer.Exit(0)
|
||||
|
||||
for m in mirrors:
|
||||
cats = ", ".join(m.categories) if m.categories else "-"
|
||||
status = m.status or "idle"
|
||||
lu = m.last_updated.isoformat(
|
||||
sep=" ", timespec="seconds") if m.last_updated else "never"
|
||||
typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}")
|
||||
typer.echo(f" {m.url}")
|
||||
|
||||
|
||||
@mirrors_app.command("add")
|
||||
def add_mirror_cmd(
|
||||
slug: str = typer.Argument(...,
|
||||
help="Local slug for the mirror (unique)."),
|
||||
url: str = typer.Argument(..., help="Source URL to mirror."),
|
||||
category: List[str] = typer.Option(
|
||||
None,
|
||||
"--category",
|
||||
"-c",
|
||||
help="Category tag(s) to apply. Can be passed multiple times.",
|
||||
),
|
||||
ignore_robots: bool = typer.Option(
|
||||
False,
|
||||
"--ignore-robots",
|
||||
help="Ignore robots.txt when mirroring (wget robots=off).",
|
||||
),
|
||||
no_update: bool = typer.Option(
|
||||
False,
|
||||
"--no-update",
|
||||
help="Do not enqueue an initial update job.",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Add a new mirror definition.
|
||||
|
||||
By default, this queues an initial update job and returns immediately.
|
||||
The actual mirroring is handled by the mirage daemon.
|
||||
"""
|
||||
existing = storage.get_mirror(slug)
|
||||
if existing:
|
||||
typer.echo(f"Error: mirror with slug {
|
||||
slug!r} already exists.", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
m = Mirror(
|
||||
slug=slug,
|
||||
url=url,
|
||||
categories=category or [],
|
||||
ignore_robots=ignore_robots,
|
||||
)
|
||||
storage.upsert_mirror(m)
|
||||
typer.echo(f"Added mirror {slug!r} -> {url}")
|
||||
|
||||
if no_update:
|
||||
typer.echo("Initial update NOT queued (per --no-update).")
|
||||
return
|
||||
|
||||
jobs.enqueue_update(slug)
|
||||
typer.echo("Initial update job queued.")
|
||||
typer.echo("Run `mirage mirrors status` or `mirage mirrors watch` to monitor.")
|
||||
|
||||
|
||||
@mirrors_app.command("edit")
def edit_mirror_cmd(
    slug: str = typer.Argument(..., help="Mirror slug to edit."),
    new_slug: Optional[str] = typer.Option(
        None,
        "--slug",
        help="Rename the mirror to this slug.",
    ),
    url: Optional[str] = typer.Option(
        None,
        "--url",
        help="Update the source URL.",
    ),
    category: List[str] = typer.Option(
        None,
        "--category",
        "-c",
        help="Replace categories with these (can be passed multiple times).",
    ),
    add_category: List[str] = typer.Option(
        None,
        "--add-category",
        help="Add category tag(s) without removing existing ones.",
    ),
    remove_category: List[str] = typer.Option(
        None,
        "--remove-category",
        help="Remove these category tag(s) from the mirror.",
    ),
    ignore_robots: Optional[bool] = typer.Option(
        None,
        "--ignore-robots/--respect-robots",
        help="Toggle ignoring robots.txt.",
    ),
):
    """
    Modify properties of an existing mirror (URL, categories, ignore_robots, slug).

    Category edits apply in order: full replacement (--category), then
    additions (--add-category), then removals (--remove-category).
    """
    m = storage.get_mirror(slug)
    if not m:
        typer.echo(f"No such mirror: {slug!r}", err=True)
        raise typer.Exit(1)

    original_slug = m.slug

    if url is not None:
        m.url = url

    if category:
        # Full replacement happens before add/remove below.
        m.categories = list(category)

    if add_category:
        for c in add_category:
            if c not in m.categories:
                m.categories.append(c)

    if remove_category:
        m.categories = [c for c in m.categories if c not in remove_category]

    if ignore_robots is not None:
        m.ignore_robots = ignore_robots

    if new_slug is not None and new_slug != original_slug:
        # Rename: save under the new slug first, then drop the old entry.
        # (The old code re-checked original_slug != new_slug here, which is
        # always true inside this branch; the redundant check is removed.)
        m.slug = new_slug
        storage.upsert_mirror(m)
        storage.delete_mirror(original_slug)
        typer.echo(f"Mirror {original_slug!r} renamed to {new_slug!r}.")
    else:
        storage.upsert_mirror(m)
        typer.echo(f"Mirror {slug!r} updated.")
|
||||
|
||||
|
||||
@mirrors_app.command("remove")
def remove_mirror_cmd(
    slug: str = typer.Argument(..., help="Mirror slug to remove."),
):
    """
    Remove a mirror definition (does not delete files on disk).
    """
    # delete_mirror() returns False when the slug is unknown.
    if not storage.delete_mirror(slug):
        typer.echo(f"No such mirror: {slug!r}", err=True)
        raise typer.Exit(1)

    typer.echo(f"Removed mirror {slug!r} from metadata.")
    typer.echo("NOTE: this does not delete the mirrored files on disk.")
|
||||
|
||||
|
||||
@mirrors_app.command("update")
def update_mirror_cmd(
    slug: str = typer.Argument(..., help="Mirror slug to update."),
):
    """
    Enqueue an update job for a single mirror (non-blocking).
    """
    # Validate the slug before queueing anything.
    if storage.get_mirror(slug) is None:
        typer.echo(f"No such mirror: {slug!r}", err=True)
        raise typer.Exit(1)

    jobs.enqueue_update(slug)
    typer.echo(f"Update job queued for {slug!r}.")
|
||||
|
||||
|
||||
@mirrors_app.command("update-all")
def update_all_cmd():
    """
    Enqueue update jobs for all mirrors (non-blocking).
    """
    all_mirrors = storage.list_mirrors()
    if not all_mirrors:
        typer.echo("No mirrors configured.")
        raise typer.Exit(0)

    # Skip mirrors that already have work queued or in flight to avoid
    # spamming duplicate jobs.
    busy = {"queued", "updating"}
    to_queue = [m.slug for m in all_mirrors if m.status not in busy]
    for slug in to_queue:
        jobs.enqueue_update(slug)

    typer.echo(f"Queued update jobs for {len(to_queue)} mirror(s).")
    typer.echo("Daemon will process them in the background.")
|
||||
|
||||
|
||||
@mirrors_app.command("status")
def status_cmd(
    slug: Optional[str] = typer.Argument(
        None,
        help="Optional mirror slug. If omitted, show status for all mirrors.",
    ),
):
    """
    Show current status for mirrors.

    With no slug: one summary line (plus last_error) per mirror.
    With a slug: a detailed field-by-field dump for that mirror.
    """
    if slug is None:
        mirrors = storage.list_mirrors()
        if not mirrors:
            typer.echo("No mirrors configured.")
            raise typer.Exit(0)

        for m in mirrors:
            cats = ", ".join(m.categories) if m.categories else "-"
            status = m.status or "idle"
            lu = m.last_updated.isoformat(
                sep=" ", timespec="seconds") if m.last_updated else "never"
            typer.echo(f"{m.slug:20} [{status:8}] {cats:25} {lu}")
            if m.last_error:
                typer.echo(f"  last_error: {m.last_error}")
    else:
        m = storage.get_mirror(slug)
        if not m:
            typer.echo(f"No such mirror: {slug!r}", err=True)
            raise typer.Exit(1)
        typer.echo(f"slug         : {m.slug}")
        typer.echo(f"url          : {m.url}")
        # Computed outside the f-string: a replacement field split across
        # source lines is a SyntaxError before Python 3.12.
        cats = ", ".join(m.categories) if m.categories else "-"
        typer.echo(f"categories   : {cats}")
        typer.echo(f"ignore_robots: {m.ignore_robots}")
        typer.echo(f"status       : {m.status or 'idle'}")
        lu = m.last_updated.isoformat(
            sep=" ", timespec="seconds") if m.last_updated else "never"
        typer.echo(f"last_updated : {lu}")
        if m.last_error:
            typer.echo(f"last_error   : {m.last_error}")
|
||||
|
||||
|
||||
@mirrors_app.command("watch")
def watch_cmd(
    slug: str = typer.Argument(..., help="Mirror slug to watch log for."),
    lines: int = typer.Option(
        40,
        "--lines",
        "-n",
        help="Show this many trailing lines before following.",
    ),
):
    """
    Tail the wget log for a mirror (like `tail -f`).

    Ctrl-C exits the watch without stopping the update job.
    """
    from collections import deque  # local: only this command needs it

    log_path = log_path_for(slug)
    if not log_path.exists():
        typer.echo(f"No log file yet for {slug!r}: {log_path}")
        raise typer.Exit(1)

    typer.echo(f"Watching log: {log_path}")
    try:
        with log_path.open("r", encoding="utf-8") as f:
            # Show the last N lines WITHOUT loading the whole (potentially
            # huge) wget log into memory. lines <= 0 means "show everything".
            # iter(f.readline, "") is used instead of plain iteration so
            # f.tell() stays usable afterwards (text-mode files forbid
            # tell() after next()).
            if lines > 0:
                tail = deque(iter(f.readline, ""), maxlen=lines)
            else:
                tail = f.readlines()
            for line in tail:
                typer.echo(line.rstrip("\n"))

            # We are now at EOF; follow new output from the same handle
            # (the old code re-opened the file, racing appended lines).
            while True:
                where = f.tell()
                line = f.readline()
                if not line:
                    time.sleep(0.5)
                    f.seek(where)
                else:
                    typer.echo(line.rstrip("\n"))
    except KeyboardInterrupt:
        typer.echo("\n[watch] Detaching from log.")
|
||||
33
mirage/commands/misc.py
Normal file
33
mirage/commands/misc.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
# type: ignore[attr-defined]
|
||||
from ..config import load_config, _default_config_path
|
||||
|
||||
misc_app = typer.Typer(help="Miscellaneous commands (config, info).")
|
||||
|
||||
|
||||
@misc_app.command("config-path")
def config_path_cmd():
    """
    Show where the active config file is located (or would be created).
    """
    # Reports the default location; actual resolution happens in load_config().
    typer.echo(str(_default_config_path()))
|
||||
|
||||
|
||||
@misc_app.command("config-show")
def config_show_cmd():
    """
    Print the current configuration values.
    """
    cfg = load_config()
    settings = (
        ("mirror_root", cfg.mirror_root),
        ("data_dir", cfg.data_dir),
        ("log_dir", cfg.log_dir),
        ("db_path", cfg.db_path),
        ("max_concurrent_updates", cfg.max_concurrent_updates),
        ("wget_bin", cfg.wget_bin),
        ("rg_bin", cfg.rg_bin),
    )
    for name, value in settings:
        typer.echo(f"{name} = {value}")
|
||||
146
mirage/config.py
Normal file
146
mirage/config.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tomllib # Python 3.11+; on 3.10 use 'tomli' instead
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
DEFAULT_MIRROR_ROOT = Path("/srv/www/mirrors")
|
||||
DEFAULT_DATA_DIR = Path("~/.local/share/mirrorctl").expanduser()
|
||||
DEFAULT_MAX_CONCURRENT_UPDATES = 4
|
||||
DEFAULT_WGET_BIN = "wget"
|
||||
DEFAULT_RG_BIN = "rg"
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Resolved runtime configuration for the application."""

    mirror_root: Path            # where mirrored sites live on disk
    data_dir: Path               # metadata root (db, logs, jobs)
    max_concurrent_updates: int  # parallelism cap for the daemon
    wget_bin: str                # wget executable name/path
    rg_bin: str                  # ripgrep executable name/path

    @property
    def log_dir(self) -> Path:
        """Per-mirror log directory (created on first access)."""
        path = self.data_dir / "logs"
        path.mkdir(parents=True, exist_ok=True)
        return path

    @property
    def db_path(self) -> Path:
        """Location of the JSON mirror database (parent dir is ensured)."""
        self.data_dir.mkdir(parents=True, exist_ok=True)
        return self.data_dir / "mirrors.json"

    @property
    def config_dir(self) -> Path:
        """Reserved for per-mirror config files (created on access)."""
        path = self.data_dir / "config"
        path.mkdir(parents=True, exist_ok=True)
        return path
|
||||
|
||||
|
||||
def default_config() -> Config:
    """Return the built-in defaults (no config file consulted)."""
    defaults = dict(
        mirror_root=DEFAULT_MIRROR_ROOT,
        data_dir=DEFAULT_DATA_DIR,
        max_concurrent_updates=DEFAULT_MAX_CONCURRENT_UPDATES,
        wget_bin=DEFAULT_WGET_BIN,
        rg_bin=DEFAULT_RG_BIN,
    )
    return Config(**defaults)
|
||||
|
||||
|
||||
def _default_config_path() -> Path:
    """Return the per-user config location, honouring XDG_CONFIG_HOME."""
    xdg = os.getenv("XDG_CONFIG_HOME")
    base = Path(xdg) if xdg else Path("~/.config").expanduser()
    return base / "mirrorctl" / "config.toml"


def _search_config_paths() -> list[Path]:
    """Candidate config files, highest priority first: env override,
    per-user config, then the system-wide file."""
    candidates: list[Path] = []
    override = os.getenv("MIRRORCTL_CONFIG")
    if override:
        candidates.append(Path(override))
    candidates.append(_default_config_path())
    candidates.append(Path("/etc/mirrorctl/config.toml"))
    return candidates
|
||||
|
||||
|
||||
def _ensure_default_config_file(path: Path, cfg: Config) -> None:
    """Write a commented default config at *path* unless one already exists."""
    if path.exists():
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    template = f"""# mirrorctl configuration

# Directory where mirrors will be stored
mirror_root = "{cfg.mirror_root}"

# Directory for mirrorctl metadata (db, logs, etc.)
data_dir = "{cfg.data_dir}"

# Max parallel mirror updates
max_concurrent_updates = {cfg.max_concurrent_updates}

# Path to wget binary
wget_bin = "{cfg.wget_bin}"

# Path to ripgrep (rg) binary
rg_bin = "{cfg.rg_bin}"
"""
    path.write_text(template, encoding="utf-8")
|
||||
|
||||
|
||||
def load_config() -> Config:
    """
    Load configuration from MIRRORCTL_CONFIG, XDG config, or /etc.

    If no config file exists anywhere, a commented default is created at
    ~/.config/mirrorctl/config.toml and then used.

    Returns:
        A fully-resolved Config (paths expanded, values coerced).
    """
    cfg = default_config()

    used_path: Optional[Path] = None
    for p in _search_config_paths():
        if p.is_file():
            used_path = p
            break

    if used_path is None:
        # No config anywhere: materialize the defaults for the user to edit.
        used_path = _default_config_path()
        _ensure_default_config_file(used_path, cfg)

    data: dict = {}
    try:
        with used_path.open("rb") as fp:
            data = tomllib.load(fp)
    except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError):
        # Unreadable or invalid config: deliberately fall back to defaults
        # rather than crashing every CLI invocation.
        data = {}

    # Apply overrides from the file. expanduser() so "~/mirrors" in the
    # config behaves as users expect (previously the literal "~" was kept).
    mirror_root = Path(data.get("mirror_root", cfg.mirror_root)).expanduser()
    data_dir = Path(data.get("data_dir", cfg.data_dir)).expanduser()
    max_concurrent = int(
        data.get("max_concurrent_updates", cfg.max_concurrent_updates))
    wget_bin = str(data.get("wget_bin", cfg.wget_bin))
    rg_bin = str(data.get("rg_bin", cfg.rg_bin))

    return Config(
        mirror_root=mirror_root,
        data_dir=data_dir,
        max_concurrent_updates=max_concurrent,
        wget_bin=wget_bin,
        rg_bin=rg_bin,
    )
|
||||
78
mirage/daemon.py
Normal file
78
mirage/daemon.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, Future
|
||||
from datetime import datetime
|
||||
from typing import Dict, Tuple
|
||||
|
||||
from .config import load_config
|
||||
from . import jobs
|
||||
from . import storage
|
||||
from .updater import update_mirror
|
||||
|
||||
|
||||
def run_daemon(poll_interval: float = 1.0) -> None:
    """
    Simple job-processing daemon.

    - Watches jobs/pending for new update jobs.
    - Moves them to jobs/running.
    - Runs wget via update_mirror() with bounded concurrency.

    Args:
        poll_interval: Seconds to sleep between scheduler passes.
    """
    from pathlib import Path  # hoisted out of the loop (was imported per job)

    cfg = load_config()
    max_workers = max(1, cfg.max_concurrent_updates)
    executor = ThreadPoolExecutor(max_workers=max_workers)

    # Map Future -> (job_path, slug) for in-flight updates.
    running: Dict[Future, Tuple[str, str]] = {}

    print(f"[mirage-daemon] starting with max_workers={max_workers}")
    print(f"[mirage-daemon] jobs dir: {cfg.data_dir / 'jobs'}")

    try:
        while True:
            # 1. Reap finished jobs: remove their job file, record failures.
            finished = [f for f in running if f.done()]
            for f in finished:
                job_path, slug = running.pop(f)
                # missing_ok: the file may already be gone, which is fine.
                # (The old code wrapped this in try/except TypeError whose
                # except branch was identical to the try branch -- removed.)
                Path(job_path).unlink(missing_ok=True)

                try:
                    f.result()
                except Exception as e:  # noqa: BLE001
                    # update_mirror() records wget failures itself; this
                    # catches internal crashes and marks the mirror broken.
                    m = storage.get_mirror(slug)
                    if m:
                        m.status = "error"
                        m.last_error = f"Internal error: {e!r}"
                        m.last_updated = datetime.now()
                        storage.upsert_mirror(m)

            # 2. Fill spare capacity from the pending queue (FIFO by name).
            capacity = max_workers - len(running)
            if capacity > 0:
                for pending_path, job in jobs.list_pending_jobs()[:capacity]:
                    running_path = jobs.move_to_running(pending_path)
                    # Flag the mirror as updating before wget starts so
                    # `mirage mirrors status` reflects reality promptly.
                    m = storage.get_mirror(job.slug)
                    if m:
                        m.status = "updating"
                        m.last_error = None
                        storage.upsert_mirror(m)

                    fut = executor.submit(update_mirror, job.slug)
                    running[fut] = (str(running_path), job.slug)

            time.sleep(poll_interval)
    except KeyboardInterrupt:
        print("[mirage-daemon] shutting down (KeyboardInterrupt)")
    finally:
        # wait=False: do not block shutdown on in-flight wget runs.
        executor.shutdown(wait=False)
|
||||
108
mirage/jobs.py
Normal file
108
mirage/jobs.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
from .config import load_config
|
||||
from . import storage
|
||||
|
||||
|
||||
@dataclass
class Job:
    """A queued unit of work for the daemon (currently only updates)."""

    id: str                # random hex identifier, also the file stem
    slug: str              # mirror this job applies to
    type: str              # currently only "update"
    queued_at: datetime    # when the job was enqueued

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict (datetime -> ISO string)."""
        payload = {
            "id": self.id,
            "slug": self.slug,
            "type": self.type,
            "queued_at": self.queued_at.isoformat(),
        }
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "Job":
        """Inverse of to_dict(); raises KeyError on missing fields."""
        return cls(
            id=data["id"],
            slug=data["slug"],
            type=data["type"],
            queued_at=datetime.fromisoformat(data["queued_at"]),
        )
|
||||
|
||||
|
||||
def _jobs_root() -> Path:
    """Return the jobs root, creating pending/ and running/ as needed."""
    root = load_config().data_dir / "jobs"
    root.mkdir(parents=True, exist_ok=True)
    for sub in ("pending", "running"):
        (root / sub).mkdir(exist_ok=True)
    return root


def pending_dir() -> Path:
    """Directory holding jobs waiting to be picked up."""
    return _jobs_root() / "pending"


def running_dir() -> Path:
    """Directory holding jobs currently being processed."""
    return _jobs_root() / "running"
|
||||
|
||||
|
||||
def enqueue_update(slug: str) -> Path:
    """
    Enqueue an update job for the given slug.

    Marks the mirror status as 'queued' unless it is already
    queued/updating. Returns the path of the created job file.
    """
    job = Job(
        id=uuid.uuid4().hex,
        slug=slug,
        type="update",
        queued_at=datetime.now(),
    )
    path = pending_dir() / f"{job.id}.json"
    path.write_text(json.dumps(job.to_dict()), encoding="utf-8")

    # Reflect the queueing in the mirror's status unless work is already
    # pending or in flight for it.
    m = storage.get_mirror(slug)
    if m and m.status not in ("queued", "updating"):
        m.status = "queued"
        m.last_error = None
        storage.upsert_mirror(m)

    return path
|
||||
|
||||
|
||||
def list_pending_jobs() -> List[Tuple[Path, Job]]:
    """Return (path, Job) pairs for all parseable pending jobs, sorted by
    file name (job ids), which gives a stable pickup order."""
    found: List[Tuple[Path, Job]] = []
    for job_file in sorted(pending_dir().glob("*.json")):
        try:
            found.append((job_file, load_job(job_file)))
        except Exception:
            # Skip corrupt/partial job files rather than wedging the queue.
            continue
    return found


def load_job(path: Path) -> Job:
    """Parse a single job file into a Job."""
    return Job.from_dict(json.loads(path.read_text(encoding="utf-8")))


def move_to_running(pending_path: Path) -> Path:
    """
    Move a pending job file into the running directory (rename).
    """
    dest = running_dir() / pending_path.name
    pending_path.replace(dest)
    return dest
|
||||
|
||||
3
mirage/models/__init__.py
Normal file
3
mirage/models/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .mirror import Mirror
|
||||
|
||||
__all__ = ["Mirror"]
|
||||
58
mirage/models/mirror.py
Normal file
58
mirage/models/mirror.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
class Mirror:
    """
    Representation of a single mirrored site.

    Attributes:
        slug: Local identifier (and directory name under mirror_root).
        url: Source URL to mirror.
        categories: Arbitrary tags/categories (strings).
        ignore_robots: Whether to disable robots.txt (wget robots=off).
        status: Current status: "idle", "updating", "warning", "error".
        last_updated: Last successful/attempted update timestamp.
        last_error: Text description of last error/warning.
    """
    slug: str
    url: str
    categories: List[str] = field(default_factory=list)
    ignore_robots: bool = False
    status: str = "idle"
    last_updated: Optional[datetime] = None
    last_error: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict (datetime -> ISO string)."""
        return {
            "slug": self.slug,
            "url": self.url,
            "categories": self.categories,
            "ignore_robots": self.ignore_robots,
            "status": self.status,
            "last_updated": (
                self.last_updated.isoformat() if self.last_updated else None
            ),
            "last_error": self.last_error,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Mirror":
        """Inverse of to_dict(); tolerant of missing optional keys."""
        raw_ts = data.get("last_updated")
        return cls(
            slug=data["slug"],
            url=data["url"],
            categories=data.get("categories", []),
            ignore_robots=data.get("ignore_robots", False),
            status=data.get("status", "idle"),
            last_updated=datetime.fromisoformat(raw_ts) if raw_ts else None,
            last_error=data.get("last_error"),
        )
|
||||
144
mirage/search.py
Normal file
144
mirage/search.py
Normal file
@@ -0,0 +1,144 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
from .config import load_config
|
||||
|
||||
|
||||
def strip_html(text: str) -> str:
    """Reduce an HTML fragment to plain text: drop tags, decode entities,
    and collapse whitespace."""
    # Remove <script>...</script> and <style>...</style> wholesale; their
    # contents are code, not searchable prose.
    for tag in ("script", "style"):
        text = re.sub(
            rf"<{tag}\b[^<]*(?:(?!</{tag}>)<[^<]*)*</{tag}>",
            " ",
            text,
            flags=re.IGNORECASE,
        )
    # Replace every remaining tag with a space, then decode entities
    # (&amp; -> &) -- same order as before: tags first, entities second.
    text = html.unescape(re.sub(r"<[^>]+>", " ", text))
    # Normalize all whitespace runs to single spaces.
    return re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
|
||||
def make_snippet(text: str,
                 query: str,
                 radius: int = 80,
                 max_len: int = 240) -> str:
    """
    Return a short excerpt of *text* centered on the first (case-insensitive)
    occurrence of *query*.

    Falls back to the first max_len characters when the query does not
    occur. Ellipses mark truncated ends.
    """
    if not text:
        return ""

    hit = text.lower().find(query.lower())
    if hit < 0:
        # No match: show the head of the document.
        head = text[:max_len]
        return head + "…" if len(text) > max_len else head

    lo = max(0, hit - radius)
    hi = min(len(text), hit + len(query) + radius)
    excerpt = text[lo:hi]
    if lo > 0:
        excerpt = "…" + excerpt
    if hi < len(text):
        excerpt += "…"
    return excerpt
|
||||
|
||||
|
||||
def search_content(query: str, limit: int = 50) -> List[Dict]:
    """
    Run ripgrep across all mirrors and return text snippets around matches.

    Returns list of dicts:
        {
            "path": "slug/host/path/file.html",
            "line": 42,
            "snippet": "text around the query ..."
        }
    """
    query = (query or "").strip()
    if not query:
        return []

    cfg = load_config()
    root = cfg.mirror_root

    rg_cmd = [
        cfg.rg_bin,
        "--line-number",
        "--no-heading",
        "--color",
        "never",
        "--max-count",
        "5",  # per file
        "--type-add",
        "page:*.{html,htm,md,markdown,txt}",
        "-tpage",
        query,
        str(root),
    ]
    try:
        proc = subprocess.run(
            rg_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=10,
        )
    except FileNotFoundError:
        return [{
            "path": "(error)",
            "line": 0,
            "snippet": f"ripgrep binary not found: {cfg.rg_bin!r}",
        }]
    except subprocess.TimeoutExpired:
        return [{
            "path": "(error)",
            "line": 0,
            "snippet": "rg timed out.",
        }]

    hits: List[Dict] = []
    for raw_line in proc.stdout.splitlines():
        # rg --no-heading output is "<path>:<lineno>:<content>".
        fields = raw_line.split(":", 2)
        if len(fields) != 3:
            continue
        path_str, lineno_str, raw_content = fields

        plain = strip_html(raw_content)
        if not plain:
            continue

        # Prefer paths relative to the mirror root; fall back to absolute.
        try:
            rel_path = str(Path(path_str).relative_to(root))
        except ValueError:
            rel_path = path_str

        try:
            lineno = int(lineno_str)
        except ValueError:
            lineno = 0

        hits.append({
            "path": rel_path,
            "line": lineno,
            "snippet": make_snippet(plain, query),
        })
        if len(hits) >= limit:
            break

    return hits
|
||||
60
mirage/storage.py
Normal file
60
mirage/storage.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from threading import RLock
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from .config import load_config
|
||||
from .models import Mirror
|
||||
|
||||
_lock = RLock()
|
||||
|
||||
|
||||
def _load_raw(path) -> Dict[str, dict]:
    """Read the JSON db at *path*; an absent file means an empty db."""
    if not path.exists():
        return {}
    return json.loads(path.read_text(encoding="utf-8"))


def _save_raw(path, data: Dict[str, dict]) -> None:
    """Write the db atomically: dump to a .tmp sibling, then rename over."""
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(".tmp")
    tmp.write_text(json.dumps(data, indent=2, sort_keys=True),
                   encoding="utf-8")
    tmp.replace(path)
|
||||
|
||||
|
||||
def list_mirrors() -> List[Mirror]:
    """Return every known mirror as a Mirror instance."""
    cfg = load_config()
    with _lock:
        raw = _load_raw(cfg.db_path)
    return [Mirror.from_dict(entry) for entry in raw.values()]


def get_mirror(slug: str) -> Optional[Mirror]:
    """Return the mirror for *slug*, or None if unknown."""
    cfg = load_config()
    with _lock:
        entry = _load_raw(cfg.db_path).get(slug)
    return Mirror.from_dict(entry) if entry is not None else None


def upsert_mirror(m: Mirror) -> None:
    """Insert or replace *m* in the db (keyed by slug)."""
    cfg = load_config()
    with _lock:
        data = _load_raw(cfg.db_path)
        data[m.slug] = m.to_dict()
        _save_raw(cfg.db_path, data)


def delete_mirror(slug: str) -> bool:
    """Remove *slug* from the db. Returns False if it was not present."""
    cfg = load_config()
    with _lock:
        data = _load_raw(cfg.db_path)
        if data.pop(slug, None) is None:
            return False
        _save_raw(cfg.db_path, data)
        return True
|
||||
148
mirage/updater.py
Normal file
148
mirage/updater.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Tuple, Dict
|
||||
|
||||
from .config import load_config
|
||||
from .models import Mirror
|
||||
from . import storage
|
||||
|
||||
|
||||
def mirror_dir_for(slug: str) -> Path:
    """Return (and create) the on-disk directory for a mirror slug."""
    target = load_config().mirror_root / slug
    target.mkdir(parents=True, exist_ok=True)
    return target


def log_path_for(slug: str) -> Path:
    """Return the wget log file path for a mirror slug."""
    return load_config().log_dir / f"{slug}.log"
|
||||
|
||||
|
||||
def _write_log_header(log_path: Path, cmd: list[str]) -> None:
    """Append a timestamped banner showing the command about to run."""
    stamp = datetime.now().isoformat()
    with log_path.open("a", encoding="utf-8") as log:
        log.write(f"\n=== {stamp} Running: {' '.join(cmd)}\n")
        log.flush()
|
||||
|
||||
|
||||
def run_wget(mirror: Mirror) -> Tuple[int, Path]:
    """
    Run wget for a single mirror, appending logs to its log file.

    Returns:
        (exit_code, log_path)
    """
    cfg = load_config()
    target_dir = mirror_dir_for(mirror.slug)
    log_path = log_path_for(mirror.slug)

    robots = "off" if mirror.ignore_robots else "on"
    cmd = [
        cfg.wget_bin,
        "--mirror",
        "--convert-links",
        "--page-requisites",
        "--no-parent",
        "--adjust-extension",
        f"--execute=robots={robots}",
        "--directory-prefix",
        str(target_dir),
        mirror.url,
    ]

    _write_log_header(log_path, cmd)

    # Both stdout and stderr of wget go into the per-mirror log file.
    with log_path.open("a", encoding="utf-8") as log:
        completed = subprocess.run(
            cmd,
            stdout=log,
            stderr=log,
            text=True,
        )

    return completed.returncode, log_path
|
||||
|
||||
|
||||
def update_mirror(slug: str) -> Mirror:
    """
    Update a single mirror by slug and persist its status.

    Raises:
        ValueError: if the slug is unknown.

    Returns:
        Updated Mirror instance.
    """
    m = storage.get_mirror(slug)
    if not m:
        raise ValueError(f"Unknown mirror: {slug!r}")

    # Record that the update started before the (long) wget run.
    m.status = "updating"
    m.last_error = None
    storage.upsert_mirror(m)

    code, log_path = run_wget(m)

    # Reload so we don't clobber fields changed concurrently during wget.
    m = storage.get_mirror(slug) or m

    if code == 0:
        m.status = "idle"
    else:
        # wget exit code 4 signals network trouble -> soft "warning";
        # anything else non-zero is treated as a hard error.
        m.status = "warning" if code == 4 else "error"
        m.last_error = f"wget exited with code {code}, see {log_path}"

    m.last_updated = datetime.now()
    storage.upsert_mirror(m)
    return m
|
||||
|
||||
|
||||
def update_all_concurrent(
        slugs: Iterable[str] | None = None) -> Dict[str, Mirror]:
    """
    Update multiple mirrors concurrently.

    Args:
        slugs: Iterable of slugs to update. If None, update all mirrors.

    Returns:
        Mapping slug -> updated Mirror.
    """
    cfg = load_config()
    targets = list(slugs) if slugs is not None else [
        m.slug for m in storage.list_mirrors()]

    results: Dict[str, Mirror] = {}
    if not targets:
        return results

    workers = max(1, cfg.max_concurrent_updates)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        pending = {pool.submit(update_mirror, s): s for s in targets}
        for fut in as_completed(pending):
            slug = pending[fut]
            try:
                results[slug] = fut.result()
            except Exception as e:  # noqa: BLE001
                # A crash inside update_mirror: record it on the mirror.
                m = storage.get_mirror(slug)
                if m:
                    m.status = "error"
                    m.last_error = f"Internal error: {e!r}"
                    m.last_updated = datetime.now()
                    storage.upsert_mirror(m)
                    results[slug] = m

    return results
|
||||
@@ -1,190 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import subprocess
|
||||
import datetime as dt
|
||||
from pathlib import Path
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
BASE = Path("/srv/www")
|
||||
DATA_FILE = BASE / "data" / "mirrors.json"
|
||||
MIRROR_ROOT = BASE / "mirrors"
|
||||
LOG_ROOT = BASE / "logs"
|
||||
|
||||
MIRROR_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
LOG_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Current UTC time as an ISO-8601 string with a trailing 'Z'."""
    now = dt.datetime.utcnow().replace(microsecond=0)
    return now.isoformat() + "Z"
|
||||
|
||||
|
||||
def load_mirrors() -> list[dict]:
|
||||
with _LOCK:
|
||||
if not DATA_FILE.exists():
|
||||
return []
|
||||
with DATA_FILE.open("r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def save_mirrors(mirrors: list[dict]) -> None:
|
||||
with _LOCK:
|
||||
tmp = DATA_FILE.with_suffix(".tmp")
|
||||
with tmp.open("w", encoding="utf-8") as f:
|
||||
json.dump(mirrors, f, indent=2)
|
||||
tmp.replace(DATA_FILE)
|
||||
|
||||
|
||||
def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
|
||||
for m in mirrors:
|
||||
if m["slug"] == slug:
|
||||
return m
|
||||
return None
|
||||
|
||||
|
||||
def _normalise_categories(raw: str) -> list[str]:
|
||||
# "tutorials, wgpu, rust" -> ["tutorials","wgpu","rust"]
|
||||
parts = [p.strip() for p in raw.split(",")]
|
||||
return [p for p in parts if p]
|
||||
|
||||
|
||||
def add_mirror(slug: str,
|
||||
categories: str,
|
||||
url: str,
|
||||
ignore_robots: bool = False) -> dict:
|
||||
mirrors = load_mirrors()
|
||||
if get_mirror(mirrors, slug) is not None:
|
||||
raise ValueError(f"Mirror with slug '{slug}' already exists")
|
||||
|
||||
cats = _normalise_categories(categories)
|
||||
if not cats:
|
||||
raise ValueError("At least one category is required")
|
||||
|
||||
m = {
|
||||
"slug": slug,
|
||||
"categories": cats,
|
||||
"url": url,
|
||||
"ignore_robots": bool(ignore_robots),
|
||||
"created_at": _now_iso(),
|
||||
"last_updated": None,
|
||||
"status": "queued", # idle | updating | queued | warning | error
|
||||
"last_error": None,
|
||||
}
|
||||
mirrors.append(m)
|
||||
save_mirrors(mirrors)
|
||||
return m
|
||||
|
||||
|
||||
# Sentinel distinguishing "leave the field alone" from "set it to None".
_UNSET = object()


def _set_status(slug: str, *,
                status: str,
                last_error=_UNSET,
                last_updated=_UNSET):
    """Update status fields on one mirror record and persist the registry.

    ``last_error`` / ``last_updated`` are only written when explicitly
    supplied; passing ``None`` now *clears* the field.  (Previously ``None``
    meant "don't touch", so callers like
    ``_set_status(slug, status="idle", ..., last_error=None)`` could never
    clear a stale error from an earlier failed run — that was a bug.)
    An unknown slug is silently ignored.
    """
    registry = load_mirrors()
    record = get_mirror(registry, slug)
    if record is None:
        return
    record["status"] = status
    if last_error is not _UNSET:
        record["last_error"] = last_error
    if last_updated is not _UNSET:
        record["last_updated"] = last_updated
    save_mirrors(registry)
||||
def update_mirror(slug: str) -> None:
    """Run a wget mirror pass for a single slug (blocking in this thread).

    Status transitions: ``updating`` -> ``idle`` (exit 0) | ``warning``
    (nonzero exit but the crawl finished and produced files) | ``error``
    (anything else, including exceptions launching wget).

    Raises:
        ValueError: if the slug is not in the registry.
    """
    mirrors = load_mirrors()
    m = get_mirror(mirrors, slug)
    if m is None:
        raise ValueError(f"No such mirror: {slug}")

    _set_status(slug, status="updating", last_error=None)

    target_dir = MIRROR_ROOT / slug
    target_dir.mkdir(parents=True, exist_ok=True)
    log_file = LOG_ROOT / f"{slug}.log"

    # Honour robots.txt unless the mirror explicitly opts out.
    robots_setting = "off" if m.get("ignore_robots") else "on"

    cmd = [
        "wget",
        "--mirror",  # recurse, keep timestamps
        "--convert-links",
        "--adjust-extension",
        "--page-requisites",
        "--no-parent",
        "--wait=0.5",
        "--random-wait",
        "--limit-rate=50m",
        "--tries=3",
        "--retry-connrefused",
        f"--execute=robots={robots_setting}",
        "-P",
        str(target_dir),
        m["url"],
    ]

    try:
        # BUGFIX: the original f-strings had newlines inside their {...}
        # replacement fields, which is only legal on Python 3.12+ while the
        # project declares requires-python >= 3.10.  Kept on one line; the
        # emitted log text is identical.
        with log_file.open("a", encoding="utf-8") as lf:
            lf.write(f"\n=== {_now_iso()} : Starting mirror of {m['url']} ===\n")
            lf.flush()
            proc = subprocess.run(
                cmd,
                stdout=lf,
                stderr=subprocess.STDOUT,
            )
            lf.write(f"=== {_now_iso()} : wget exited with code {proc.returncode} ===\n")
            lf.flush()

        # Classify result
        if proc.returncode == 0:
            _set_status(slug, status="idle",
                        last_updated=_now_iso(), last_error=None)
        else:
            # If we see FINISHED in the log and the directory has content,
            # treat this as a partial/ok-with-warnings case.
            text = log_file.read_text(encoding="utf-8", errors="ignore")
            has_finished = "FINISHED --" in text
            has_files = any(target_dir.rglob("*"))
            if has_finished and has_files:
                _set_status(
                    slug,
                    status="warning",
                    last_updated=_now_iso(),
                    last_error=f"wget exited with {proc.returncode} (partial; see log)",
                )
            else:
                _set_status(
                    slug,
                    status="error",
                    last_error=f"wget exited with {proc.returncode}",
                )
    except Exception as e:
        # Launching wget or touching the log failed entirely.
        _set_status(
            slug,
            status="error",
            last_error=f"{type(e).__name__}: {e}",
        )
||||
def update_all_mirrors(max_workers: int = 3) -> None:
    """Mirror every registered slug, up to ``max_workers`` in parallel.

    A failure in one mirror is recorded on its own record and does not stop
    the others.  No-op when the registry is empty.
    """
    slugs = [m["slug"] for m in load_mirrors()]
    if not slugs:
        return
    # Run several in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(update_mirror, slug): slug for slug in slugs}
        for fut in as_completed(futures):
            slug = futures[fut]
            try:
                fut.result()
            except Exception as e:
                # BUGFIX: the original f-string had a newline inside its
                # {...} field — a SyntaxError before Python 3.12, while the
                # project supports >= 3.10.  Same message, one line.
                _set_status(slug, status="error",
                            last_error=f"{type(e).__name__}: {e}")
1067
package-lock.json
generated
1067
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
23
package.json
23
package.json
@@ -1,23 +0,0 @@
|
||||
{
|
||||
"devDependencies": {
|
||||
"tailwindcss": "^4.1.17"
|
||||
},
|
||||
"name": "www",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://git.nytegear.com/aargonian/nytegear-mirror-websites.git"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"type": "commonjs",
|
||||
"dependencies": {
|
||||
"@tailwindcss/cli": "^4.1.17"
|
||||
}
|
||||
}
|
||||
19
pyproject.toml
Normal file
19
pyproject.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "mirage"
|
||||
version = "0.1.0"
|
||||
description = "Mirror management that's too good to be true."
|
||||
authors = [{ name = "Aaron Gorodetzky", email = "aaron@nytegear.com" }]
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"typer[all]>=0.12.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
mirage = "mirage.cli:app"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["mirage"]
|
||||
File diff suppressed because one or more lines are too long
6
systemd/mirage-update.service
Normal file
6
systemd/mirage-update.service
Normal file
@@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=Enqueue periodic updates for Mirage mirrors
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/bin/mirage mirrors update-all
|
||||
@@ -1,10 +1,11 @@
|
||||
# systemd/mirage-update.timer
|
||||
[Unit]
|
||||
Description=Daily update of offline mirrors
|
||||
Description=Run Mirage mirror updates periodically
|
||||
|
||||
[Timer]
|
||||
OnCalendar=03:00
|
||||
Persistent=true
|
||||
Unit=mirage-update.service
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
|
||||
15
systemd/mirage.service
Normal file
15
systemd/mirage.service
Normal file
@@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=Mirage mirror daemon
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=mirage
|
||||
Group=mirage
|
||||
ExecStart=/usr/bin/mirage daemon
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,15 +0,0 @@
|
||||
[Unit]
|
||||
Description=Mirror Manager Flask App
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
User=aargonian
|
||||
Group=aargonian
|
||||
WorkingDirectory=/srv/www
|
||||
Environment="FLASK_ENV=production"
|
||||
ExecStart=/usr/bin/python3 /srv/www/app.py
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,9 +0,0 @@
|
||||
[Unit]
|
||||
Description=Update Offline Website Mirrors
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=aargonian
|
||||
Group=aargonian
|
||||
WorkingDirectory=/srv/www
|
||||
ExecStart=/usr/bin/python3 /srv/www/update_mirrors.py
|
||||
@@ -1 +0,0 @@
|
||||
@import "tailwindcss";
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
from mirror_manager import update_all_mirrors, update_mirror
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) == 2:
|
||||
slug = sys.argv[1]
|
||||
update_mirror(slug)
|
||||
else:
|
||||
# bump max_workers if you're feeling brave / bandwidth-rich
|
||||
update_all_mirrors(max_workers=8)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user