Files
mirage/app.py
2025-12-02 03:17:49 -05:00

640 lines
24 KiB
Python
Executable File

#!/usr/bin/env python3
from mirror_manager import (
load_mirrors,
add_mirror,
update_mirror,
MIRROR_ROOT,
LOG_ROOT,
)
import re
import html
import subprocess
import threading
from pathlib import Path
from flask import (
Flask,
request,
redirect,
url_for,
jsonify,
send_from_directory,
render_template_string
)
# Static assets are served out of BASE/static (see STATIC_DIR in static_file).
BASE = Path("/srv/www")
STATIC_DIR = BASE / "static"
# parents=True: on a fresh host /srv/www may not exist yet, and a plain mkdir()
# would then raise FileNotFoundError while the module is being imported.
STATIC_DIR.mkdir(parents=True, exist_ok=True)
# NOTE(review): Flask(...) registers its own "/static/<path:filename>" rule by
# default — confirm it does not take precedence over the static_file route that
# this file declares for the same URL pattern.
app = Flask(__name__)
def _run_update_in_background(slug: str):
    """Start update_mirror(slug) on a daemon thread and return without waiting."""
    # daemon=True: the worker does not keep the interpreter alive at shutdown.
    threading.Thread(
        target=update_mirror,
        args=(slug,),
        daemon=True,
    ).start()
# -------------------- TEMPLATES --------------------
INDEX_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
<head>
<meta charset="utf-8">
<title>Mirror Manager</title>
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
</head>
<body class="h-full bg-slate-950 text-slate-100">
<div class="min-h-full">
<header class="border-b border-slate-800 bg-slate-950/80 backdrop-blur">
<div class="max-w-5xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
<div>
<h1 class="text-xl font-semibold tracking-tight">Mirror Manager</h1>
<p class="text-xs text-slate-400">Local offline mirrors of external sites, grouped by category.</p>
</div>
<div class="flex items-center gap-2 text-xs text-slate-400">
<span class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 bg-slate-900/70">
<span class="w-2 h-2 rounded-full bg-emerald-400"></span>
Running locally
</span>
</div>
</div>
</header>
<main class="max-w-5xl mx-auto px-4 py-4 space-y-4">
<!-- Mirrors list -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
<div class="flex flex-wrap items-center gap-2">
<span class="text-xs text-slate-400">Categories:</span>
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
All ({{ mirrors|length }})
</button>
{% for cat in categories %}
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
{{ cat }}
</button>
{% endfor %}
</div>
<div class="flex gap-2">
<input
id="search"
class="w-full md:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500"
placeholder="Filter by slug / URL / category…"
/>
</div>
</div>
<div class="overflow-x-auto border border-slate-800 rounded-xl">
<table class="min-w-full text-sm">
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
<tr>
<th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
<th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
<th class="px-3 py-2 text-left"></th>
</tr>
</thead>
<tbody id="mirror-table" class="divide-y divide-slate-900/80">
{% for m in mirrors %}
<tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
<td class="px-3 py-2 align-top">
<div class="flex flex-col gap-1">
<a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
{{ m.slug }}
</a>
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
View live log
</a>
</div>
</td>
<td class="px-3 py-2 align-top">
<div class="flex flex-wrap gap-1">
{% for c in m.categories %}
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
{% endfor %}
</div>
</td>
<td class="px-3 py-2 align-top max-w-xs">
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
</td>
<td class="px-3 py-2 align-top text-xs text-slate-300">
{% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %}
<span class="text-slate-600">never</span>
{% endif %}
</td>
<td class="px-3 py-2 align-top text-xs">
{% set st = m.status or 'idle' %}
<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
<span class="w-2 h-2 rounded-full
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
<span class="capitalize">{{ st }}</span>
</div>
</td>
<td class="px-3 py-2 align-top text-right text-[0.7rem]">
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
<button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
<span>Update</span>
</button>
</form>
</td>
</tr>
{% endfor %}
{% if mirrors|length == 0 %}
<tr>
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
No mirrors yet. Add one below.
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</section>
<!-- Add mirror -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40 space-y-3">
<h2 class="text-sm font-semibold">Add mirror</h2>
<form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
<div>
<label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
</div>
<div>
<label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
<input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
</div>
<div>
<label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
<input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
</div>
<div class="flex items-start gap-2">
<input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
<label for="ignore_robots" class="text-xs text-slate-400">
Ignore robots.txt (only if you explicitly want to archive disallowed paths).
</label>
</div>
{% if error %}
<p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
{% endif %}
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
Add &amp; mirror
</button>
<p class="text-[0.7rem] text-slate-500">
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
</p>
</form>
</section>
<!-- Content search -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Content search</h2>
<form id="search-form" class="space-y-2">
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
Run ripgrep search
</button>
</form>
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
</section>
</main>
</div>
<script>
// Category + name filter
const pills = Array.from(document.querySelectorAll('.cat-pill'));
const rows = Array.from(document.querySelectorAll('#mirror-table tr[data-slug]'));
const searchInput = document.getElementById('search');
function applyFilters() {
const active = pills.find(p => p.classList.contains('cat-pill-active'));
const cat = active ? active.dataset.category : 'all';
const q = (searchInput.value || '').toLowerCase();
rows.forEach(row => {
const cats = row.dataset.categories.split(',').map(s => s.trim());
const searchStr = row.dataset.search;
const matchesCat = (cat === 'all' || cats.includes(cat));
const matchesSearch = (!q || searchStr.includes(q));
row.style.display = (matchesCat && matchesSearch) ? '' : 'none';
});
}
pills.forEach(p => {
p.addEventListener('click', () => {
pills.forEach(x => x.classList.remove('cat-pill-active', 'border-sky-500', 'text-slate-100'));
p.classList.add('cat-pill-active', 'border-sky-500', 'text-slate-100');
applyFilters();
});
});
searchInput.addEventListener('input', applyFilters);
// Live status polling
async function pollStatus() {
try {
const resp = await fetch("{{ url_for('status') }}");
if (!resp.ok) return;
const data = await resp.json();
const bySlug = {};
data.mirrors.forEach(m => bySlug[m.slug] = m);
rows.forEach(row => {
const slug = row.dataset.slug;
const m = bySlug[slug];
if (!m) return;
const tds = row.querySelectorAll('td');
const lastCell = tds[3];
const statusCell = tds[4];
lastCell.innerHTML = m.last_updated_display || '<span class="text-slate-600">never</span>';
const st = m.status || 'idle';
statusCell.innerHTML =
'<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">' +
'<span class="w-2 h-2 rounded-full ' +
(st === "idle" ? "bg-emerald-400" :
st === "updating" ? "bg-amber-400 animate-pulse" :
st === "warning" ? "bg-yellow-400" : "bg-rose-400") +
'"></span>' +
'<span class="capitalize">' + st + '</span>' +
'</div>';
});
} catch (e) {}
}
setInterval(pollStatus, 5000);
// Content search via rg
const searchForm = document.getElementById('search-form');
const contentQuery = document.getElementById('content-query');
const searchResults = document.getElementById('search-results');
searchForm.addEventListener('submit', async (e) => {
e.preventDefault();
const q = contentQuery.value.trim();
if (!q) return;
searchResults.textContent = 'Searching…';
try {
const resp = await fetch("{{ url_for('content_search') }}?q=" + encodeURIComponent(q));
if (!resp.ok) {
searchResults.textContent = 'Search failed.';
return;
}
const data = await resp.json();
if (data.results.length === 0) {
searchResults.textContent = 'No matches.';
return;
}
searchResults.innerHTML = '';
data.results.forEach(r => {
const wrapper = document.createElement('div');
wrapper.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";
const pathLine = document.createElement('div');
pathLine.className = "font-mono text-[0.65rem] text-sky-300 break-all";
if (r.url) {
const link = document.createElement('a');
link.href = r.url;
link.target = "_blank";
link.rel = "noopener noreferrer";
link.textContent = r.path + (r.line ? `:${r.line}` : "");
pathLine.appendChild(link);
} else {
pathLine.textContent = r.path + (r.line ? `:${r.line}` : "");
}
const snippetLine = document.createElement('div');
snippetLine.className = "text-[0.7rem] text-slate-200 whitespace-pre-wrap";
snippetLine.textContent = r.snippet || "";
wrapper.appendChild(pathLine);
wrapper.appendChild(snippetLine);
searchResults.appendChild(wrapper);
});
} catch (e) {
searchResults.textContent = 'Search failed.';
}
});
</script>
</body>
</html>
"""
LOG_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
<head>
<meta charset="utf-8">
<title>Log: {{ slug }}</title>
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
</head>
<body class="h-full bg-slate-950 text-slate-100">
<div class="max-w-5xl mx-auto px-4 py-4 space-y-2">
<div class="flex items-center justify-between mb-2">
<div>
<h1 class="text-sm font-semibold">Log for <span class="font-mono text-sky-400">{{ slug }}</span></h1>
<p class="text-[0.65rem] text-slate-400">Live tail of wget output (auto-refreshing).</p>
</div>
<a href="/mirrors/{{ slug }}/" target="_blank" class="text-xs text-sky-400 hover:text-sky-200">Open mirror</a>
</div>
<div class="border border-slate-800 rounded-xl bg-slate-950/90 max-h-[75vh] overflow-y-auto">
<pre id="log" class="text-[0.65rem] p-3 font-mono whitespace-pre-wrap"></pre>
</div>
</div>
<script>
const logEl = document.getElementById('log');
async function pollLog() {
try {
const resp = await fetch("{{ url_for('log_tail', slug=slug) }}");
if (!resp.ok) return;
const text = await resp.text();
logEl.textContent = text;
logEl.parentElement.scrollTop = logEl.parentElement.scrollHeight;
} catch (e) {}
}
setInterval(pollLog, 1500);
pollLog();
</script>
</body>
</html>
"""
# -------------------- ROUTES --------------------
@app.route("/static/<path:filename>")
def static_file(filename: str):
    """Serve *filename* from STATIC_DIR via Flask's send_from_directory.

    The templates in this file build stylesheet URLs with
    url_for('static_file', filename=...), so the endpoint name and the
    ``filename`` parameter name are part of the contract.
    """
    return send_from_directory(STATIC_DIR, filename)
@app.route("/", methods=["GET"])
def index():
    """Render the dashboard: one table row per mirror plus the category pills."""
    table_rows = []
    seen_categories = set()
    for entry in load_mirrors():
        entry_categories = entry.get("categories") or []
        seen_categories.update(entry_categories)

        stamp = entry.get("last_updated")
        if stamp:
            # Turn "…T…Z" style timestamps into "… … UTC" for display.
            pretty_stamp = stamp.replace("T", " ").replace("Z", " UTC")
        else:
            pretty_stamp = None

        table_rows.append({
            "slug": entry["slug"],
            "categories": entry_categories,
            "categories_joined": ", ".join(entry_categories),
            "url": entry["url"],
            "status": entry.get("status") or "idle",
            "last_updated_raw": stamp,
            "last_updated": pretty_stamp,
        })

    return render_template_string(
        INDEX_TEMPLATE,
        mirrors=table_rows,
        categories=sorted(seen_categories),
        error=None,
    )
# Slugs end up in URLs (/mirrors/<slug>/) and log file names (<slug>.log), so
# only a conservative alphabet without path separators is accepted.
_SLUG_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")


def _render_index_with_error(error):
    """Re-render the dashboard with *error* shown in the add form, as HTTP 400."""
    rows = []
    cats = set()
    for m in load_mirrors():
        cs = m.get("categories") or []
        cats.update(cs)
        raw = m.get("last_updated")
        rows.append({
            "slug": m["slug"],
            "categories": cs,
            "categories_joined": ", ".join(cs),
            "url": m["url"],
            "status": m.get("status") or "idle",
            "last_updated_raw": raw,
            "last_updated": raw.replace("T", " ").replace("Z", " UTC") if raw else None,
        })
    page = render_template_string(
        INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=error
    )
    return page, 400


@app.route("/add", methods=["POST"])
def add_mirror_route():
    """Register a new mirror from the submitted form and start its first update.

    Form fields: ``slug``, ``categories``, ``url`` (all required) and the
    optional ``ignore_robots`` checkbox.

    Returns:
        A redirect to the dashboard on success; otherwise the dashboard
        re-rendered with an error message and status 400.
    """
    slug = (request.form.get("slug") or "").strip()
    categories = (request.form.get("categories") or "").strip()
    url = (request.form.get("url") or "").strip()
    ignore_robots = bool(request.form.get("ignore_robots"))

    if not slug or not categories or not url:
        return _render_index_with_error("Slug, categories, and URL are required.")
    if " " in slug:
        return _render_index_with_error("Slug cannot contain spaces.")
    # Rejects "/", "\\", "..", control characters, etc. that the space check let
    # through even though the slug is later used to build paths and URLs.
    if not _SLUG_RE.fullmatch(slug):
        return _render_index_with_error(
            "Slug may only contain letters, digits, '.', '_' and '-', "
            "and must start with a letter or digit."
        )
    # Requiring an explicit web scheme also keeps option-like values
    # (e.g. starting with "-") from being handed on as a URL.
    if not url.lower().startswith(("http://", "https://")):
        return _render_index_with_error("URL must start with http:// or https://.")

    try:
        add_mirror(slug, categories, url, ignore_robots=ignore_robots)
    except Exception as e:  # request boundary: show whatever add_mirror reported
        return _render_index_with_error(str(e))

    _run_update_in_background(slug)
    return redirect(url_for("index"))
@app.route("/update/<slug>", methods=["POST"])
def trigger_update(slug):
    """Start a background update for the mirror named *slug*.

    Returns:
        A redirect to the dashboard once the worker thread has been started,
        or a plain-text 404 when no mirror with that slug is registered.
    """
    # Do not spawn a worker for a slug that merely appeared in the URL: only
    # slugs present in the mirror list are passed on to update_mirror.
    if not any(m["slug"] == slug for m in load_mirrors()):
        return "Unknown mirror.", 404, {"Content-Type": "text/plain; charset=utf-8"}
    _run_update_in_background(slug)
    return redirect(url_for("index"))
@app.route("/status", methods=["GET"])
def status():
    """Return every mirror's current state as JSON under the "mirrors" key."""

    def describe(entry):
        # Same display transformation as the dashboard; empty string when the
        # mirror has no timestamp.
        stamp = entry.get("last_updated")
        pretty_stamp = stamp.replace("T", " ").replace("Z", " UTC") if stamp else ""
        return {
            "slug": entry["slug"],
            "categories": entry.get("categories") or [],
            "url": entry["url"],
            "status": entry.get("status") or "idle",
            "last_updated": stamp,
            "last_updated_display": pretty_stamp,
        }

    return jsonify({"mirrors": [describe(entry) for entry in load_mirrors()]})
@app.route("/logs/<slug>")
def log_view(slug):
    """Render the live-log page for *slug*.

    The page carries no log data itself; its script polls the log_tail
    endpoint, which already answers with an empty body while no log file
    exists. Nothing is therefore created on disk here: a GET for an arbitrary
    slug must not leave stray ``<slug>.log`` files behind, nor fail with an
    error when the log directory is missing.
    """
    return render_template_string(LOG_TEMPLATE, slug=slug)
@app.route("/logs/<slug>/tail")
def log_tail(slug):
    """Return the last 64 KiB of ``<slug>.log`` as plain text.

    A missing or unreadable log yields an empty 200 response so the polling
    page simply shows nothing.
    """
    # Log output can contain text that originates from remote sites; serve it as
    # text/plain so a browser opening this URL directly never treats it as HTML.
    headers = {
        "Content-Type": "text/plain; charset=utf-8",
        "X-Content-Type-Options": "nosniff",
    }
    tail_bytes = 65536
    log_path = LOG_ROOT / f"{slug}.log"
    try:
        # EAFP: a missing file raises FileNotFoundError, which is an OSError.
        with log_path.open("rb") as f:
            size = f.seek(0, 2)
            f.seek(max(0, size - tail_bytes))
            data = f.read()
    except OSError:
        return "", 200, headers
    # The window may begin in the middle of a multi-byte character, hence
    # errors="replace".
    return data.decode("utf-8", errors="replace"), 200, headers
def strip_html(text: str) -> str:
    """Reduce an HTML fragment to plain text with single spaces between words."""
    # Complete <script>…</script> and <style>…</style> elements are dropped first
    # so that their bodies never reach the output.
    element_patterns = (
        r"<script\b[^<]*(?:(?!</script>)<[^<]*)*</script>",
        r"<style\b[^<]*(?:(?!</style>)<[^<]*)*</style>",
    )
    for pattern in element_patterns:
        text = re.sub(pattern, " ", text, flags=re.IGNORECASE)

    # Every remaining tag becomes a space, keeping neighbouring words apart.
    without_tags = re.sub(r"<[^>]+>", " ", text)

    # Entities (&amp; and friends) are decoded only after the markup is gone.
    decoded = html.unescape(without_tags)

    # Squeeze whitespace runs and trim both ends.
    return re.sub(r"\s+", " ", decoded).strip()
def make_snippet(text: str,
                 query: str,
                 radius: int = 80,
                 max_len: int = 240) -> str:
    """Return a short excerpt of *text* around the first occurrence of *query*.

    The match is located case-insensitively. When *query* does not occur, the
    excerpt is the start of *text*. Either way the excerpt holds at most
    *max_len* characters of *text*, and an ellipsis is added on each side where
    text was cut off.

    Args:
        text: Plain text to excerpt; an empty string yields "".
        query: Text to centre the excerpt on.
        radius: Characters of context kept on each side of the match.
        max_len: Upper bound on the excerpt length (ellipses not counted).
    """
    if not text:
        return ""

    ellipsis = "\u2026"
    idx = text.lower().find(query.lower())
    if idx == -1:
        start, end = 0, min(len(text), max_len)
    else:
        start = max(0, idx - radius)
        end = min(len(text), idx + len(query) + radius)
        if end - start > max_len:
            # Window too wide (large radius or long query): shrink the context
            # evenly so the match itself stays inside the excerpt.
            lead = max(0, (max_len - len(query)) // 2)
            start = max(start, idx - lead)
            end = min(end, start + max_len)

    snippet = text[start:end]
    # Make truncation visible to the reader on whichever side was cut.
    if start > 0:
        snippet = ellipsis + snippet
    if end < len(text):
        snippet += ellipsis
    return snippet
def _search_error(message):
    """Wrap *message* in the one-item result payload the page shows for errors."""
    return jsonify({
        "results": [{
            "path": "(error)",
            "line": 0,
            "url": "",
            "snippet": message,
        }]
    })


@app.route("/search", methods=["GET"])
def content_search():
    """Search the mirrored pages with ripgrep and return up to 50 hits as JSON.

    Query parameter ``q`` is used as the ripgrep pattern. Each result carries
    the path relative to MIRROR_ROOT, the line number, a link under /mirrors/
    and a plain-text snippet of the matching line.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    q = (request.args.get("q") or "").strip()
    if not q:
        return jsonify({"results": []})

    try:
        proc = subprocess.run(
            [
                "rg",
                "--line-number",
                "--no-heading",
                # NUL after each path: paths containing ":" stay parseable.
                "--null",
                "--color", "never",
                "--max-count", "5",  # per file
                "--type-add", "page:*.{html,htm,md,markdown,txt}",
                "-tpage",
                # Pass the pattern as an option value so a query that starts
                # with "-" can never be interpreted as an rg flag.
                "--regexp=" + q,
                "--",
                str(MIRROR_ROOT),
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            # Mirrored files are not guaranteed to be valid UTF-8.
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except FileNotFoundError:
        return _search_error("ripgrep (rg) is not installed.")
    except subprocess.TimeoutExpired:
        return _search_error("rg timed out.")

    # rg exits 0 on matches and 1 on no matches; anything else is an error
    # (e.g. an invalid regular expression). Partial output is still used.
    if proc.returncode not in (0, 1) and not proc.stdout:
        return _search_error("rg failed (is the query a valid regular expression?).")

    results = []
    for line in proc.stdout.split("\n"):
        # Expected shape: "<path>\0<lineno>:<content>".
        path, nul, rest = line.partition("\0")
        lineno_text, colon, raw_content = rest.partition(":")
        if not nul or not colon:
            continue
        try:
            lineno = int(lineno_text)
        except ValueError:
            continue

        # Strip HTML/JS/CSS markup from this line before making a snippet
        text_content = strip_html(raw_content)
        if not text_content:
            continue
        snippet = make_snippet(text_content, q)

        try:
            rel_path = str(Path(path).relative_to(MIRROR_ROOT))
        except ValueError:
            rel_path = path
        # Percent-encode so names containing "?", "#", "%" or spaces still
        # produce a working link; "/" is left as the path separator.
        url = "/mirrors/" + quote(rel_path.replace("\\", "/"))

        results.append({
            "path": rel_path,
            "line": lineno,
            "url": url,
            "snippet": snippet,
        })
        if len(results) >= 50:
            break

    return jsonify({"results": results})
if __name__ == "__main__":
    # Direct execution only: serve on the loopback interface (127.0.0.1:5000)
    # with Flask's debug mode switched off.
    app.run(host="127.0.0.1", port=5000, debug=False)