Another major update

This commit is contained in:
2025-12-02 02:58:50 -05:00
parent e817265e8a
commit 8ae11f4b03
11 changed files with 1605 additions and 419 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
index.html* index.html*
/mirrors /mirrors
/node_modules
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

665
app.py
View File

@@ -1,373 +1,239 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from flask import Flask, request, redirect, url_for, jsonify, render_template_string, abort
import threading
from mirror_manager import ( from mirror_manager import (
load_mirrors, load_mirrors,
add_mirror, add_mirror,
update_mirror, update_mirror,
MIRROR_ROOT,
LOG_ROOT, LOG_ROOT,
) )
import subprocess
import threading
from pathlib import Path
from flask import (
Flask,
request,
redirect,
url_for,
jsonify,
send_from_directory,
render_template_string
)
BASE = Path("/srv/www")
STATIC_DIR = BASE / "static"
STATIC_DIR.mkdir(exist_ok=True)
app = Flask(__name__) app = Flask(__name__)
# --- background update helper ---
def _run_update_in_background(slug: str): def _run_update_in_background(slug: str):
th = threading.Thread(target=update_mirror, args=(slug,), daemon=True) th = threading.Thread(target=update_mirror, args=(slug,), daemon=True)
th.start() th.start()
# --- templates --- # -------------------- TEMPLATES --------------------
INDEX_TEMPLATE = r""" INDEX_TEMPLATE = r"""
<!doctype html> <!doctype html>
<html> <html class="h-full">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>Mirror Manager</title> <title>Mirror Manager</title>
<style> <link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
:root {
color-scheme: dark light;
}
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
margin: 0;
padding: 0;
background: #0f172a;
color: #e5e7eb;
}
main {
max-width: 1100px;
margin: 2rem auto;
padding: 0 1rem 3rem;
}
header {
display: flex;
flex-wrap: wrap;
align-items: baseline;
gap: 0.5rem 1rem;
justify-content: space-between;
margin-bottom: 1.5rem;
}
h1 {
font-size: 1.75rem;
margin: 0;
}
.subtitle { color: #9ca3af; font-size: 0.9rem; }
.card {
background: #020617;
border-radius: 0.75rem;
padding: 1rem 1.2rem;
box-shadow: 0 10px 30px rgba(0,0,0,0.4);
border: 1px solid #1f2937;
}
.grid {
display: grid;
grid-template-columns: minmax(0, 2fr) minmax(0, 3fr);
gap: 1rem;
align-items: flex-start;
}
@media (max-width: 900px) {
.grid {
grid-template-columns: minmax(0, 1fr);
}
}
label {
display: block;
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #9ca3af;
margin-bottom: 0.25rem;
}
input[type=text], select {
width: 100%;
padding: 0.4rem 0.5rem;
border-radius: 0.5rem;
border: 1px solid #374151;
background: #020617;
color: #e5e7eb;
font-size: 0.9rem;
}
input[type=text]:focus, select:focus {
outline: none;
border-color: #3b82f6;
box-shadow: 0 0 0 1px #3b82f6;
}
.btn {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 0.4rem;
padding: 0.5rem 0.9rem;
border-radius: 999px;
border: none;
cursor: pointer;
font-size: 0.9rem;
font-weight: 500;
}
.btn-primary {
background: linear-gradient(135deg, #3b82f6, #8b5cf6);
color: white;
}
.btn-secondary {
background: transparent;
border: 1px solid #374151;
color: #e5e7eb;
}
.btn[disabled] {
opacity: 0.5;
cursor: default;
}
.toolbar {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-bottom: 0.75rem;
align-items: center;
justify-content: space-between;
}
.toolbar-left, .toolbar-right {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
align-items: center;
}
.pill {
font-size: 0.8rem;
padding: 0.25rem 0.6rem;
border-radius: 999px;
border: 1px solid #374151;
background: #020617;
cursor: pointer;
}
.pill.active {
background: #3b82f6;
border-color: #3b82f6;
color: white;
}
table {
width: 100%;
border-collapse: collapse;
font-size: 0.9rem;
}
th, td {
padding: 0.45rem 0.5rem;
text-align: left;
border-bottom: 1px solid #111827;
vertical-align: middle;
}
th {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #9ca3af;
}
tr:hover td {
background: rgba(31,41,55,0.6);
}
code {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 0.8rem;
}
.badge {
font-size: 0.75rem;
padding: 0.1rem 0.5rem;
border-radius: 999px;
text-transform: uppercase;
letter-spacing: 0.06em;
}
.badge-idle { background: #065f46; color: #a7f3d0; }
.badge-updating { background: #92400e; color: #fed7aa; }
.badge-error { background: #7f1d1d; color: #fecaca; }
.badge-queued { background: #1f2937; color: #e5e7eb; }
.status-dot {
width: 0.6rem;
height: 0.6rem;
border-radius: 999px;
display: inline-block;
margin-right: 0.3rem;
}
.status-idle { background: #22c55e; }
.status-updating { background: #f97316; animation: pulse 1.2s infinite; }
.status-error { background: #ef4444; }
.status-queued { background: #6b7280; }
@keyframes pulse {
0% { transform: scale(1); opacity: 1; }
50% { transform: scale(1.25); opacity: 0.7; }
100% { transform: scale(1); opacity: 1; }
}
.log-link {
font-size: 0.8rem;
color: #93c5fd;
text-decoration: none;
}
.log-link:hover {
text-decoration: underline;
}
.muted { color: #6b7280; font-size: 0.8rem; }
.search-input {
min-width: 220px;
}
</style>
</head> </head>
<body> <body class="h-full bg-slate-950 text-slate-100">
<main> <div class="min-h-full">
<header> <header class="border-b border-slate-800 bg-slate-950/80 backdrop-blur">
<div class="max-w-6xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
<div> <div>
<h1>Mirror Manager</h1> <h1 class="text-xl font-semibold tracking-tight">Mirror Manager</h1>
<div class="subtitle">Local archive of external sites, grouped by category.</div> <p class="text-xs text-slate-400">Local offline mirrors of external sites, grouped by category.</p>
</div>
<div class="flex items-center gap-2 text-xs text-slate-400">
<span class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 bg-slate-900/70">
<span class="w-2 h-2 rounded-full bg-emerald-400"></span>
Running locally
</span>
</div>
</div> </div>
</header> </header>
<div class="grid"> <main class="max-w-6xl mx-auto px-4 py-4 space-y-4">
<!-- Left: mirror list --> <div class="flex flex-col lg:flex-row gap-4">
<section class="card"> <!-- Left: mirrors list -->
<div class="toolbar"> <section class="flex-1 bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<div class="toolbar-left"> <div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
<span class="muted">Categories:</span> <div class="flex flex-wrap items-center gap-2">
<button class="pill active" data-category="all">All ({{ mirrors|length }})</button> <span class="text-xs text-slate-400">Categories:</span>
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
All ({{ mirrors|length }})
</button>
{% for cat in categories %} {% for cat in categories %}
<button class="pill" data-category="{{ cat }}">{{ cat }}</button> <button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
{{ cat }}
</button>
{% endfor %} {% endfor %}
</div> </div>
<div class="toolbar-right"> <div class="flex flex-col sm:flex-row gap-2">
<input type="text" id="search" class="search-input" placeholder="Search slug / URL / category…"> <div class="relative">
<input id="search" class="w-full sm:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Filter by slug / URL / category…" />
</div>
</div> </div>
</div> </div>
<table id="mirror-table"> <div class="overflow-x-auto border border-slate-800 rounded-xl">
<thead> <table class="min-w-full text-sm">
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
<tr> <tr>
<th>Slug</th> <th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
<th>Category</th> <th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
<th>URL</th> <th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
<th>Last updated</th> <th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
<th>Status</th> <th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
<th></th> <th class="px-3 py-2 text-left"></th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody id="mirror-table" class="divide-y divide-slate-900/80">
{% for m in mirrors %} {% for m in mirrors %}
<tr data-slug="{{ m.slug }}" data-category="{{ m.category }}" data-search="{{ (m.slug ~ ' ' ~ m.category ~ ' ' ~ m.url)|lower }}"> <tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
<td> <td class="px-3 py-2 align-top">
<a href="/mirrors/{{ m.slug }}/" target="_blank"> <div class="flex flex-col gap-1">
<code>{{ m.slug }}</code> <a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
{{ m.slug }}
</a> </a>
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
View live log
</a>
</div>
</td> </td>
<td>{{ m.category }}</td> <td class="px-3 py-2 align-top">
<td><code>{{ m.url }}</code></td> <div class="flex flex-wrap gap-1">
<td> {% for c in m.categories %}
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
{% endfor %}
</div>
</td>
<td class="px-3 py-2 align-top max-w-xs">
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
</td>
<td class="px-3 py-2 align-top text-xs text-slate-300">
{% if m.last_updated %} {% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span> <span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %} {% else %}
<span class="muted">never</span> <span class="text-slate-600">never</span>
{% endif %} {% endif %}
</td> </td>
<td> <td class="px-3 py-2 align-top text-xs">
{% set st = m.status or 'idle' %} {% set st = m.status or 'idle' %}
<span class="status-dot status-{{ st }}"></span> <div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
<span class="badge badge-{{ st }}">{{ st }}</span> <span class="w-2 h-2 rounded-full
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
<span class="capitalize">{{ st }}</span>
</div>
</td> </td>
<td> <td class="px-3 py-2 align-top text-right text-[0.7rem]">
<a class="log-link" href="{{ url_for('view_log', slug=m.slug) }}" target="_blank">log</a> <form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
&nbsp;·&nbsp; <button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" style="display:inline;"> <span>Update</span>
<button class="btn btn-secondary" style="padding:0.2rem 0.6rem; font-size:0.75rem;">Update</button> </button>
</form> </form>
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
{% if mirrors|length == 0 %} {% if mirrors|length == 0 %}
<tr><td colspan="6" class="muted">No mirrors yet. Add one on the right.</td></tr> <tr>
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
No mirrors yet. Add one on the right.
</td>
</tr>
{% endif %} {% endif %}
</tbody> </tbody>
</table> </table>
</div>
</section> </section>
<!-- Right: add mirror --> <!-- Right: add mirror + content search -->
<section class="card"> <section class="w-full lg:w-80 flex flex-col gap-4">
<h2 style="margin-top:0; font-size:1.1rem;">Add mirror</h2> <div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<form method="post" action="{{ url_for('add_mirror_route') }}"> <h2 class="text-sm font-semibold mb-2">Add mirror</h2>
<div style="margin-bottom:0.6rem;"> <form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
<label for="slug">Slug</label> <div>
<input type="text" id="slug" name="slug" required placeholder="e.g. python_tutorial"> <label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
</div> </div>
<div style="margin-bottom:0.6rem;"> <div>
<label for="category">Category</label> <label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
<input type="text" id="category" name="category" required placeholder="e.g. tutorial, docs, blog"> <input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
</div> </div>
<div style="margin-bottom:0.6rem;"> <div>
<label for="url">URL</label> <label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
<input type="text" id="url" name="url" required placeholder="https://example.com/some/path/"> <input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
</div> </div>
<div style="margin-bottom:0.8rem;"> <div class="flex items-start gap-2">
<label style="display:flex; align-items:center; gap:0.4rem;"> <input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
<input type="checkbox" name="ignore_robots" value="1"> <label for="ignore_robots" class="text-xs text-slate-400">
<span style="text-transform:none; letter-spacing:0; font-size:0.85rem;"> Ignore robots.txt (only if you explicitly want to archive disallowed paths).
Ignore robots.txt (not recommended unless you know you need it)
</span>
</label> </label>
</div> </div>
{% if error %} {% if error %}
<div style="color:#fecaca; font-size:0.85rem; margin-bottom:0.5rem;">{{ error }}</div> <p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
{% endif %} {% endif %}
<button type="submit" class="btn btn-primary">Add &amp; mirror</button> <button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
<p class="muted" style="margin-top:0.5rem;"> Add &amp; mirror
New mirrors are cloned in the background. Status will show as <strong>updating</strong> until done. </button>
<p class="text-[0.7rem] text-slate-500">
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
</p> </p>
</form> </form>
</div>
<div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Content search</h2>
<form id="search-form" class="space-y-2">
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
Run ripgrep search
</button>
</form>
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
</div>
</section> </section>
</div> </div>
</main> </main>
</div>
<script> <script>
// category filter // Category + name filter
const pills = Array.from(document.querySelectorAll('.pill')); const pills = Array.from(document.querySelectorAll('.cat-pill'));
const rows = Array.from(document.querySelectorAll('#mirror-table tbody tr')); const rows = Array.from(document.querySelectorAll('#mirror-table tr[data-slug]'));
const searchInput = document.getElementById('search'); const searchInput = document.getElementById('search');
function applyFilters() { function applyFilters() {
const activePill = pills.find(p => p.classList.contains('active')); const active = pills.find(p => p.classList.contains('cat-pill-active'));
const cat = activePill ? activePill.dataset.category : 'all'; const cat = active ? active.dataset.category : 'all';
const q = (searchInput.value || '').toLowerCase(); const q = (searchInput.value || '').toLowerCase();
rows.forEach(row => { rows.forEach(row => {
const rowCat = row.dataset.category; const cats = row.dataset.categories.split(',').map(s => s.trim());
const searchStr = row.dataset.search; const searchStr = row.dataset.search;
const matchCat = (cat === 'all' || rowCat === cat); const matchesCat = (cat === 'all' || cats.includes(cat));
const matchSearch = (!q || searchStr.includes(q)); const matchesSearch = (!q || searchStr.includes(q));
row.style.display = (matchCat && matchSearch) ? '' : 'none'; row.style.display = (matchesCat && matchesSearch) ? '' : 'none';
}); });
} }
pills.forEach(p => { pills.forEach(p => {
p.addEventListener('click', () => { p.addEventListener('click', () => {
pills.forEach(x => x.classList.remove('active')); pills.forEach(x => x.classList.remove('cat-pill-active', 'border-sky-500', 'text-slate-100'));
p.classList.add('active'); p.classList.add('cat-pill-active', 'border-sky-500', 'text-slate-100');
applyFilters(); applyFilters();
}); });
}); });
searchInput.addEventListener('input', () => { searchInput.addEventListener('input', applyFilters);
applyFilters();
});
// polling for live status // Live status polling
async function pollStatus() { async function pollStatus() {
try { try {
const resp = await fetch("{{ url_for('status') }}"); const resp = await fetch("{{ url_for('status') }}");
@@ -381,113 +247,199 @@ INDEX_TEMPLATE = r"""
const m = bySlug[slug]; const m = bySlug[slug];
if (!m) return; if (!m) return;
const tds = row.querySelectorAll('td'); const tds = row.querySelectorAll('td');
// last updated const lastCell = tds[3];
const lastUpdatedCell = tds[3];
lastUpdatedCell.innerHTML = m.last_updated_display || '<span class="muted">never</span>';
// status
const statusCell = tds[4]; const statusCell = tds[4];
lastCell.innerHTML = m.last_updated_display || '<span class="text-slate-600">never</span>';
const st = m.status || 'idle'; const st = m.status || 'idle';
statusCell.innerHTML = statusCell.innerHTML =
'<span class="status-dot status-' + st + '"></span>' + '<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">' +
'<span class="badge badge-' + st + '">' + st + '</span>'; '<span class="w-2 h-2 rounded-full ' +
(st === "idle" ? "bg-emerald-400" :
st === "updating" ? "bg-amber-400 animate-pulse" :
st === "warning" ? "bg-yellow-400" : "bg-rose-400") +
'"></span>' +
'<span class="capitalize">' + st + '</span>' +
'</div>';
});
} catch (e) {}
}
setInterval(pollStatus, 5000);
// Content search via rg
const searchForm = document.getElementById('search-form');
const contentQuery = document.getElementById('content-query');
const searchResults = document.getElementById('search-results');
searchForm.addEventListener('submit', async (e) => {
e.preventDefault();
const q = contentQuery.value.trim();
if (!q) return;
searchResults.textContent = 'Searching…';
try {
const resp = await fetch("{{ url_for('content_search') }}?q=" + encodeURIComponent(q));
if (!resp.ok) {
searchResults.textContent = 'Search failed.';
return;
}
const data = await resp.json();
if (data.results.length === 0) {
searchResults.textContent = 'No matches.';
return;
}
searchResults.innerHTML = '';
data.results.forEach(r => {
const div = document.createElement('div');
div.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";
div.innerHTML =
'<div class="font-mono text-[0.65rem] text-sky-300 break-all">' + r.path + '</div>' +
'<div class="text-[0.7rem] text-slate-200 whitespace-pre-wrap">' + r.line + '</div>';
searchResults.appendChild(div);
}); });
} catch (e) { } catch (e) {
// ignore searchResults.textContent = 'Search failed.';
} }
} });
setInterval(pollStatus, 5000);
</script> </script>
</body> </body>
</html> </html>
""" """
# --- routes --- LOG_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
<head>
<meta charset="utf-8">
<title>Log: {{ slug }}</title>
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
</head>
<body class="h-full bg-slate-950 text-slate-100">
<div class="max-w-5xl mx-auto px-4 py-4 space-y-2">
<div class="flex items-center justify-between mb-2">
<div>
<h1 class="text-sm font-semibold">Log for <span class="font-mono text-sky-400">{{ slug }}</span></h1>
<p class="text-[0.65rem] text-slate-400">Live tail of wget output (auto-refreshing).</p>
</div>
<a href="/mirrors/{{ slug }}/" target="_blank" class="text-xs text-sky-400 hover:text-sky-200">Open mirror</a>
</div>
<div class="border border-slate-800 rounded-xl bg-slate-950/90 max-h-[75vh] overflow-y-auto">
<pre id="log" class="text-[0.65rem] p-3 font-mono whitespace-pre-wrap"></pre>
</div>
</div>
<script>
const logEl = document.getElementById('log');
async function pollLog() {
try {
const resp = await fetch("{{ url_for('log_tail', slug=slug) }}");
if (!resp.ok) return;
const text = await resp.text();
logEl.textContent = text;
logEl.parentElement.scrollTop = logEl.parentElement.scrollHeight;
} catch (e) {}
}
setInterval(pollLog, 1500);
pollLog();
</script>
</body>
</html>
"""
# -------------------- ROUTES --------------------
@app.route("/static/<path:filename>")
def static_file(filename):
    """Serve ``filename`` from ``STATIC_DIR``.

    The page templates build their stylesheet link through
    ``url_for('static_file', filename='tailwind.css')``, so this endpoint is
    what delivers the compiled Tailwind CSS.
    """
    # NOTE(review): the app is created as ``Flask(__name__)`` without
    # ``static_folder=None``, so Flask also registers its own
    # ``/static/<path:filename>`` rule (endpoint ``static``). That built-in
    # rule presumably takes precedence for incoming requests, in which case
    # files are served from the app's own ``static`` folder rather than
    # ``STATIC_DIR`` -- confirm the two directories are the same, or disable
    # the built-in route.
    return send_from_directory(STATIC_DIR, filename)
@app.route("/", methods=["GET"]) @app.route("/", methods=["GET"])
def index(): def index():
mirrors = load_mirrors() mirrors = load_mirrors()
categories = sorted({m["category"] for m in mirrors}) cats = set()
# format last_updated nicely
rows = [] rows = []
for m in mirrors: for m in mirrors:
last_disp = None categories = m.get("categories") or []
for c in categories:
cats.add(c)
raw = m.get("last_updated") raw = m.get("last_updated")
if raw: disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
last_disp = raw.replace("T", " ").replace("Z", " UTC")
rows.append({ rows.append({
"slug": m["slug"], "slug": m["slug"],
"category": m["category"], "categories": categories,
"categories_joined": ", ".join(categories),
"url": m["url"], "url": m["url"],
"status": m.get("status") or "idle", "status": m.get("status") or "idle",
"last_updated_raw": raw, "last_updated_raw": raw,
"last_updated": last_disp, "last_updated": disp,
}) })
return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=categories, error=None) return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=None)
@app.route("/add", methods=["POST"]) @app.route("/add", methods=["POST"])
def add_mirror_route(): def add_mirror_route():
slug = (request.form.get("slug") or "").strip() slug = (request.form.get("slug") or "").strip()
category = (request.form.get("category") or "").strip() categories = (request.form.get("categories") or "").strip()
url = (request.form.get("url") or "").strip() url = (request.form.get("url") or "").strip()
ignore_robots = bool(request.form.get("ignore_robots")) ignore_robots = bool(request.form.get("ignore_robots"))
error = None error = None
if not slug or not category or not url: if not slug or not categories or not url:
error = "Slug, category, and URL are required." error = "Slug, categories, and URL are required."
elif " " in slug: elif " " in slug:
error = "Slug cannot contain spaces." error = "Slug cannot contain spaces."
if error: if error:
# re-render with error
mirrors = load_mirrors() mirrors = load_mirrors()
categories = sorted({m["category"] for m in mirrors}) cats = set()
rows = [] rows = []
for m in mirrors: for m in mirrors:
cs = m.get("categories") or []
for c in cs:
cats.add(c)
raw = m.get("last_updated") raw = m.get("last_updated")
last_disp = raw.replace("T", " ").replace( disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
"Z", " UTC") if raw else None
rows.append({ rows.append({
"slug": m["slug"], "slug": m["slug"],
"category": m["category"], "categories": cs,
"categories_joined": ", ".join(cs),
"url": m["url"], "url": m["url"],
"status": m.get("status") or "idle", "status": m.get("status") or "idle",
"last_updated_raw": raw, "last_updated_raw": raw,
"last_updated": last_disp, "last_updated": disp,
}) })
return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=categories, error=error), 400 return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=error), 400
try: try:
add_mirror(slug, category, url, ignore_robots=ignore_robots) add_mirror(slug, categories, url, ignore_robots=ignore_robots)
except Exception as e: except Exception as e:
mirrors = load_mirrors() mirrors = load_mirrors()
categories = sorted({m["category"] for m in mirrors}) cats = set()
rows = [] rows = []
for m in mirrors: for m in mirrors:
cs = m.get("categories") or []
for c in cs:
cats.add(c)
raw = m.get("last_updated") raw = m.get("last_updated")
last_disp = raw.replace("T", " ").replace( disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
"Z", " UTC") if raw else None
rows.append({ rows.append({
"slug": m["slug"], "slug": m["slug"],
"category": m["category"], "categories": cs,
"categories_joined": ", ".join(cs),
"url": m["url"], "url": m["url"],
"status": m.get("status") or "idle", "status": m.get("status") or "idle",
"last_updated_raw": raw, "last_updated_raw": raw,
"last_updated": last_disp, "last_updated": disp,
}) })
return render_template_string(INDEX_TEMPLATE, return render_template_string(INDEX_TEMPLATE, mirrors=rows, categories=sorted(cats), error=str(e)), 400
mirrors=rows,
categories=categories,
error=str(e)), 400
# kick off background update
_run_update_in_background(slug) _run_update_in_background(slug)
return redirect(url_for("index")) return redirect(url_for("index"))
@app.route("/update/<slug>", methods=["POST"]) @app.route("/update/<slug>", methods=["POST"])
def trigger_update(slug): def trigger_update(slug):
# fire-and-forget; UI will see status flip to 'updating'
_run_update_in_background(slug) _run_update_in_background(slug)
return redirect(url_for("index")) return redirect(url_for("index"))
@@ -498,25 +450,78 @@ def status():
out = [] out = []
for m in mirrors: for m in mirrors:
raw = m.get("last_updated") raw = m.get("last_updated")
last_disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
out.append({ out.append({
"slug": m["slug"], "slug": m["slug"],
"category": m["category"], "categories": m.get("categories") or [],
"url": m["url"], "url": m["url"],
"status": m.get("status") or "idle", "status": m.get("status") or "idle",
"last_updated": raw, "last_updated": raw,
"last_updated_display": last_disp or "", "last_updated_display": disp or "",
}) })
return jsonify({"mirrors": out}) return jsonify({"mirrors": out})
@app.route("/logs/<slug>") @app.route("/logs/<slug>")
def view_log(slug): def log_view(slug):
log_path = LOG_ROOT / f"{slug}.log" log_path = LOG_ROOT / f"{slug}.log"
if not log_path.exists(): if not log_path.exists():
abort(404) log_path.touch()
text = log_path.read_text(encoding="utf-8", errors="replace") return render_template_string(LOG_TEMPLATE, slug=slug)
return "<pre>" + (text.replace("&", "&amp;").replace("<", "&lt;")) + "</pre>"
@app.route("/logs/<slug>/tail")
def log_tail(slug):
    """Return the last ~64 KiB of the log for ``slug`` as plain text.

    The log page polls this endpoint and drops the body into a ``<pre>``.
    A missing or unreadable log yields an empty 200 response so the poller
    simply shows nothing instead of erroring.

    Returns:
        A ``(body, status, headers)`` tuple. The body is the decoded tail of
        the log file; undecodable bytes are replaced rather than raising.
    """
    # The log holds output derived from remote sites (URLs, file names).
    # Flask serves a bare ``str`` as text/html, so declare it as plain text to
    # keep a browser from interpreting it as markup when the URL is opened
    # directly.
    headers = {"Content-Type": "text/plain; charset=utf-8"}
    max_bytes = 65536
    log_path = LOG_ROOT / f"{slug}.log"

    try:
        # Open directly instead of checking exists() first: the file can
        # vanish between the check and the open, and FileNotFoundError is an
        # OSError anyway.
        with log_path.open("rb") as f:
            size = f.seek(0, 2)  # whence=2: jump to end, returns file size
            truncated = size > max_bytes
            f.seek(size - max_bytes if truncated else 0)
            data = f.read()
    except OSError:
        return "", 200, headers

    if truncated:
        # The cut point is an arbitrary byte offset: it may land mid-line or
        # in the middle of a multi-byte UTF-8 sequence. Drop the partial
        # first line when there is a newline to resynchronise on.
        _, newline, remainder = data.partition(b"\n")
        if newline:
            data = remainder

    return data.decode("utf-8", errors="replace"), 200, headers
@app.route("/search", methods=["GET"])
def content_search():
    """Search the text of all mirrors with ripgrep and return JSON matches.

    Query parameters:
        q: the ripgrep pattern to search for. Blank means "no results".

    Returns:
        JSON ``{"results": [{"path": "<relative path>:<line no>",
        "line": "<matched line, stripped>"}, ...]}`` with at most 50 entries.
        When ``rg`` is missing or exceeds the 10 second timeout, a single
        pseudo-result with path ``"(error)"`` describes the problem.
    """
    q = (request.args.get("q") or "").strip()
    if not q:
        return jsonify({"results": []})

    max_results = 50
    try:
        proc = subprocess.run(
            [
                "rg",
                "--line-number",
                "--no-heading",
                "--with-filename",
                # Terminate the path with NUL instead of ':' so file names
                # that contain a colon (e.g. "host:8080/...") parse correctly.
                "--null",
                "--color", "never",
                # Pass the query explicitly as the pattern and end option
                # parsing before the path: a query that starts with '-' must
                # never be interpreted as an rg flag (e.g. --pre, which makes
                # rg run an external command).
                "--regexp", q,
                "--",
                str(MIRROR_ROOT),
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            # Mirrored pages are not guaranteed to be valid UTF-8; replace
            # bad bytes instead of failing the whole request.
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except FileNotFoundError:
        return jsonify({"results": [{"path": "(error)", "line": "ripgrep (rg) not installed"}]})
    except subprocess.TimeoutExpired:
        return jsonify({"results": [{"path": "(error)", "line": "rg timed out"}]})

    # NOTE(review): "path" and "line" are raw text taken from mirrored
    # content. The page script currently inserts them via innerHTML; it
    # should use textContent (or escape) to avoid rendering them as markup.
    results = []
    # Split on "\n" only: str.splitlines() would also break on form feeds,
    # U+2028, etc. that may legitimately occur inside a matched line.
    for record in proc.stdout.split("\n"):
        if len(results) >= max_results:
            break
        path, nul, rest = record.partition("\0")
        lineno, colon, content = rest.partition(":")
        if not nul or not colon or not lineno.isdigit():
            continue  # blank line or anything that is not "path\0N:text"
        try:
            rel = Path(path).relative_to(MIRROR_ROOT)
        except ValueError:
            # Should not happen since rg was rooted at MIRROR_ROOT, but do
            # not turn an odd path into a 500.
            rel = Path(path)
        results.append({
            "path": f"{rel}:{lineno}",
            "line": content.strip(),
        })
    return jsonify({"results": results})
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,12 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""
Manage the various mirrors for the mirror website.
"""
import json import json
import subprocess import subprocess
import datetime as dt import datetime as dt
from pathlib import Path from pathlib import Path
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
BASE = Path("/srv/www") BASE = Path("/srv/www")
DATA_FILE = BASE / "data" / "mirrors.json" DATA_FILE = BASE / "data" / "mirrors.json"
@@ -17,12 +15,15 @@ MIRROR_ROOT.mkdir(parents=True, exist_ok=True)
LOG_ROOT.mkdir(parents=True, exist_ok=True) LOG_ROOT.mkdir(parents=True, exist_ok=True)
DATA_FILE.parent.mkdir(parents=True, exist_ok=True) DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
_LOCK = threading.Lock()
def _now_iso() -> str: def _now_iso() -> str:
return dt.datetime.utcnow().replace(microsecond=0).isoformat() + "Z" return dt.datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
def load_mirrors() -> list[dict]: def load_mirrors() -> list[dict]:
with _LOCK:
if not DATA_FILE.exists(): if not DATA_FILE.exists():
return [] return []
with DATA_FILE.open("r", encoding="utf-8") as f: with DATA_FILE.open("r", encoding="utf-8") as f:
@@ -30,6 +31,7 @@ def load_mirrors() -> list[dict]:
def save_mirrors(mirrors: list[dict]) -> None: def save_mirrors(mirrors: list[dict]) -> None:
with _LOCK:
tmp = DATA_FILE.with_suffix(".tmp") tmp = DATA_FILE.with_suffix(".tmp")
with tmp.open("w", encoding="utf-8") as f: with tmp.open("w", encoding="utf-8") as f:
json.dump(mirrors, f, indent=2) json.dump(mirrors, f, indent=2)
@@ -43,40 +45,63 @@ def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
return None return None
def _normalise_categories(raw: str) -> list[str]:
    """Split a comma-separated category string into a clean list.

    Whitespace around each name is stripped, empty entries are dropped, and
    repeated names are collapsed to their first occurrence so a mirror never
    stores (or displays) the same category twice.

    Example: ``"tutorials, wgpu, rust, wgpu"`` -> ``["tutorials", "wgpu", "rust"]``
    """
    names = (part.strip() for part in raw.split(","))
    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(name for name in names if name))
def add_mirror(slug: str, def add_mirror(slug: str,
category: str, categories: str,
url: str, url: str,
ignore_robots: bool = False) -> dict: ignore_robots: bool = False) -> dict:
mirrors = load_mirrors() mirrors = load_mirrors()
if get_mirror(mirrors, slug) is not None: if get_mirror(mirrors, slug) is not None:
raise ValueError(f"Mirror with slug '{slug}' already exists!") raise ValueError(f"Mirror with slug '{slug}' already exists")
cats = _normalise_categories(categories)
if not cats:
raise ValueError("At least one category is required")
m = { m = {
"slug": slug, "slug": slug,
"category": category, "categories": cats,
"url": url, "url": url,
"ignore_robots": bool(ignore_robots), "ignore_robots": bool(ignore_robots),
"created_at": _now_iso(), "created_at": _now_iso(),
"last_updated": None, "last_updated": None,
"status": "queued", "status": "queued", # idle | updating | queued | warning | error
"last_error": None, "last_error": None,
} }
mirrors.append(m) mirrors.append(m)
save_mirrors(mirrors) save_mirrors(mirrors)
return m return m
# Sentinel meaning "argument not supplied". It lets callers pass an explicit
# None for last_error to clear a stale error message.
_UNSET = object()

# Serialises the load -> modify -> save cycle in _set_status. load_mirrors()
# and save_mirrors() each take _LOCK on their own, which does not prevent two
# worker threads from interleaving and overwriting each other's update.
_STATUS_LOCK = threading.Lock()


def _set_status(slug: str, *,
                status: str,
                last_error: str | None = _UNSET,
                last_updated: str | None = None):
    """Persist a status change for the mirror identified by *slug*.

    The mirror list is reloaded, the matching entry is updated and the list
    is saved again, all while holding a lock so that concurrent status
    updates do not lose each other's writes. Unknown slugs are ignored.

    Args:
        slug: Identifier of the mirror to update.
        status: New value stored under the "status" key.
        last_error: New value for "last_error". Pass None to clear a
            previous error; omit the argument to leave the stored value
            untouched.
        last_updated: New value for "last_updated"; None leaves the stored
            value untouched.
    """
    with _STATUS_LOCK:
        mirrors = load_mirrors()
        m = get_mirror(mirrors, slug)
        if m is None:
            return
        m["status"] = status
        # Compare against the sentinel rather than None so that an explicit
        # last_error=None actually resets the field.
        if last_error is not _UNSET:
            m["last_error"] = last_error
        if last_updated is not None:
            m["last_updated"] = last_updated
        save_mirrors(mirrors)
def update_mirror(slug: str) -> None: def update_mirror(slug: str) -> None:
"""Run wget mirror for a singel slug (blocking).""" """Run wget mirror for a single slug (blocking in this thread)."""
mirrors = load_mirrors() mirrors = load_mirrors()
m = get_mirror(mirrors, slug) m = get_mirror(mirrors, slug)
if m is None: if m is None:
raise ValueError(f"No such mirror: {slug}") raise ValueError(f"No such mirror: {slug}")
m["status"] = "updating" _set_status(slug, status="updating", last_error=None)
m["last_error"] = None
save_mirrors(mirrors)
target_dir = MIRROR_ROOT / slug target_dir = MIRROR_ROOT / slug
target_dir.mkdir(parents=True, exist_ok=True) target_dir.mkdir(parents=True, exist_ok=True)
@@ -84,19 +109,19 @@ def update_mirror(slug: str) -> None:
robots_setting = "off" if m.get("ignore_robots") else "on" robots_setting = "off" if m.get("ignore_robots") else "on"
# Polite wget:
# --mirror implies -r -N -l inf --no-remove-listing
cmd = [ cmd = [
"wget", "wget",
"--mirror", "--mirror", # recurse, keep timestamps
"--convert-links", "--convert-links",
"--adjust-extension", "--adjust-extension",
"--page-requisites", "--page-requisites",
"--no-parent", "--no-parent",
"--wait=0.70", "--wait=0.5",
"--random-wait", "--random-wait",
# "--limit-rate=50m", "--limit-rate=50m",
f"execute=robots={robots_setting}", "--tries=3",
"--retry-connrefused",
f"--execute=robots={robots_setting}",
"-P", "-P",
str(target_dir), str(target_dir),
m["url"], m["url"],
@@ -104,35 +129,62 @@ def update_mirror(slug: str) -> None:
try: try:
with log_file.open("a", encoding="utf-8") as lf: with log_file.open("a", encoding="utf-8") as lf:
lf.write(f"\n=== {_now_iso()} : " lf.write(f"\n=== {_now_iso()} : Starting mirror of {
f"Starting mirror of {m['url']} ===\n") m['url']} ===\n")
lf.flush() lf.flush()
subprocess.run( proc = subprocess.run(
cmd, cmd,
stdout=lf, stdout=lf,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
check=True,
) )
lf.write(f"=== {_now_iso()} : Completed mirror of {m['url']} ===\n") lf.write(f"=== {_now_iso()} : wget exited with code {
proc.returncode} ===\n")
lf.flush() lf.flush()
m["last_updated"] = _now_iso()
m["status"] = "idle" # Classify result
m["last_error"] = None if proc.returncode == 0:
except subprocess.CalledProcessError as e: _set_status(slug, status="idle",
m["status"] = "error" last_updated=_now_iso(), last_error=None)
m["last_error"] = f"wget exited with {e.returncode}" else:
with log_file.open("a", encoding="utf-8") as lf: # If we see FINISHED in the log and the directory has content,
lf.write(f"*** ERROR: wget failed with code {e.returncode}\n") # treat this as a partial/ok-with-warnings case.
text = log_file.read_text(encoding="utf-8", errors="ignore")
has_finished = "FINISHED --" in text
has_files = any(target_dir.rglob("*"))
if has_finished and has_files:
_set_status(
slug,
status="warning",
last_updated=_now_iso(),
last_error=f"wget exited with {
proc.returncode} (partial; see log)",
)
else:
_set_status(
slug,
status="error",
last_error=f"wget exited with {proc.returncode}",
)
except Exception as e: except Exception as e:
m["status"] = "error" _set_status(
m["last_error"] = f"{type(e).__name__}: {e}" slug,
with log_file.open("a", encoding="utf-8") as lf: status="error",
lf.write(f"*** ERROR: {type(e).__name__}: {e}\n") last_error=f"{type(e).__name__}: {e}",
finally: )
save_mirrors(mirrors)
def update_all_mirrors() -> None: def update_all_mirrors(max_workers: int = 3) -> None:
mirrors = load_mirrors() mirrors = load_mirrors()
for m in mirrors: slugs = [m["slug"] for m in mirrors]
update_mirror(m["slug"]) if not slugs:
return
# Run several in parallel
with ThreadPoolExecutor(max_workers=max_workers) as pool:
futures = {pool.submit(update_mirror, slug): slug for slug in slugs}
for fut in as_completed(futures):
slug = futures[fut]
try:
fut.result()
except Exception as e:
_set_status(slug, status="error", last_error=f"{
type(e).__name__}: {e}")

1067
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

23
package.json Normal file
View File

@@ -0,0 +1,23 @@
{
"devDependencies": {
"tailwindcss": "^4.1.17"
},
"name": "www",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "https://git.nytegear.com/aargonian/nytegear-mirror-websites.git"
},
"keywords": [],
"author": "",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"@tailwindcss/cli": "^4.1.17"
}
}

2
static/tailwind.css Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,15 @@
[Unit]
Description=Mirror Manager Flask App
After=network.target
[Service]
User=aargonian
Group=aargonian
WorkingDirectory=/srv/www
Environment="FLASK_ENV=production"
ExecStart=/usr/bin/python3 /srv/www/app.py
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Update Offline Website Mirrors
[Service]
Type=oneshot
User=aargonian
Group=aargonian
WorkingDirectory=/srv/www
ExecStart=/usr/bin/python3 /srv/www/update_mirrors.py

View File

@@ -0,0 +1,10 @@
[Unit]
Description=Daily update of offline mirrors
[Timer]
OnCalendar=03:00
Persistent=true
Unit=update-mirrors.service
[Install]
WantedBy=timers.target

1
tailwind-input.css Normal file
View File

@@ -0,0 +1 @@
@import "tailwindcss";

View File

@@ -8,7 +8,8 @@ def main():
slug = sys.argv[1] slug = sys.argv[1]
update_mirror(slug) update_mirror(slug)
else: else:
update_all_mirrors() # bump max_workers if you're feeling brave / bandwidth-rich
update_all_mirrors(max_workers=8)
if __name__ == "__main__": if __name__ == "__main__":