Another major update

This commit is contained in:
2025-12-02 02:58:50 -05:00
parent e817265e8a
commit 8ae11f4b03
11 changed files with 1605 additions and 419 deletions

5
.gitignore vendored
View File

@@ -1,5 +1,6 @@
index.html*
/mirrors
/node_modules
# Byte-compiled / optimized / DLL files
__pycache__/
@@ -198,9 +199,9 @@ cython_debug/
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

737
app.py
View File

@@ -1,373 +1,239 @@
#!/usr/bin/env python3
from flask import Flask, request, redirect, url_for, jsonify, render_template_string, abort
import threading
from mirror_manager import (
load_mirrors,
add_mirror,
update_mirror,
MIRROR_ROOT,
LOG_ROOT,
)
import subprocess
import threading
from pathlib import Path
from flask import (
Flask,
request,
redirect,
url_for,
jsonify,
send_from_directory,
render_template_string
)
BASE = Path("/srv/www")
STATIC_DIR = BASE / "static"
STATIC_DIR.mkdir(exist_ok=True)
app = Flask(__name__)
# --- background update helper ---
def _run_update_in_background(slug: str):
    """Start ``update_mirror(slug)`` on a daemon thread and return at once.

    The thread object is not kept or joined; callers get no handle on the run.
    """
    worker = threading.Thread(
        target=update_mirror,
        args=(slug,),
        daemon=True,
    )
    worker.start()
# --- templates ---
# -------------------- TEMPLATES --------------------
INDEX_TEMPLATE = r"""
<!doctype html>
<html>
<html class="h-full">
<head>
<meta charset="utf-8">
<title>Mirror Manager</title>
<style>
:root {
color-scheme: dark light;
}
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
margin: 0;
padding: 0;
background: #0f172a;
color: #e5e7eb;
}
main {
max-width: 1100px;
margin: 2rem auto;
padding: 0 1rem 3rem;
}
header {
display: flex;
flex-wrap: wrap;
align-items: baseline;
gap: 0.5rem 1rem;
justify-content: space-between;
margin-bottom: 1.5rem;
}
h1 {
font-size: 1.75rem;
margin: 0;
}
.subtitle { color: #9ca3af; font-size: 0.9rem; }
.card {
background: #020617;
border-radius: 0.75rem;
padding: 1rem 1.2rem;
box-shadow: 0 10px 30px rgba(0,0,0,0.4);
border: 1px solid #1f2937;
}
.grid {
display: grid;
grid-template-columns: minmax(0, 2fr) minmax(0, 3fr);
gap: 1rem;
align-items: flex-start;
}
@media (max-width: 900px) {
.grid {
grid-template-columns: minmax(0, 1fr);
}
}
label {
display: block;
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #9ca3af;
margin-bottom: 0.25rem;
}
input[type=text], select {
width: 100%;
padding: 0.4rem 0.5rem;
border-radius: 0.5rem;
border: 1px solid #374151;
background: #020617;
color: #e5e7eb;
font-size: 0.9rem;
}
input[type=text]:focus, select:focus {
outline: none;
border-color: #3b82f6;
box-shadow: 0 0 0 1px #3b82f6;
}
.btn {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 0.4rem;
padding: 0.5rem 0.9rem;
border-radius: 999px;
border: none;
cursor: pointer;
font-size: 0.9rem;
font-weight: 500;
}
.btn-primary {
background: linear-gradient(135deg, #3b82f6, #8b5cf6);
color: white;
}
.btn-secondary {
background: transparent;
border: 1px solid #374151;
color: #e5e7eb;
}
.btn[disabled] {
opacity: 0.5;
cursor: default;
}
.toolbar {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-bottom: 0.75rem;
align-items: center;
justify-content: space-between;
}
.toolbar-left, .toolbar-right {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
align-items: center;
}
.pill {
font-size: 0.8rem;
padding: 0.25rem 0.6rem;
border-radius: 999px;
border: 1px solid #374151;
background: #020617;
cursor: pointer;
}
.pill.active {
background: #3b82f6;
border-color: #3b82f6;
color: white;
}
table {
width: 100%;
border-collapse: collapse;
font-size: 0.9rem;
}
th, td {
padding: 0.45rem 0.5rem;
text-align: left;
border-bottom: 1px solid #111827;
vertical-align: middle;
}
th {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #9ca3af;
}
tr:hover td {
background: rgba(31,41,55,0.6);
}
code {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 0.8rem;
}
.badge {
font-size: 0.75rem;
padding: 0.1rem 0.5rem;
border-radius: 999px;
text-transform: uppercase;
letter-spacing: 0.06em;
}
.badge-idle { background: #065f46; color: #a7f3d0; }
.badge-updating { background: #92400e; color: #fed7aa; }
.badge-error { background: #7f1d1d; color: #fecaca; }
.badge-queued { background: #1f2937; color: #e5e7eb; }
.status-dot {
width: 0.6rem;
height: 0.6rem;
border-radius: 999px;
display: inline-block;
margin-right: 0.3rem;
}
.status-idle { background: #22c55e; }
.status-updating { background: #f97316; animation: pulse 1.2s infinite; }
.status-error { background: #ef4444; }
.status-queued { background: #6b7280; }
@keyframes pulse {
0% { transform: scale(1); opacity: 1; }
50% { transform: scale(1.25); opacity: 0.7; }
100% { transform: scale(1); opacity: 1; }
}
.log-link {
font-size: 0.8rem;
color: #93c5fd;
text-decoration: none;
}
.log-link:hover {
text-decoration: underline;
}
.muted { color: #6b7280; font-size: 0.8rem; }
.search-input {
min-width: 220px;
}
</style>
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
</head>
<body>
<main>
<header>
<div>
<h1>Mirror Manager</h1>
<div class="subtitle">Local archive of external sites, grouped by category.</div>
<body class="h-full bg-slate-950 text-slate-100">
<div class="min-h-full">
<header class="border-b border-slate-800 bg-slate-950/80 backdrop-blur">
<div class="max-w-6xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
<div>
<h1 class="text-xl font-semibold tracking-tight">Mirror Manager</h1>
<p class="text-xs text-slate-400">Local offline mirrors of external sites, grouped by category.</p>
</div>
<div class="flex items-center gap-2 text-xs text-slate-400">
<span class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 bg-slate-900/70">
<span class="w-2 h-2 rounded-full bg-emerald-400"></span>
Running locally
</span>
</div>
</div>
</header>
<div class="grid">
<!-- Left: mirror list -->
<section class="card">
<div class="toolbar">
<div class="toolbar-left">
<span class="muted">Categories:</span>
<button class="pill active" data-category="all">All ({{ mirrors|length }})</button>
{% for cat in categories %}
<button class="pill" data-category="{{ cat }}">{{ cat }}</button>
{% endfor %}
<main class="max-w-6xl mx-auto px-4 py-4 space-y-4">
<div class="flex flex-col lg:flex-row gap-4">
<!-- Left: mirrors list -->
<section class="flex-1 bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
<div class="flex flex-wrap items-center gap-2">
<span class="text-xs text-slate-400">Categories:</span>
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
All ({{ mirrors|length }})
</button>
{% for cat in categories %}
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
{{ cat }}
</button>
{% endfor %}
</div>
<div class="flex flex-col sm:flex-row gap-2">
<div class="relative">
<input id="search" class="w-full sm:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Filter by slug / URL / category…" />
</div>
</div>
</div>
<div class="toolbar-right">
<input type="text" id="search" class="search-input" placeholder="Search slug / URL / category…">
</div>
</div>
<table id="mirror-table">
<thead>
<tr>
<th>Slug</th>
<th>Category</th>
<th>URL</th>
<th>Last updated</th>
<th>Status</th>
<th></th>
</tr>
</thead>
<tbody>
{% for m in mirrors %}
<tr data-slug="{{ m.slug }}" data-category="{{ m.category }}" data-search="{{ (m.slug ~ ' ' ~ m.category ~ ' ' ~ m.url)|lower }}">
<td>
<a href="/mirrors/{{ m.slug }}/" target="_blank">
<code>{{ m.slug }}</code>
</a>
</td>
<td>{{ m.category }}</td>
<td><code>{{ m.url }}</code></td>
<td>
{% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %}
<span class="muted">never</span>
<div class="overflow-x-auto border border-slate-800 rounded-xl">
<table class="min-w-full text-sm">
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
<tr>
<th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
<th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
<th class="px-3 py-2 text-left"></th>
</tr>
</thead>
<tbody id="mirror-table" class="divide-y divide-slate-900/80">
{% for m in mirrors %}
<tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
<td class="px-3 py-2 align-top">
<div class="flex flex-col gap-1">
<a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
{{ m.slug }}
</a>
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
View live log
</a>
</div>
</td>
<td class="px-3 py-2 align-top">
<div class="flex flex-wrap gap-1">
{% for c in m.categories %}
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
{% endfor %}
</div>
</td>
<td class="px-3 py-2 align-top max-w-xs">
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
</td>
<td class="px-3 py-2 align-top text-xs text-slate-300">
{% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %}
<span class="text-slate-600">never</span>
{% endif %}
</td>
<td class="px-3 py-2 align-top text-xs">
{% set st = m.status or 'idle' %}
<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
<span class="w-2 h-2 rounded-full
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
<span class="capitalize">{{ st }}</span>
</div>
</td>
<td class="px-3 py-2 align-top text-right text-[0.7rem]">
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
<button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
<span>Update</span>
</button>
</form>
</td>
</tr>
{% endfor %}
{% if mirrors|length == 0 %}
<tr>
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
No mirrors yet. Add one on the right.
</td>
</tr>
{% endif %}
</td>
<td>
{% set st = m.status or 'idle' %}
<span class="status-dot status-{{ st }}"></span>
<span class="badge badge-{{ st }}">{{ st }}</span>
</td>
<td>
<a class="log-link" href="{{ url_for('view_log', slug=m.slug) }}" target="_blank">log</a>
&nbsp;·&nbsp;
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" style="display:inline;">
<button class="btn btn-secondary" style="padding:0.2rem 0.6rem; font-size:0.75rem;">Update</button>
</form>
</td>
</tr>
{% endfor %}
{% if mirrors|length == 0 %}
<tr><td colspan="6" class="muted">No mirrors yet. Add one on the right.</td></tr>
{% endif %}
</tbody>
</table>
</section>
</tbody>
</table>
</div>
</section>
<!-- Right: add mirror -->
<section class="card">
<h2 style="margin-top:0; font-size:1.1rem;">Add mirror</h2>
<form method="post" action="{{ url_for('add_mirror_route') }}">
<div style="margin-bottom:0.6rem;">
<label for="slug">Slug</label>
<input type="text" id="slug" name="slug" required placeholder="e.g. python_tutorial">
<!-- Right: add mirror + content search -->
<section class="w-full lg:w-80 flex flex-col gap-4">
<div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Add mirror</h2>
<form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
<div>
<label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
</div>
<div>
<label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
<input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
</div>
<div>
<label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
<input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
</div>
<div class="flex items-start gap-2">
<input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
<label for="ignore_robots" class="text-xs text-slate-400">
Ignore robots.txt (only if you explicitly want to archive disallowed paths).
</label>
</div>
{% if error %}
<p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
{% endif %}
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
Add &amp; mirror
</button>
<p class="text-[0.7rem] text-slate-500">
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
</p>
</form>
</div>
<div style="margin-bottom:0.6rem;">
<label for="category">Category</label>
<input type="text" id="category" name="category" required placeholder="e.g. tutorial, docs, blog">
<div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Content search</h2>
<form id="search-form" class="space-y-2">
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
Run ripgrep search
</button>
</form>
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
</div>
<div style="margin-bottom:0.6rem;">
<label for="url">URL</label>
<input type="text" id="url" name="url" required placeholder="https://example.com/some/path/">
</div>
<div style="margin-bottom:0.8rem;">
<label style="display:flex; align-items:center; gap:0.4rem;">
<input type="checkbox" name="ignore_robots" value="1">
<span style="text-transform:none; letter-spacing:0; font-size:0.85rem;">
Ignore robots.txt (not recommended unless you know you need it)
</span>
</label>
</div>
{% if error %}
<div style="color:#fecaca; font-size:0.85rem; margin-bottom:0.5rem;">{{ error }}</div>
{% endif %}
<button type="submit" class="btn btn-primary">Add &amp; mirror</button>
<p class="muted" style="margin-top:0.5rem;">
New mirrors are cloned in the background. Status will show as <strong>updating</strong> until done.
</p>
</form>
</section>
</div>
</main>
</section>
</div>
</main>
</div>
<script>
// category filter
const pills = Array.from(document.querySelectorAll('.pill'));
const rows = Array.from(document.querySelectorAll('#mirror-table tbody tr'));
// Category + name filter
const pills = Array.from(document.querySelectorAll('.cat-pill'));
const rows = Array.from(document.querySelectorAll('#mirror-table tr[data-slug]'));
const searchInput = document.getElementById('search');
function applyFilters() {
const activePill = pills.find(p => p.classList.contains('active'));
const cat = activePill ? activePill.dataset.category : 'all';
const active = pills.find(p => p.classList.contains('cat-pill-active'));
const cat = active ? active.dataset.category : 'all';
const q = (searchInput.value || '').toLowerCase();
rows.forEach(row => {
const rowCat = row.dataset.category;
const cats = row.dataset.categories.split(',').map(s => s.trim());
const searchStr = row.dataset.search;
const matchCat = (cat === 'all' || rowCat === cat);
const matchSearch = (!q || searchStr.includes(q));
row.style.display = (matchCat && matchSearch) ? '' : 'none';
const matchesCat = (cat === 'all' || cats.includes(cat));
const matchesSearch = (!q || searchStr.includes(q));
row.style.display = (matchesCat && matchesSearch) ? '' : 'none';
});
}
pills.forEach(p => {
p.addEventListener('click', () => {
pills.forEach(x => x.classList.remove('active'));
p.classList.add('active');
pills.forEach(x => x.classList.remove('cat-pill-active', 'border-sky-500', 'text-slate-100'));
p.classList.add('cat-pill-active', 'border-sky-500', 'text-slate-100');
applyFilters();
});
});
searchInput.addEventListener('input', () => {
applyFilters();
});
searchInput.addEventListener('input', applyFilters);
// polling for live status
// Live status polling
async function pollStatus() {
try {
const resp = await fetch("{{ url_for('status') }}");
@@ -381,113 +247,199 @@ INDEX_TEMPLATE = r"""
const m = bySlug[slug];
if (!m) return;
const tds = row.querySelectorAll('td');
// last updated
const lastUpdatedCell = tds[3];
lastUpdatedCell.innerHTML = m.last_updated_display || '<span class="muted">never</span>';
// status
const lastCell = tds[3];
const statusCell = tds[4];
lastCell.innerHTML = m.last_updated_display || '<span class="text-slate-600">never</span>';
const st = m.status || 'idle';
statusCell.innerHTML =
'<span class="status-dot status-' + st + '"></span>' +
'<span class="badge badge-' + st + '">' + st + '</span>';
'<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">' +
'<span class="w-2 h-2 rounded-full ' +
(st === "idle" ? "bg-emerald-400" :
st === "updating" ? "bg-amber-400 animate-pulse" :
st === "warning" ? "bg-yellow-400" : "bg-rose-400") +
'"></span>' +
'<span class="capitalize">' + st + '</span>' +
'</div>';
});
} catch (e) {}
}
setInterval(pollStatus, 5000);
// Content search via rg
const searchForm = document.getElementById('search-form');
const contentQuery = document.getElementById('content-query');
const searchResults = document.getElementById('search-results');
searchForm.addEventListener('submit', async (e) => {
e.preventDefault();
const q = contentQuery.value.trim();
if (!q) return;
searchResults.textContent = 'Searching…';
try {
const resp = await fetch("{{ url_for('content_search') }}?q=" + encodeURIComponent(q));
if (!resp.ok) {
searchResults.textContent = 'Search failed.';
return;
}
const data = await resp.json();
if (data.results.length === 0) {
searchResults.textContent = 'No matches.';
return;
}
searchResults.innerHTML = '';
data.results.forEach(r => {
  // Paths and matched lines originate from mirrored third-party pages, so
  // they are inserted with textContent (never innerHTML): any markup they
  // contain is shown as literal text instead of being parsed by the browser.
  const div = document.createElement('div');
  div.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";

  const pathEl = document.createElement('div');
  pathEl.className = "font-mono text-[0.65rem] text-sky-300 break-all";
  pathEl.textContent = r.path;

  const lineEl = document.createElement('div');
  lineEl.className = "text-[0.7rem] text-slate-200 whitespace-pre-wrap";
  lineEl.textContent = r.line;

  div.appendChild(pathEl);
  div.appendChild(lineEl);
  searchResults.appendChild(div);
});
} catch (e) {
// ignore
searchResults.textContent = 'Search failed.';
}
}
setInterval(pollStatus, 5000);
});
</script>
</body>
</html>
"""
# --- routes ---
LOG_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
<head>
<meta charset="utf-8">
<title>Log: {{ slug }}</title>
<link rel="stylesheet" href="{{ url_for('static_file', filename='tailwind.css') }}">
</head>
<body class="h-full bg-slate-950 text-slate-100">
<div class="max-w-5xl mx-auto px-4 py-4 space-y-2">
<div class="flex items-center justify-between mb-2">
<div>
<h1 class="text-sm font-semibold">Log for <span class="font-mono text-sky-400">{{ slug }}</span></h1>
<p class="text-[0.65rem] text-slate-400">Live tail of wget output (auto-refreshing).</p>
</div>
<a href="/mirrors/{{ slug }}/" target="_blank" class="text-xs text-sky-400 hover:text-sky-200">Open mirror</a>
</div>
<div class="border border-slate-800 rounded-xl bg-slate-950/90 max-h-[75vh] overflow-y-auto">
<pre id="log" class="text-[0.65rem] p-3 font-mono whitespace-pre-wrap"></pre>
</div>
</div>
<script>
const logEl = document.getElementById('log');
async function pollLog() {
try {
const resp = await fetch("{{ url_for('log_tail', slug=slug) }}");
if (!resp.ok) return;
const text = await resp.text();
logEl.textContent = text;
logEl.parentElement.scrollTop = logEl.parentElement.scrollHeight;
} catch (e) {}
}
setInterval(pollLog, 1500);
pollLog();
</script>
</body>
</html>
"""
# -------------------- ROUTES --------------------
@app.route("/static/<path:filename>")
def static_file(filename):
    """Serve ``filename`` out of ``STATIC_DIR`` via ``send_from_directory``."""
    response = send_from_directory(STATIC_DIR, filename)
    return response
def _mirror_rows(mirrors):
    """Turn stored mirror records into template rows plus a category list.

    Returns ``(rows, categories)``: ``rows`` holds one dict per mirror with the
    keys INDEX_TEMPLATE reads, and ``categories`` is the sorted list of every
    category name that appears on any mirror.
    """
    seen = set()
    rows = []
    for m in mirrors:
        cats = m.get("categories") or []
        seen.update(cats)
        raw = m.get("last_updated")
        rows.append({
            "slug": m["slug"],
            "categories": cats,
            "categories_joined": ", ".join(cats),
            "url": m["url"],
            "status": m.get("status") or "idle",
            "last_updated_raw": raw,
            # Swap the "T"/"Z" markers of the stored timestamp for a
            # friendlier display form; None when never updated.
            "last_updated": raw.replace("T", " ").replace("Z", " UTC") if raw else None,
        })
    return rows, sorted(seen)


def _render_index(error=None):
    """Render INDEX_TEMPLATE from the current mirror list, with an optional error."""
    rows, categories = _mirror_rows(load_mirrors())
    return render_template_string(
        INDEX_TEMPLATE, mirrors=rows, categories=categories, error=error)


@app.route("/", methods=["GET"])
def index():
    """Show the mirror list and the add/search forms."""
    return _render_index()


@app.route("/add", methods=["POST"])
def add_mirror_route():
    """Handle the "Add mirror" form: validate, register, start the first run.

    On a validation or registration failure the index page is re-rendered with
    the error message and HTTP 400. On success the mirror update is started in
    the background and the browser is redirected to the index.
    """
    slug = (request.form.get("slug") or "").strip()
    categories = (request.form.get("categories") or "").strip()
    url = (request.form.get("url") or "").strip()
    ignore_robots = bool(request.form.get("ignore_robots"))

    error = None
    if not slug or not categories or not url:
        error = "Slug, categories, and URL are required."
    elif " " in slug:
        error = "Slug cannot contain spaces."
    elif "/" in slug or "\\" in slug or slug in {".", ".."}:
        # The slug becomes a directory name under MIRROR_ROOT and a file name
        # under LOG_ROOT, so it must not be able to point outside of them.
        error = "Slug cannot contain slashes or be '.' or '..'."
    elif not url.lower().startswith(("http://", "https://", "ftp://")):
        # The URL is handed to wget as a command-line argument; requiring a
        # scheme keeps a value such as "--some-option" from being read as one.
        error = "URL must start with http://, https:// or ftp://."
    if error:
        return _render_index(error), 400

    try:
        add_mirror(slug, categories, url, ignore_robots=ignore_robots)
    except Exception as e:
        # Route boundary: whatever add_mirror reports (duplicate slug, empty
        # category list, storage failure) is shown on the form.
        return _render_index(str(e)), 400

    # kick off background update
    _run_update_in_background(slug)
    return redirect(url_for("index"))
@app.route("/update/<slug>", methods=["POST"])
def trigger_update(slug):
    """Start a background update of ``slug`` and redirect to the index page."""
    # Fire-and-forget: the request does not wait for the run; the UI will see
    # the status flip to 'updating'.
    _run_update_in_background(slug)
    home = url_for("index")
    return redirect(home)
@@ -498,25 +450,78 @@ def status():
out = []
for m in mirrors:
raw = m.get("last_updated")
last_disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
disp = raw.replace("T", " ").replace("Z", " UTC") if raw else None
out.append({
"slug": m["slug"],
"category": m["category"],
"categories": m.get("categories") or [],
"url": m["url"],
"status": m.get("status") or "idle",
"last_updated": raw,
"last_updated_display": last_disp or "",
"last_updated_display": disp or "",
})
return jsonify({"mirrors": out})
@app.route("/logs/<slug>")
def log_view(slug):
    """Render the live-log page for ``slug``.

    Responds with 404 when ``slug`` is not one of the mirrors returned by
    ``load_mirrors()``. The page itself only embeds the slug; the log text is
    fetched by the page's script from the ``log_tail`` endpoint.
    """
    # A GET must not create files: the log is no longer touch()ed here, so a
    # request for an arbitrary slug cannot leave stray "<slug>.log" files in
    # LOG_ROOT. log_tail already answers with an empty body while no log exists.
    if not any(m["slug"] == slug for m in load_mirrors()):
        return "No such mirror", 404
    return render_template_string(LOG_TEMPLATE, slug=slug)
@app.route("/logs/<slug>/tail")
def log_tail(slug):
    """Return the last 64 KiB of the log for ``slug`` as plain text.

    A log that does not exist or cannot be read yields an empty 200 response.
    Bytes that are not valid UTF-8 (including a character cut in half at the
    start of the tail window) are replaced rather than raising.
    """
    log_path = LOG_ROOT / f"{slug}.log"
    block = 65536
    try:
        with log_path.open("rb") as f:
            f.seek(0, 2)  # whence=2: jump to end of file to learn its size
            size = f.tell()
            # Start `block` bytes before the end, or at 0 for short files.
            f.seek(max(size - block, 0))
            data = f.read()
    except OSError:
        # Covers a missing log (FileNotFoundError) as well as read failures.
        data = b""
    text = data.decode("utf-8", errors="replace")
    # Log text may echo content from remote sites; declare it text/plain so a
    # browser opening this URL directly never interprets it as HTML.
    return text, 200, {"Content-Type": "text/plain; charset=utf-8"}
@app.route("/search", methods=["GET"])
def content_search():
    """Search all mirrored files with ripgrep and return matches as JSON.

    Reads the pattern from the ``q`` query parameter and responds with
    ``{"results": [{"path": "<relative path>:<line number>", "line": "<text>"}]}``.
    Only the first 50 output lines are considered. Failures (rg missing,
    timeout, rg error) are reported as a single result whose path is
    ``"(error)"``, which is the shape the page's script already displays.
    """
    q = (request.args.get("q") or "").strip()
    if not q:
        return jsonify({"results": []})
    try:
        proc = subprocess.run(
            ["rg", "--line-number", "--no-heading", "--color", "never",
             # "--" ends option parsing so a query starting with "-" is
             # treated as the pattern, not as an rg flag.
             "--", q, str(MIRROR_ROOT)],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            # Mirrored pages are not guaranteed to be UTF-8; replace
            # undecodable bytes instead of raising UnicodeDecodeError.
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except FileNotFoundError:
        return jsonify({"results": [{"path": "(error)", "line": "ripgrep (rg) not installed"}]})
    except subprocess.TimeoutExpired:
        return jsonify({"results": [{"path": "(error)", "line": "rg timed out"}]})

    # rg exits 0 on matches and 1 on no matches; anything else with no output
    # is a failure (for example an invalid pattern), not "no matches".
    if proc.returncode not in (0, 1) and not proc.stdout:
        return jsonify({"results": [{"path": "(error)", "line": "rg failed (invalid pattern?)"}]})

    results = []
    for line in proc.stdout.splitlines()[:50]:
        # Expected shape: <path>:<line number>:<matched text>
        parts = line.split(":", 2)
        if len(parts) != 3:
            continue
        path, lineno, content = parts
        try:
            rel = str(Path(path).relative_to(MIRROR_ROOT))
        except ValueError:
            # Path not under MIRROR_ROOT (e.g. the split hit a ":" inside the
            # file name); show it unmodified rather than failing the request.
            rel = path
        results.append({
            "path": f"{rel}:{lineno}",
            "line": content.strip(),
        })
    return jsonify({"results": results})
if __name__ == "__main__":

View File

@@ -1,12 +1,10 @@
#!/usr/bin/env python3
"""
Manage the various mirrors for the mirror website.
"""
import json
import subprocess
import datetime as dt
from pathlib import Path
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
BASE = Path("/srv/www")
DATA_FILE = BASE / "data" / "mirrors.json"
@@ -17,23 +15,27 @@ MIRROR_ROOT.mkdir(parents=True, exist_ok=True)
LOG_ROOT.mkdir(parents=True, exist_ok=True)
DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
_LOCK = threading.Lock()
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (whole seconds)."""
    # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
    # timestamp and format it explicitly so the stored text keeps its "Z" suffix.
    return dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def load_mirrors() -> list[dict]:
    """Read the mirror list from ``DATA_FILE``; an absent file means no mirrors.

    The existence check and the read both happen while holding ``_LOCK``.
    """
    with _LOCK:
        if DATA_FILE.exists():
            with DATA_FILE.open("r", encoding="utf-8") as handle:
                return json.load(handle)
        return []
def save_mirrors(mirrors: list[dict]) -> None:
    """Write ``mirrors`` to ``DATA_FILE`` as indented JSON.

    The data is written to a sibling ``.tmp`` file first and then moved over
    ``DATA_FILE`` with ``Path.replace``; both steps run while holding ``_LOCK``.
    """
    with _LOCK:
        scratch = DATA_FILE.with_suffix(".tmp")
        with scratch.open("w", encoding="utf-8") as handle:
            json.dump(mirrors, handle, indent=2)
        scratch.replace(DATA_FILE)
def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
@@ -43,40 +45,63 @@ def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
return None
def _normalise_categories(raw: str) -> list[str]:
    """Split a comma-separated string into trimmed, non-empty category names.

    Example: ``"tutorials, wgpu, rust"`` -> ``["tutorials", "wgpu", "rust"]``.
    """
    cleaned = []
    for piece in raw.split(","):
        name = piece.strip()
        if name:
            cleaned.append(name)
    return cleaned
def add_mirror(slug: str,
               categories: str,
               url: str,
               ignore_robots: bool = False) -> dict:
    """Register a new mirror record and persist it.

    ``categories`` is a comma-separated string; it is parsed with
    ``_normalise_categories``. The new record starts with status ``"queued"``
    and no ``last_updated`` value. Returns the stored record.

    Raises:
        ValueError: if ``slug`` is already registered, or if no category
            remains after parsing.
    """
    existing = load_mirrors()
    if get_mirror(existing, slug) is not None:
        raise ValueError(f"Mirror with slug '{slug}' already exists")

    parsed = _normalise_categories(categories)
    if not parsed:
        raise ValueError("At least one category is required")

    record = dict(
        slug=slug,
        categories=parsed,
        url=url,
        ignore_robots=bool(ignore_robots),
        created_at=_now_iso(),
        last_updated=None,
        status="queued",  # idle | updating | queued | warning | error
        last_error=None,
    )
    existing.append(record)
    save_mirrors(existing)
    return record
# Sentinel for "argument not supplied", so that an explicit ``None`` can mean
# "clear the stored value".
_KEEP = object()

# Serialises the load -> modify -> save cycle of _set_status. load_mirrors()
# and save_mirrors() lock individually, which does not stop two threads in
# this process from interleaving and overwriting each other's update.
_STATUS_LOCK = threading.Lock()


def _set_status(slug: str, *,
                status: str,
                last_error: str | None | object = _KEEP,
                last_updated: str | None = None):
    """Update the stored status fields of the mirror ``slug``.

    Does nothing when no mirror with that slug exists.

    Args:
        slug: mirror to update.
        status: new value for the record's ``status`` field.
        last_error: new ``last_error`` value. Pass ``None`` to clear a
            previously stored error; omit the argument to leave it unchanged.
        last_updated: new ``last_updated`` value; ``None`` leaves the stored
            value unchanged.
    """
    with _STATUS_LOCK:
        mirrors = load_mirrors()
        m = get_mirror(mirrors, slug)
        if m is None:
            return
        m["status"] = status
        # Callers pass last_error=None when a run starts or succeeds; that
        # must reset the field instead of being ignored.
        if last_error is not _KEEP:
            m["last_error"] = last_error
        if last_updated is not None:
            m["last_updated"] = last_updated
        save_mirrors(mirrors)
def update_mirror(slug: str) -> None:
"""Run wget mirror for a singel slug (blocking)."""
"""Run wget mirror for a single slug (blocking in this thread)."""
mirrors = load_mirrors()
m = get_mirror(mirrors, slug)
if m is None:
raise ValueError(f"No such mirror: {slug}")
m["status"] = "updating"
m["last_error"] = None
save_mirrors(mirrors)
_set_status(slug, status="updating", last_error=None)
target_dir = MIRROR_ROOT / slug
target_dir.mkdir(parents=True, exist_ok=True)
@@ -84,19 +109,19 @@ def update_mirror(slug: str) -> None:
robots_setting = "off" if m.get("ignore_robots") else "on"
# Polite wget:
# --mirror implies -r -N -l inf --no-remove-listing
cmd = [
"wget",
"--mirror",
"--mirror", # recurse, keep timestamps
"--convert-links",
"--adjust-extension",
"--page-requisites",
"--no-parent",
"--wait=0.70",
"--wait=0.5",
"--random-wait",
# "--limit-rate=50m",
f"execute=robots={robots_setting}",
"--limit-rate=50m",
"--tries=3",
"--retry-connrefused",
f"--execute=robots={robots_setting}",
"-P",
str(target_dir),
m["url"],
@@ -104,35 +129,62 @@ def update_mirror(slug: str) -> None:
try:
with log_file.open("a", encoding="utf-8") as lf:
lf.write(f"\n=== {_now_iso()} : "
f"Starting mirror of {m['url']} ===\n")
lf.write(f"\n=== {_now_iso()} : Starting mirror of {
m['url']} ===\n")
lf.flush()
subprocess.run(
proc = subprocess.run(
cmd,
stdout=lf,
stderr=subprocess.STDOUT,
check=True,
)
lf.write(f"=== {_now_iso()} : Completed mirror of {m['url']} ===\n")
lf.flush()
m["last_updated"] = _now_iso()
m["status"] = "idle"
m["last_error"] = None
except subprocess.CalledProcessError as e:
m["status"] = "error"
m["last_error"] = f"wget exited with {e.returncode}"
with log_file.open("a", encoding="utf-8") as lf:
lf.write(f"*** ERROR: wget failed with code {e.returncode}\n")
lf.write(f"=== {_now_iso()} : wget exited with code {
proc.returncode} ===\n")
lf.flush()
# Classify result
if proc.returncode == 0:
_set_status(slug, status="idle",
last_updated=_now_iso(), last_error=None)
else:
# If we see FINISHED in the log and the directory has content,
# treat this as a partial/ok-with-warnings case.
text = log_file.read_text(encoding="utf-8", errors="ignore")
has_finished = "FINISHED --" in text
has_files = any(target_dir.rglob("*"))
if has_finished and has_files:
_set_status(
slug,
status="warning",
last_updated=_now_iso(),
last_error=f"wget exited with {
proc.returncode} (partial; see log)",
)
else:
_set_status(
slug,
status="error",
last_error=f"wget exited with {proc.returncode}",
)
except Exception as e:
m["status"] = "error"
m["last_error"] = f"{type(e).__name__}: {e}"
with log_file.open("a", encoding="utf-8") as lf:
lf.write(f"*** ERROR: {type(e).__name__}: {e}\n")
finally:
save_mirrors(mirrors)
_set_status(
slug,
status="error",
last_error=f"{type(e).__name__}: {e}",
)
def update_all_mirrors(max_workers: int = 3) -> None:
    """Run ``update_mirror`` for every stored mirror, several at a time.

    Args:
        max_workers: size of the thread pool, i.e. how many mirrors are
            updated concurrently.

    An exception raised by one mirror's update is recorded on that mirror as
    status ``"error"`` and does not stop the remaining updates. Returns after
    all updates have finished.
    """
    slugs = [m["slug"] for m in load_mirrors()]
    if not slugs:
        return

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(update_mirror, slug): slug for slug in slugs}
        for fut in as_completed(futures):
            slug = futures[fut]
            try:
                fut.result()
            except Exception as e:
                # Batch boundary: note the failure on the mirror and carry on.
                # The message is built on one line; a replacement field split
                # across lines is a SyntaxError before Python 3.12.
                reason = f"{type(e).__name__}: {e}"
                _set_status(slug, status="error", last_error=reason)

1067
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

23
package.json Normal file
View File

@@ -0,0 +1,23 @@
{
"devDependencies": {
"tailwindcss": "^4.1.17"
},
"name": "www",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "https://git.nytegear.com/aargonian/nytegear-mirror-websites.git"
},
"keywords": [],
"author": "",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"@tailwindcss/cli": "^4.1.17"
}
}

2
static/tailwind.css Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,15 @@
[Unit]
Description=Mirror Manager Flask App
After=network.target
[Service]
User=aargonian
Group=aargonian
WorkingDirectory=/srv/www
Environment="FLASK_ENV=production"
ExecStart=/usr/bin/python3 /srv/www/app.py
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Update Offline Website Mirrors
[Service]
Type=oneshot
User=aargonian
Group=aargonian
WorkingDirectory=/srv/www
ExecStart=/usr/bin/python3 /srv/www/update_mirrors.py

View File

@@ -0,0 +1,10 @@
[Unit]
Description=Daily update of offline mirrors
[Timer]
OnCalendar=03:00
Persistent=true
Unit=update-mirrors.service
[Install]
WantedBy=timers.target

1
tailwind-input.css Normal file
View File

@@ -0,0 +1 @@
@import "tailwindcss";

View File

@@ -8,7 +8,8 @@ def main():
slug = sys.argv[1]
update_mirror(slug)
else:
update_all_mirrors()
# bump max_workers if you're feeling brave / bandwidth-rich
update_all_mirrors(max_workers=8)
if __name__ == "__main__":