Significant improvement on the app
This commit is contained in:
217
.gitignore
vendored
217
.gitignore
vendored
@@ -1,2 +1,219 @@
|
||||
index.html*
|
||||
/mirrors
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
# Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
# poetry.lock
|
||||
# poetry.toml
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||
# pdm.lock
|
||||
# pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# pixi
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||
# pixi.lock
|
||||
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||
.pixi
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# Redis
|
||||
*.rdb
|
||||
*.aof
|
||||
*.pid
|
||||
|
||||
# RabbitMQ
|
||||
mnesia/
|
||||
rabbitmq/
|
||||
rabbitmq-data/
|
||||
|
||||
# ActiveMQ
|
||||
activemq-data/
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
# .idea/
|
||||
|
||||
# Abstra
|
||||
# Abstra is an AI-powered process automation framework.
|
||||
# Ignore directories containing user credentials, local state, and settings.
|
||||
# Learn more at https://abstra.io/docs
|
||||
.abstra/
|
||||
|
||||
# Visual Studio Code
|
||||
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||
# you could uncomment the following to ignore the entire vscode folder
|
||||
# .vscode/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Marimo
|
||||
marimo/_static/
|
||||
marimo/_lsp/
|
||||
__marimo__/
|
||||
|
||||
# Streamlit
|
||||
.streamlit/secrets.toml
|
||||
|
||||
523
app.py
Executable file
523
app.py
Executable file
@@ -0,0 +1,523 @@
|
||||
#!/usr/bin/env python3
|
||||
from flask import Flask, request, redirect, url_for, jsonify, render_template_string, abort
|
||||
import threading
|
||||
from mirror_manager import (
|
||||
load_mirrors,
|
||||
add_mirror,
|
||||
update_mirror,
|
||||
LOG_ROOT,
|
||||
)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# --- background update helper ---
|
||||
|
||||
|
||||
def _run_update_in_background(slug: str):
    """Start ``update_mirror(slug)`` on a daemon thread and return at once.

    The thread is a daemon, so it does not keep the process alive on exit.
    """
    worker = threading.Thread(
        target=update_mirror,
        args=(slug,),
        daemon=True,
    )
    worker.start()
|
||||
|
||||
# --- templates ---
|
||||
|
||||
|
||||
INDEX_TEMPLATE = r"""
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Mirror Manager</title>
|
||||
<style>
|
||||
:root {
|
||||
color-scheme: dark light;
|
||||
}
|
||||
body {
|
||||
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background: #0f172a;
|
||||
color: #e5e7eb;
|
||||
}
|
||||
main {
|
||||
max-width: 1100px;
|
||||
margin: 2rem auto;
|
||||
padding: 0 1rem 3rem;
|
||||
}
|
||||
header {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: baseline;
|
||||
gap: 0.5rem 1rem;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
h1 {
|
||||
font-size: 1.75rem;
|
||||
margin: 0;
|
||||
}
|
||||
.subtitle { color: #9ca3af; font-size: 0.9rem; }
|
||||
|
||||
.card {
|
||||
background: #020617;
|
||||
border-radius: 0.75rem;
|
||||
padding: 1rem 1.2rem;
|
||||
box-shadow: 0 10px 30px rgba(0,0,0,0.4);
|
||||
border: 1px solid #1f2937;
|
||||
}
|
||||
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(0, 2fr) minmax(0, 3fr);
|
||||
gap: 1rem;
|
||||
align-items: flex-start;
|
||||
}
|
||||
@media (max-width: 900px) {
|
||||
.grid {
|
||||
grid-template-columns: minmax(0, 1fr);
|
||||
}
|
||||
}
|
||||
|
||||
label {
|
||||
display: block;
|
||||
font-size: 0.8rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: #9ca3af;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
input[type=text], select {
|
||||
width: 100%;
|
||||
padding: 0.4rem 0.5rem;
|
||||
border-radius: 0.5rem;
|
||||
border: 1px solid #374151;
|
||||
background: #020617;
|
||||
color: #e5e7eb;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
input[type=text]:focus, select:focus {
|
||||
outline: none;
|
||||
border-color: #3b82f6;
|
||||
box-shadow: 0 0 0 1px #3b82f6;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.4rem;
|
||||
padding: 0.5rem 0.9rem;
|
||||
border-radius: 999px;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
.btn-primary {
|
||||
background: linear-gradient(135deg, #3b82f6, #8b5cf6);
|
||||
color: white;
|
||||
}
|
||||
.btn-secondary {
|
||||
background: transparent;
|
||||
border: 1px solid #374151;
|
||||
color: #e5e7eb;
|
||||
}
|
||||
.btn[disabled] {
|
||||
opacity: 0.5;
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
.toolbar {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.75rem;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.toolbar-left, .toolbar-right {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.pill {
|
||||
font-size: 0.8rem;
|
||||
padding: 0.25rem 0.6rem;
|
||||
border-radius: 999px;
|
||||
border: 1px solid #374151;
|
||||
background: #020617;
|
||||
cursor: pointer;
|
||||
}
|
||||
.pill.active {
|
||||
background: #3b82f6;
|
||||
border-color: #3b82f6;
|
||||
color: white;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
th, td {
|
||||
padding: 0.45rem 0.5rem;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid #111827;
|
||||
vertical-align: middle;
|
||||
}
|
||||
th {
|
||||
font-size: 0.75rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: #9ca3af;
|
||||
}
|
||||
tr:hover td {
|
||||
background: rgba(31,41,55,0.6);
|
||||
}
|
||||
code {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.badge {
|
||||
font-size: 0.75rem;
|
||||
padding: 0.1rem 0.5rem;
|
||||
border-radius: 999px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.06em;
|
||||
}
|
||||
.badge-idle { background: #065f46; color: #a7f3d0; }
|
||||
.badge-updating { background: #92400e; color: #fed7aa; }
|
||||
.badge-error { background: #7f1d1d; color: #fecaca; }
|
||||
.badge-queued { background: #1f2937; color: #e5e7eb; }
|
||||
|
||||
.status-dot {
|
||||
width: 0.6rem;
|
||||
height: 0.6rem;
|
||||
border-radius: 999px;
|
||||
display: inline-block;
|
||||
margin-right: 0.3rem;
|
||||
}
|
||||
.status-idle { background: #22c55e; }
|
||||
.status-updating { background: #f97316; animation: pulse 1.2s infinite; }
|
||||
.status-error { background: #ef4444; }
|
||||
.status-queued { background: #6b7280; }
|
||||
|
||||
@keyframes pulse {
|
||||
0% { transform: scale(1); opacity: 1; }
|
||||
50% { transform: scale(1.25); opacity: 0.7; }
|
||||
100% { transform: scale(1); opacity: 1; }
|
||||
}
|
||||
|
||||
.log-link {
|
||||
font-size: 0.8rem;
|
||||
color: #93c5fd;
|
||||
text-decoration: none;
|
||||
}
|
||||
.log-link:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.muted { color: #6b7280; font-size: 0.8rem; }
|
||||
|
||||
.search-input {
|
||||
min-width: 220px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<header>
|
||||
<div>
|
||||
<h1>Mirror Manager</h1>
|
||||
<div class="subtitle">Local archive of external sites, grouped by category.</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div class="grid">
|
||||
<!-- Left: mirror list -->
|
||||
<section class="card">
|
||||
<div class="toolbar">
|
||||
<div class="toolbar-left">
|
||||
<span class="muted">Categories:</span>
|
||||
<button class="pill active" data-category="all">All ({{ mirrors|length }})</button>
|
||||
{% for cat in categories %}
|
||||
<button class="pill" data-category="{{ cat }}">{{ cat }}</button>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div class="toolbar-right">
|
||||
<input type="text" id="search" class="search-input" placeholder="Search slug / URL / category…">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<table id="mirror-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Slug</th>
|
||||
<th>Category</th>
|
||||
<th>URL</th>
|
||||
<th>Last updated</th>
|
||||
<th>Status</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for m in mirrors %}
|
||||
<tr data-slug="{{ m.slug }}" data-category="{{ m.category }}" data-search="{{ (m.slug ~ ' ' ~ m.category ~ ' ' ~ m.url)|lower }}">
|
||||
<td>
|
||||
<a href="/mirrors/{{ m.slug }}/" target="_blank">
|
||||
<code>{{ m.slug }}</code>
|
||||
</a>
|
||||
</td>
|
||||
<td>{{ m.category }}</td>
|
||||
<td><code>{{ m.url }}</code></td>
|
||||
<td>
|
||||
{% if m.last_updated %}
|
||||
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
|
||||
{% else %}
|
||||
<span class="muted">never</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% set st = m.status or 'idle' %}
|
||||
<span class="status-dot status-{{ st }}"></span>
|
||||
<span class="badge badge-{{ st }}">{{ st }}</span>
|
||||
</td>
|
||||
<td>
|
||||
<a class="log-link" href="{{ url_for('view_log', slug=m.slug) }}" target="_blank">log</a>
|
||||
·
|
||||
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" style="display:inline;">
|
||||
<button class="btn btn-secondary" style="padding:0.2rem 0.6rem; font-size:0.75rem;">Update</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if mirrors|length == 0 %}
|
||||
<tr><td colspan="6" class="muted">No mirrors yet. Add one on the right.</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
|
||||
<!-- Right: add mirror -->
|
||||
<section class="card">
|
||||
<h2 style="margin-top:0; font-size:1.1rem;">Add mirror</h2>
|
||||
<form method="post" action="{{ url_for('add_mirror_route') }}">
|
||||
<div style="margin-bottom:0.6rem;">
|
||||
<label for="slug">Slug</label>
|
||||
<input type="text" id="slug" name="slug" required placeholder="e.g. python_tutorial">
|
||||
</div>
|
||||
<div style="margin-bottom:0.6rem;">
|
||||
<label for="category">Category</label>
|
||||
<input type="text" id="category" name="category" required placeholder="e.g. tutorial, docs, blog">
|
||||
</div>
|
||||
<div style="margin-bottom:0.6rem;">
|
||||
<label for="url">URL</label>
|
||||
<input type="text" id="url" name="url" required placeholder="https://example.com/some/path/">
|
||||
</div>
|
||||
<div style="margin-bottom:0.8rem;">
|
||||
<label style="display:flex; align-items:center; gap:0.4rem;">
|
||||
<input type="checkbox" name="ignore_robots" value="1">
|
||||
<span style="text-transform:none; letter-spacing:0; font-size:0.85rem;">
|
||||
Ignore robots.txt (not recommended unless you know you need it)
|
||||
</span>
|
||||
</label>
|
||||
</div>
|
||||
{% if error %}
|
||||
<div style="color:#fecaca; font-size:0.85rem; margin-bottom:0.5rem;">{{ error }}</div>
|
||||
{% endif %}
|
||||
<button type="submit" class="btn btn-primary">Add & mirror</button>
|
||||
<p class="muted" style="margin-top:0.5rem;">
|
||||
New mirrors are cloned in the background. Status will show as <strong>updating</strong> until done.
|
||||
</p>
|
||||
</form>
|
||||
</section>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<script>
|
||||
// category filter
|
||||
const pills = Array.from(document.querySelectorAll('.pill'));
|
||||
const rows = Array.from(document.querySelectorAll('#mirror-table tbody tr'));
|
||||
const searchInput = document.getElementById('search');
|
||||
|
||||
function applyFilters() {
|
||||
const activePill = pills.find(p => p.classList.contains('active'));
|
||||
const cat = activePill ? activePill.dataset.category : 'all';
|
||||
const q = (searchInput.value || '').toLowerCase();
|
||||
|
||||
rows.forEach(row => {
|
||||
const rowCat = row.dataset.category;
|
||||
const searchStr = row.dataset.search;
|
||||
const matchCat = (cat === 'all' || rowCat === cat);
|
||||
const matchSearch = (!q || searchStr.includes(q));
|
||||
row.style.display = (matchCat && matchSearch) ? '' : 'none';
|
||||
});
|
||||
}
|
||||
|
||||
pills.forEach(p => {
|
||||
p.addEventListener('click', () => {
|
||||
pills.forEach(x => x.classList.remove('active'));
|
||||
p.classList.add('active');
|
||||
applyFilters();
|
||||
});
|
||||
});
|
||||
|
||||
searchInput.addEventListener('input', () => {
|
||||
applyFilters();
|
||||
});
|
||||
|
||||
// polling for live status
|
||||
async function pollStatus() {
|
||||
try {
|
||||
const resp = await fetch("{{ url_for('status') }}");
|
||||
if (!resp.ok) return;
|
||||
const data = await resp.json();
|
||||
const bySlug = {};
|
||||
data.mirrors.forEach(m => bySlug[m.slug] = m);
|
||||
|
||||
rows.forEach(row => {
|
||||
const slug = row.dataset.slug;
|
||||
const m = bySlug[slug];
|
||||
if (!m) return;
|
||||
const tds = row.querySelectorAll('td');
|
||||
// last updated
|
||||
const lastUpdatedCell = tds[3];
|
||||
lastUpdatedCell.innerHTML = m.last_updated_display || '<span class="muted">never</span>';
|
||||
// status
|
||||
const statusCell = tds[4];
|
||||
const st = m.status || 'idle';
|
||||
statusCell.innerHTML =
|
||||
'<span class="status-dot status-' + st + '"></span>' +
|
||||
'<span class="badge badge-' + st + '">' + st + '</span>';
|
||||
});
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(pollStatus, 5000);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
# --- routes ---
|
||||
|
||||
|
||||
@app.route("/", methods=["GET"])
def index():
    """Render the dashboard page listing every stored mirror."""
    stored = load_mirrors()
    categories = sorted({entry["category"] for entry in stored})

    def as_row(entry):
        # Turn "2025-12-02T07:15:12Z" into "2025-12-02 07:15:12 UTC" for display;
        # the untouched value is kept alongside it for the tooltip.
        stamp = entry.get("last_updated")
        shown = stamp.replace("T", " ").replace("Z", " UTC") if stamp else None
        return {
            "slug": entry["slug"],
            "category": entry["category"],
            "url": entry["url"],
            "status": entry.get("status") or "idle",
            "last_updated_raw": stamp,
            "last_updated": shown,
        }

    table_rows = [as_row(entry) for entry in stored]
    return render_template_string(
        INDEX_TEMPLATE,
        mirrors=table_rows,
        categories=categories,
        error=None,
    )
|
||||
|
||||
|
||||
def _index_rows(mirrors):
    """Convert stored mirror dicts into the row dicts INDEX_TEMPLATE renders."""
    rows = []
    for m in mirrors:
        raw = m.get("last_updated")
        rows.append({
            "slug": m["slug"],
            "category": m["category"],
            "url": m["url"],
            "status": m.get("status") or "idle",
            "last_updated_raw": raw,
            "last_updated": raw.replace("T", " ").replace("Z", " UTC") if raw else None,
        })
    return rows


def _render_index_error(message):
    """Re-render the index page showing *message* in the form, with HTTP 400."""
    mirrors = load_mirrors()
    categories = sorted({m["category"] for m in mirrors})
    page = render_template_string(
        INDEX_TEMPLATE,
        mirrors=_index_rows(mirrors),
        categories=categories,
        error=message,
    )
    return page, 400


@app.route("/add", methods=["POST"])
def add_mirror_route():
    """Handle the "Add mirror" form: validate, store, and start mirroring.

    Reads ``slug``, ``category``, ``url`` and the optional ``ignore_robots``
    checkbox from the submitted form. On a validation failure, or when
    ``add_mirror`` raises, the index page is re-rendered with the error
    message and status 400. On success the first update is started in the
    background and the client is redirected to the index.
    """
    import re  # local import: only this handler needs it

    slug = (request.form.get("slug") or "").strip()
    category = (request.form.get("category") or "").strip()
    url = (request.form.get("url") or "").strip()
    ignore_robots = bool(request.form.get("ignore_robots"))

    if not slug or not category or not url:
        return _render_index_error("Slug, category, and URL are required.")
    if " " in slug:
        return _render_index_error("Slug cannot contain spaces.")
    # The slug becomes a directory name under the mirror root and a log file
    # name, so path separators and leading dots (e.g. "../x") must be refused.
    if not re.fullmatch(r"\w[\w.-]*", slug):
        return _render_index_error(
            "Slug may only contain letters, digits, '_', '.' and '-', "
            "and must not start with '.' or '-'."
        )
    # The URL is handed to wget as a command-line argument; a leading '-'
    # would be parsed as an option instead of an address.
    if url.startswith("-"):
        return _render_index_error("URL must not start with '-'.")

    try:
        add_mirror(slug, category, url, ignore_robots=ignore_robots)
    except Exception as e:
        # Deliberately broad: any failure (e.g. duplicate slug) is shown in the form.
        return _render_index_error(str(e))

    # kick off background update
    _run_update_in_background(slug)
    return redirect(url_for("index"))
|
||||
|
||||
|
||||
@app.route("/update/<slug>", methods=["POST"])
def trigger_update(slug):
    """Start a background update of the mirror *slug* and redirect to the index.

    Responds with 404 when no stored mirror has this slug, instead of
    starting a worker thread that would only fail with ``ValueError``.
    """
    if not any(m["slug"] == slug for m in load_mirrors()):
        abort(404)
    # fire-and-forget; UI will see status flip to 'updating'
    _run_update_in_background(slug)
    return redirect(url_for("index"))
|
||||
|
||||
|
||||
@app.route("/status", methods=["GET"])
def status():
    """Return the current state of every mirror as JSON for the polling script."""
    payload = []
    for entry in load_mirrors():
        stamp = entry.get("last_updated")
        if stamp:
            shown = stamp.replace("T", " ").replace("Z", " UTC")
        else:
            # The page script treats an empty string as "never updated".
            shown = ""
        payload.append({
            "slug": entry["slug"],
            "category": entry["category"],
            "url": entry["url"],
            "status": entry.get("status") or "idle",
            "last_updated": stamp,
            "last_updated_display": shown,
        })
    return jsonify({"mirrors": payload})
|
||||
|
||||
|
||||
@app.route("/logs/<slug>")
def view_log(slug):
    """Serve the wget log for *slug* as HTML-escaped text inside ``<pre>``.

    Responds with 404 when ``LOG_ROOT/<slug>.log`` is not an existing file.
    """
    from html import escape  # local import: only this handler needs it

    log_path = LOG_ROOT / f"{slug}.log"
    if not log_path.is_file():
        abort(404)
    text = log_path.read_text(encoding="utf-8", errors="replace")
    # Log lines contain remote URLs and server output, so '&', '<' and '>'
    # must be escaped before the text is embedded in an HTML response.
    return "<pre>" + escape(text, quote=False) + "</pre>"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="127.0.0.1", port=5000, debug=False)
|
||||
12
data/mirrors.json
Normal file
12
data/mirrors.json
Normal file
@@ -0,0 +1,12 @@
|
||||
[
|
||||
{
|
||||
"slug": "wgpu-tutorial",
|
||||
"category": "rust",
|
||||
"url": "https://sotrh.github.io/learn-wgpu/",
|
||||
"ignore_robots": false,
|
||||
"created_at": "2025-12-02T07:15:12Z",
|
||||
"last_updated": null,
|
||||
"status": "error",
|
||||
"last_error": "wget exited with 4"
|
||||
}
|
||||
]
|
||||
138
mirror_manager.py
Executable file
138
mirror_manager.py
Executable file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Manage the various mirrors for the mirror website.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import datetime as dt
|
||||
from pathlib import Path
|
||||
|
||||
BASE = Path("/srv/www")
|
||||
DATA_FILE = BASE / "data" / "mirrors.json"
|
||||
MIRROR_ROOT = BASE / "mirrors"
|
||||
LOG_ROOT = BASE / "logs"
|
||||
|
||||
MIRROR_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
LOG_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
    # timestamp and format it explicitly so the trailing "Z" form is unchanged.
    return dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
def load_mirrors() -> list[dict]:
    """Return the parsed JSON content of DATA_FILE, or ``[]`` if it does not exist."""
    if DATA_FILE.exists():
        return json.loads(DATA_FILE.read_text(encoding="utf-8"))
    return []
|
||||
|
||||
|
||||
def save_mirrors(mirrors: list[dict]) -> None:
    """Write *mirrors* to DATA_FILE as indented JSON.

    The data is written to a sibling ``.tmp`` file first and then moved over
    DATA_FILE, so the real file is only replaced once the write has finished.
    """
    scratch = DATA_FILE.with_suffix(".tmp")
    with scratch.open("w", encoding="utf-8") as handle:
        handle.write(json.dumps(mirrors, indent=2))
    scratch.replace(DATA_FILE)
|
||||
|
||||
|
||||
def get_mirror(mirrors: list[dict], slug: str) -> dict | None:
|
||||
for m in mirrors:
|
||||
if m["slug"] == slug:
|
||||
return m
|
||||
return None
|
||||
|
||||
|
||||
def add_mirror(slug: str,
               category: str,
               url: str,
               ignore_robots: bool = False) -> dict:
    """Register a new mirror in the data file and return its entry.

    The entry starts with status ``"queued"`` and no ``last_updated`` value;
    nothing is downloaded here.

    Raises:
        ValueError: if a mirror with the same slug is already stored.
    """
    existing = load_mirrors()
    if get_mirror(existing, slug) is not None:
        raise ValueError(f"Mirror with slug '{slug}' already exists!")

    entry = dict(
        slug=slug,
        category=category,
        url=url,
        ignore_robots=bool(ignore_robots),
        created_at=_now_iso(),
        last_updated=None,
        status="queued",
        last_error=None,
    )
    save_mirrors(existing + [entry])
    return entry
|
||||
|
||||
|
||||
def _record_mirror_state(slug: str, **fields) -> None:
    """Merge *fields* into the stored entry for *slug* and save the data file.

    The mirror list is re-read right before writing, so entries added or
    changed by other callers during a long wget run are not overwritten
    with a stale copy.
    """
    mirrors = load_mirrors()
    entry = get_mirror(mirrors, slug)
    if entry is None:
        # The entry is no longer in the data file; nothing to record.
        return
    entry.update(fields)
    save_mirrors(mirrors)


def update_mirror(slug: str) -> None:
    """Run wget mirror for a single slug (blocking).

    Marks the mirror as ``"updating"``, runs wget with its output appended to
    ``LOG_ROOT/<slug>.log``, then stores the outcome: ``"idle"`` plus a new
    ``last_updated`` on success, or ``"error"`` plus ``last_error`` on failure.
    Failures of wget itself are recorded, not raised.

    Raises:
        ValueError: if no stored mirror has this slug.
    """
    m = get_mirror(load_mirrors(), slug)
    if m is None:
        raise ValueError(f"No such mirror: {slug}")

    _record_mirror_state(slug, status="updating", last_error=None)

    target_dir = MIRROR_ROOT / slug
    target_dir.mkdir(parents=True, exist_ok=True)
    log_file = LOG_ROOT / f"{slug}.log"

    robots_setting = "off" if m.get("ignore_robots") else "on"

    # Polite wget:
    # --mirror implies -r -N -l inf --no-remove-listing
    cmd = [
        "wget",
        "--mirror",
        "--convert-links",
        "--adjust-extension",
        "--page-requisites",
        "--no-parent",
        "--wait=0.70",
        "--random-wait",
        # "--limit-rate=50m",
        # -e runs a .wgetrc-style command. Without the option prefix wget
        # treated "execute=robots=..." as an extra URL to download.
        "-e",
        f"robots={robots_setting}",
        "-P",
        str(target_dir),
        # End of options: the stored URL is always treated as an address.
        "--",
        m["url"],
    ]

    outcome = {}
    try:
        with log_file.open("a", encoding="utf-8") as lf:
            lf.write(f"\n=== {_now_iso()} : "
                     f"Starting mirror of {m['url']} ===\n")
            # Flush so the header lands before wget writes to the same file.
            lf.flush()
            subprocess.run(
                cmd,
                stdout=lf,
                stderr=subprocess.STDOUT,
                check=True,
            )
            lf.write(f"=== {_now_iso()} : Completed mirror of {m['url']} ===\n")
            lf.flush()
        outcome = {
            "last_updated": _now_iso(),
            "status": "idle",
            "last_error": None,
        }
    except subprocess.CalledProcessError as e:
        outcome = {
            "status": "error",
            "last_error": f"wget exited with {e.returncode}",
        }
        with log_file.open("a", encoding="utf-8") as lf:
            lf.write(f"*** ERROR: wget failed with code {e.returncode}\n")
    except Exception as e:
        # Deliberately broad: any other failure (e.g. wget not installed) is
        # recorded on the mirror entry instead of killing the worker thread.
        outcome = {
            "status": "error",
            "last_error": f"{type(e).__name__}: {e}",
        }
        with log_file.open("a", encoding="utf-8") as lf:
            lf.write(f"*** ERROR: {type(e).__name__}: {e}\n")
    finally:
        _record_mirror_state(slug, **outcome)
|
||||
|
||||
|
||||
def update_all_mirrors() -> None:
    """Update every stored mirror, one after another, in stored order."""
    for entry in load_mirrors():
        update_mirror(entry["slug"])
|
||||
@@ -1,2 +0,0 @@
|
||||
# Slug URL
|
||||
wgpu-tutorial https://sotrh.github.io/learn-wgpu/
|
||||
15
update_mirrors.py
Executable file
15
update_mirrors.py
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
from mirror_manager import update_all_mirrors, update_mirror
|
||||
|
||||
|
||||
def main():
    """Update one mirror when exactly one slug argument is given, else all of them."""
    args = sys.argv[1:]
    if len(args) != 1:
        update_all_mirrors()
        return
    update_mirror(args[0])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
BASE="/srv/www"
|
||||
URL_LIST="$BASE/mirrors.txt"
|
||||
OUTDIR="$BASE/mirrors"
|
||||
|
||||
mkdir -p "$OUTDIR"
|
||||
|
||||
# If a slug is passed as an argument, only update that one.
|
||||
ONLY_SLUG="${1:-}"
|
||||
|
||||
while read -r slug url; do
|
||||
# skip empty lines & comments
|
||||
[ -z "${slug:-}" ] && continue
|
||||
[[ "$slug" =~ ^# ]] && continue
|
||||
|
||||
if [ -n "$ONLY_SLUG" ] && [ "$slug" != "$ONLY_SLUG" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "=== Mirroring $slug ($url) ==="
|
||||
|
||||
# Each mirror in its own directory
|
||||
TARGET_DIR="$OUTDIR/$slug"
|
||||
mkdir -p "$TARGET_DIR"
|
||||
cd "$TARGET_DIR"
|
||||
|
||||
# Mirror site
|
||||
wget \
|
||||
--mirror \
|
||||
--convert-links \
|
||||
--adjust-extension \
|
||||
--page-requisites \
|
||||
--no-parent \
|
||||
"$url"
|
||||
|
||||
echo "=== Done $slug ==="
|
||||
done < "$URL_LIST"
|
||||
|
||||
# Regenerate index page
|
||||
cd "$BASE"
|
||||
python3 "$BASE/generate_index.py"
|
||||
Reference in New Issue
Block a user