Improved search

This commit is contained in:
2025-12-02 03:12:40 -05:00
parent 8ae11f4b03
commit 5ac1549567
2 changed files with 237 additions and 156 deletions

389
app.py
View File

@@ -31,9 +31,8 @@ def _run_update_in_background(slug: str):
th = threading.Thread(target=update_mirror, args=(slug,), daemon=True)
th.start()
# -------------------- TEMPLATES --------------------
INDEX_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
@@ -45,7 +44,7 @@ INDEX_TEMPLATE = r"""
<body class="h-full bg-slate-950 text-slate-100">
<div class="min-h-full">
<header class="border-b border-slate-800 bg-slate-950/80 backdrop-blur">
<div class="max-w-6xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
<div class="max-w-5xl mx-auto px-4 py-4 flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
<div>
<h1 class="text-xl font-semibold tracking-tight">Mirror Manager</h1>
<p class="text-xs text-slate-400">Local offline mirrors of external sites, grouped by category.</p>
@@ -59,147 +58,146 @@ INDEX_TEMPLATE = r"""
</div>
</header>
<main class="max-w-6xl mx-auto px-4 py-4 space-y-4">
<div class="flex flex-col lg:flex-row gap-4">
<!-- Left: mirrors list -->
<section class="flex-1 bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
<div class="flex flex-wrap items-center gap-2">
<span class="text-xs text-slate-400">Categories:</span>
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
All ({{ mirrors|length }})
</button>
{% for cat in categories %}
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
{{ cat }}
</button>
<main class="max-w-5xl mx-auto px-4 py-4 space-y-4">
<!-- Mirrors list -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<div class="flex flex-col md:flex-row md:items-center md:justify-between gap-3 mb-3">
<div class="flex flex-wrap items-center gap-2">
<span class="text-xs text-slate-400">Categories:</span>
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-700 text-slate-100 hover:border-sky-500 cat-pill cat-pill-active" data-category="all">
All ({{ mirrors|length }})
</button>
{% for cat in categories %}
<button class="px-2.5 py-1 rounded-full text-xs border bg-slate-900 border-slate-800 text-slate-400 hover:border-sky-500 hover:text-slate-100 cat-pill" data-category="{{ cat }}">
{{ cat }}
</button>
{% endfor %}
</div>
<div class="flex gap-2">
<input
id="search"
class="w-full md:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500"
placeholder="Filter by slug / URL / category…"
/>
</div>
</div>
<div class="overflow-x-auto border border-slate-800 rounded-xl">
<table class="min-w-full text-sm">
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
<tr>
<th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
<th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
<th class="px-3 py-2 text-left"></th>
</tr>
</thead>
<tbody id="mirror-table" class="divide-y divide-slate-900/80">
{% for m in mirrors %}
<tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
<td class="px-3 py-2 align-top">
<div class="flex flex-col gap-1">
<a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
{{ m.slug }}
</a>
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
View live log
</a>
</div>
</td>
<td class="px-3 py-2 align-top">
<div class="flex flex-wrap gap-1">
{% for c in m.categories %}
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
{% endfor %}
</div>
</td>
<td class="px-3 py-2 align-top max-w-xs">
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
</td>
<td class="px-3 py-2 align-top text-xs text-slate-300">
{% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %}
<span class="text-slate-600">never</span>
{% endif %}
</td>
<td class="px-3 py-2 align-top text-xs">
{% set st = m.status or 'idle' %}
<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
<span class="w-2 h-2 rounded-full
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
<span class="capitalize">{{ st }}</span>
</div>
</td>
<td class="px-3 py-2 align-top text-right text-[0.7rem]">
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
<button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
<span>Update</span>
</button>
</form>
</td>
</tr>
{% endfor %}
</div>
<div class="flex flex-col sm:flex-row gap-2">
<div class="relative">
<input id="search" class="w-full sm:w-64 rounded-full bg-slate-900 border border-slate-700 px-3 py-1.5 text-sm text-slate-100 placeholder:text-slate-500 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Filter by slug / URL / category…" />
</div>
</div>
</div>
<div class="overflow-x-auto border border-slate-800 rounded-xl">
<table class="min-w-full text-sm">
<thead class="bg-slate-900/70 text-xs uppercase text-slate-400">
<tr>
<th class="px-3 py-2 text-left whitespace-nowrap">Slug</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Categories</th>
<th class="px-3 py-2 text-left whitespace-nowrap">URL</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Last updated</th>
<th class="px-3 py-2 text-left whitespace-nowrap">Status</th>
<th class="px-3 py-2 text-left"></th>
</tr>
</thead>
<tbody id="mirror-table" class="divide-y divide-slate-900/80">
{% for m in mirrors %}
<tr class="hover:bg-slate-900/80 transition" data-slug="{{ m.slug }}" data-categories="{{ m.categories_joined }}" data-search="{{ (m.slug ~ ' ' ~ m.categories_joined ~ ' ' ~ m.url)|lower }}">
<td class="px-3 py-2 align-top">
<div class="flex flex-col gap-1">
<a href="/mirrors/{{ m.slug }}/" target="_blank" class="font-mono text-xs text-sky-400 hover:text-sky-300 break-all">
{{ m.slug }}
</a>
<a href="{{ url_for('log_view', slug=m.slug) }}" target="_blank" class="text-[0.65rem] text-slate-400 hover:text-slate-200">
View live log
</a>
</div>
</td>
<td class="px-3 py-2 align-top">
<div class="flex flex-wrap gap-1">
{% for c in m.categories %}
<span class="px-1.5 py-0.5 rounded-full text-[0.65rem] bg-slate-800/80 text-slate-300 border border-slate-700">{{ c }}</span>
{% endfor %}
</div>
</td>
<td class="px-3 py-2 align-top max-w-xs">
<code class="font-mono text-[0.7rem] text-slate-300 break-all">{{ m.url }}</code>
</td>
<td class="px-3 py-2 align-top text-xs text-slate-300">
{% if m.last_updated %}
<span title="{{ m.last_updated_raw }}">{{ m.last_updated }}</span>
{% else %}
<span class="text-slate-600">never</span>
{% endif %}
</td>
<td class="px-3 py-2 align-top text-xs">
{% set st = m.status or 'idle' %}
<div class="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-full bg-slate-900 border border-slate-800">
<span class="w-2 h-2 rounded-full
{% if st == 'idle' %}bg-emerald-400{% elif st == 'updating' %}bg-amber-400 animate-pulse{% elif st == 'warning' %}bg-yellow-400{% else %}bg-rose-400{% endif %}"></span>
<span class="capitalize">{{ st }}</span>
</div>
</td>
<td class="px-3 py-2 align-top text-right text-[0.7rem]">
<form method="post" action="{{ url_for('trigger_update', slug=m.slug) }}" class="inline">
<button class="inline-flex items-center gap-1 px-2 py-1 rounded-full border border-slate-700 text-slate-200 hover:border-sky-500 hover:text-sky-100">
<span>Update</span>
</button>
</form>
</td>
</tr>
{% endfor %}
{% if mirrors|length == 0 %}
<tr>
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
No mirrors yet. Add one on the right.
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</section>
<!-- Right: add mirror + content search -->
<section class="w-full lg:w-80 flex flex-col gap-4">
<div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Add mirror</h2>
<form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
<div>
<label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
</div>
<div>
<label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
<input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
</div>
<div>
<label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
<input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
</div>
<div class="flex items-start gap-2">
<input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
<label for="ignore_robots" class="text-xs text-slate-400">
Ignore robots.txt (only if you explicitly want to archive disallowed paths).
</label>
</div>
{% if error %}
<p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
{% if mirrors|length == 0 %}
<tr>
<td colspan="6" class="px-3 py-6 text-center text-sm text-slate-500">
No mirrors yet. Add one below.
</td>
</tr>
{% endif %}
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
Add &amp; mirror
</button>
<p class="text-[0.7rem] text-slate-500">
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
</p>
</form>
</div>
</tbody>
</table>
</div>
</section>
<div class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Content search</h2>
<form id="search-form" class="space-y-2">
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
Run ripgrep search
</button>
</form>
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
<!-- Add mirror -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40 space-y-3">
<h2 class="text-sm font-semibold">Add mirror</h2>
<form method="post" action="{{ url_for('add_mirror_route') }}" class="space-y-3">
<div>
<label for="slug" class="block text-xs font-medium text-slate-300 mb-1">Slug</label>
<input id="slug" name="slug" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500 font-mono" placeholder="e.g. wgpu-tutorial" />
</div>
</section>
</div>
<div>
<label for="categories" class="block text-xs font-medium text-slate-300 mb-1">Categories</label>
<input id="categories" name="categories" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="e.g. tutorials, graphics, rust" />
</div>
<div>
<label for="url" class="block text-xs font-medium text-slate-300 mb-1">URL</label>
<input id="url" name="url" required class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="https://example.com/some/path/" />
</div>
<div class="flex items-start gap-2">
<input id="ignore_robots" name="ignore_robots" value="1" type="checkbox" class="mt-0.5 rounded border-slate-600 bg-slate-900 text-sky-500 focus:ring-sky-500" />
<label for="ignore_robots" class="text-xs text-slate-400">
Ignore robots.txt (only if you explicitly want to archive disallowed paths).
</label>
</div>
{% if error %}
<p class="text-xs text-rose-300 bg-rose-950/60 border border-rose-900 rounded-lg px-2 py-1">{{ error }}</p>
{% endif %}
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full bg-gradient-to-r from-sky-500 to-indigo-500 px-3 py-2 text-xs font-medium text-white hover:from-sky-400 hover:to-indigo-400">
Add &amp; mirror
</button>
<p class="text-[0.7rem] text-slate-500">
New mirrors are cloned in the background. Status will show as <span class="text-amber-300">updating</span> until done.
</p>
</form>
</section>
<!-- Content search -->
<section class="bg-slate-950/80 border border-slate-800 rounded-2xl p-4 shadow-xl shadow-black/40">
<h2 class="text-sm font-semibold mb-2">Content search</h2>
<form id="search-form" class="space-y-2">
<input id="content-query" class="w-full rounded-lg bg-slate-900 border border-slate-700 px-2.5 py-1.5 text-sm text-slate-100 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:border-sky-500" placeholder="Search text across all mirrors (using rg)…" />
<button type="submit" class="w-full inline-flex items-center justify-center gap-1.5 rounded-full border border-slate-700 bg-slate-900 px-3 py-2 text-xs font-medium text-slate-100 hover:border-sky-500 hover:text-sky-100">
Run ripgrep search
</button>
</form>
<div id="search-results" class="mt-2 max-h-64 overflow-y-auto text-[0.7rem] space-y-1 text-slate-300"></div>
</section>
</main>
</div>
@@ -289,13 +287,32 @@ INDEX_TEMPLATE = r"""
return;
}
searchResults.innerHTML = '';
data.results.forEach(r => {
const div = document.createElement('div');
div.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";
div.innerHTML =
'<div class="font-mono text-[0.65rem] text-sky-300 break-all">' + r.path + '</div>' +
'<div class="text-[0.7rem] text-slate-200 whitespace-pre-wrap">' + r.line + '</div>';
searchResults.appendChild(div);
const wrapper = document.createElement('div');
wrapper.className = "border border-slate-800 rounded-lg px-2 py-1 bg-slate-900/70";
const pathLine = document.createElement('div');
pathLine.className = "font-mono text-[0.65rem] text-sky-300 break-all";
if (r.url) {
const link = document.createElement('a');
link.href = r.url;
link.target = "_blank";
link.rel = "noopener noreferrer";
link.textContent = r.path + (r.line ? `:${r.line}` : "");
pathLine.appendChild(link);
} else {
pathLine.textContent = r.path + (r.line ? `:${r.line}` : "");
}
const snippetLine = document.createElement('div');
snippetLine.className = "text-[0.7rem] text-slate-200 whitespace-pre-wrap";
snippetLine.textContent = r.snippet || "";
wrapper.appendChild(pathLine);
wrapper.appendChild(snippetLine);
searchResults.appendChild(wrapper);
});
} catch (e) {
searchResults.textContent = 'Search failed.';
@@ -306,6 +323,7 @@ INDEX_TEMPLATE = r"""
</html>
"""
LOG_TEMPLATE = r"""
<!doctype html>
<html class="h-full">
@@ -496,31 +514,94 @@ def content_search():
q = (request.args.get("q") or "").strip()
if not q:
return jsonify({"results": []})
def make_snippet(text: str, query: str, radius: int = 80, max_len: int = 240) -> str:
    """Return a short excerpt of *text* centred on the first match of *query*.

    The match is located case-insensitively by lowercasing both strings and
    using a plain substring search (no regex interpretation).

    Args:
        text: The full line of text to excerpt.
        query: The search string to look for inside *text*.
        radius: Number of characters of context kept on each side of the
            match when the query is found.
        max_len: Maximum number of characters kept from the start of *text*
            when the query is not found as a literal substring.

    Returns:
        An empty string when *text* is empty. Otherwise the excerpt, with an
        ellipsis ("…") on whichever side(s) were cut off so the reader can
        tell the snippet is not the complete line.
    """
    ellipsis = "…"

    if not text:
        return ""

    idx = text.lower().find(query.lower())

    if idx == -1:
        # No literal match: fall back to the beginning of the text.
        snippet = text[:max_len]
        if len(text) > max_len:
            snippet += ellipsis
        return snippet

    start = max(0, idx - radius)
    end = min(len(text), idx + len(query) + radius)
    snippet = text[start:end]

    # Mark each side that was truncated.
    if start > 0:
        snippet = ellipsis + snippet
    if end < len(text):
        snippet += ellipsis
    return snippet
try:
# Only search "page-like" files: html / md / txt
proc = subprocess.run(
["rg", "--line-number", "--no-heading",
"--color", "never", q, str(MIRROR_ROOT)],
[
"rg",
"--line-number",
"--no-heading",
"--color", "never",
"--max-count", "5", # max 5 hits per file
"--type-add", "page:*.{html,htm,md,markdown,txt}",
"-tpage",
q,
str(MIRROR_ROOT),
],
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
text=True,
timeout=10,
)
except FileNotFoundError:
return jsonify({"results": [{"path": "(error)", "line": "ripgrep (rg) not installed"}]})
return jsonify({
"results": [{
"path": "(error)",
"line": 0,
"url": "",
"snippet": "ripgrep (rg) is not installed."
}]
})
except subprocess.TimeoutExpired:
return jsonify({"results": [{"path": "(error)", "line": "rg timed out"}]})
return jsonify({
"results": [{
"path": "(error)",
"line": 0,
"url": "",
"snippet": "rg timed out."
}]
})
results = []
for line in proc.stdout.splitlines()[:50]:
for line in proc.stdout.splitlines():
parts = line.split(":", 2)
if len(parts) != 3:
continue
path, lineno, content = parts
rel = str(Path(path).relative_to(MIRROR_ROOT))
try:
rel_path = str(Path(path).relative_to(MIRROR_ROOT))
except ValueError:
# Shouldn't happen, but be defensive
rel_path = path
# Short text snippet around the query
snippet = make_snippet(content, q)
# Build a URL that opens the mirrored page in the browser
# Assuming nginx serves /srv/www/mirrors as /mirrors/
url = "/mirrors/" + rel_path.replace("\\", "/")
results.append({
"path": f"{rel}:{lineno}",
"line": content.strip(),
"path": rel_path,
"line": int(lineno),
"url": url,
"snippet": snippet,
})
if len(results) >= 50:
break
return jsonify({"results": results})

View File

@@ -5,8 +5,8 @@
"url": "https://sotrh.github.io/learn-wgpu/",
"ignore_robots": false,
"created_at": "2025-12-02T07:15:12Z",
"last_updated": null,
"status": "error",
"last_updated": "2025-12-02T08:00:08Z",
"status": "idle",
"last_error": "wget exited with 4"
}
]