initial commit

This commit is contained in:
2026-03-04 21:17:36 +01:00
commit 5b5949c059
6 changed files with 446 additions and 0 deletions

316
generator.py Normal file
View File

@@ -0,0 +1,316 @@
#!/usr/bin/env python3
"""
Wiki.js → Gemini Blog Post Pipeline
Commands:
fetch Download a Wiki.js page as Markdown via GraphQL
write Generate a blog post from SOURCE.md using Gemini
translate Translate BLOGPOST.md using Gemini
upload Upload TRANSLATED_BLOGPOST.md to Wiki.js under /blog/{kebab-title}
Required environment variables:
WIKI_BASE_DOMAIN e.g. https://wiki.example.com (used by fetch, upload)
WIKI_TOKEN Bearer token for Wiki.js API (used by fetch, upload)
GEMINI_API_KEY Google Gemini API key (used by write, translate)
Optional environment variables:
ORIGINAL_LANG Language for the blog post (default: Hungarian)
TRANSLATE_LANG Target language for translation (default: English)
"""
import argparse
import json
import os
import re
import sys
import unicodedata
import urllib.error
import urllib.request
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# File names used to hand data from one pipeline stage to the next.
SOURCE_FILE = "SOURCE.md"  # written by `fetch`, read by `write`
BLOGPOST_FILE = "BLOGPOST.md"  # written by `write`, read by `translate`
TRANSLATED_FILE = "TRANSLATED_BLOGPOST.md"  # written by `translate`, read by `upload`
INSTRUCTIONS_FILE = "INSTRUCTIONS.md"  # writing guidelines read by `write`
# Model name and endpoint prefix combined into the generateContent URL.
GEMINI_MODEL = "gemini-flash-latest"
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def require_env(name: str, default: str = None) -> str:
    """Return the value of environment variable *name*, or exit if it is empty.

    *default* is used when the variable is absent from the environment. If
    the resulting value is empty (variable unset with no default, or set to
    an empty string), an error is printed to stderr and the process exits
    with status 1.
    """
    resolved = os.environ.get(name, default)
    if resolved:
        return resolved
    print(f"ERROR: Environment variable '{name}' is required.", file=sys.stderr)
    sys.exit(1)
def http_post(url: str, payload: dict, headers: dict, timeout: float = 120.0) -> dict:
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Args:
        url: Endpoint to send the request to.
        payload: JSON-serialisable request body.
        headers: HTTP headers to attach to the request.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the pipeline indefinitely.

    Returns:
        The response body parsed from JSON.

    Any failure (HTTP error status, unreachable host, socket error/timeout,
    or a response body that is not valid UTF-8 JSON) is reported on stderr
    and terminates the process with exit status 1.
    """
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(url, data=data, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # errors="replace": an undecodable error page must not mask the status.
        body = e.read().decode("utf-8", errors="replace")
        print(f"ERROR: HTTP {e.code} from {url}\n{body}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"ERROR: Could not reach {url}: {e.reason}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Socket-level failures while reading the body (e.g. a read timeout)
        # are not wrapped in URLError by urllib.
        print(f"ERROR: Could not reach {url}: {e}", file=sys.stderr)
        sys.exit(1)
    try:
        return json.loads(raw.decode("utf-8"))
    except ValueError as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"ERROR: Invalid JSON response from {url}: {e}", file=sys.stderr)
        sys.exit(1)
def wiki_graphql(base: str, token: str, query: str, variables: dict = None) -> dict:
    """Send a GraphQL request to the Wiki.js instance at *base*.

    Args:
        base: Base URL of the wiki, with or without a trailing slash.
        token: API token sent as a Bearer ``Authorization`` header.
        query: GraphQL query or mutation document.
        variables: Optional GraphQL variables; omitted from the request
            when empty or None.

    Returns:
        The decoded JSON response as returned by ``http_post``.
    """
    # Tolerate a trailing slash on the configured base URL; without this the
    # endpoint would be built as "https://host//graphql".
    url = f"{base.rstrip('/')}/graphql"
    payload = {"query": query}
    if variables:
        payload["variables"] = variables
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    return http_post(url, payload, headers)
def gemini_generate(api_key: str, prompt: str) -> str:
    """Send *prompt* to the configured Gemini model and return the generated text.

    Args:
        api_key: Gemini API key, sent in the ``X-goog-api-key`` header.
        prompt: Full prompt text, sent as a single text part.

    Returns:
        The text of the first candidate, with all of its text parts
        concatenated in order.

    Exits with status 1 (after printing the raw response to stderr) when the
    response does not contain a candidate with at least one text part.
    """
    url = f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent"
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    headers = {"Content-Type": "application/json", "X-goog-api-key": api_key}
    resp = http_post(url, payload, headers)
    try:
        parts = resp["candidates"][0]["content"]["parts"]
        # An answer may be split across several parts; returning only the
        # first one would silently truncate the output.
        texts = [part["text"] for part in parts if "text" in part]
        if not texts:
            raise KeyError("text")
    except (KeyError, IndexError, TypeError):
        # TypeError: a field that should be a dict/list came back as null or
        # another unexpected type.
        print(f"ERROR: Unexpected Gemini response structure: {resp}", file=sys.stderr)
        sys.exit(1)
    return "".join(texts)
def to_kebab(text: str) -> str:
    """Convert *text* into a lowercase, hyphen-separated ASCII slug.

    Accented letters are transliterated to their base letter instead of
    being dropped, so a title such as "Café au lait" becomes
    "cafe-au-lait" rather than "caf-au-lait". Any remaining character that
    is not ``a-z``, ``0-9``, whitespace or a hyphen is removed; runs of
    whitespace and hyphens collapse into one hyphen, and leading/trailing
    hyphens are stripped. The result may be empty.
    """
    # NFKD splits an accented letter into its base letter plus combining
    # marks; dropping the marks leaves the plain base letter behind.
    text = unicodedata.normalize("NFKD", text).lower()
    text = "".join(ch for ch in text if not unicodedata.combining(ch))
    text = re.sub(r"[^a-z0-9\s-]", "", text)
    text = re.sub(r"[\s-]+", "-", text)
    return text.strip("-")
def read_file(path: str) -> str:
    """Return the UTF-8 text content of the file at *path*.

    Exits with status 1, after printing an error to stderr, when the file
    does not exist or cannot be read or decoded.
    """
    # Open directly instead of checking os.path.exists first: this avoids
    # the check-then-open race and lets other failures (directory,
    # permissions, bad encoding) be reported cleanly too.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"ERROR: File not found: {path}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"ERROR: Could not read {path}: {e}", file=sys.stderr)
        sys.exit(1)
def write_file(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8 text and print a confirmation line.

    An existing file at *path* is overwritten.
    """
    with open(path, mode="w", encoding="utf-8") as out:
        out.write(content)
    print(f"✓ Saved to {path}")
# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------
def cmd_fetch(args):
    """Download a Wiki.js page as Markdown via GraphQL.

    Reads WIKI_BASE_DOMAIN and WIKI_TOKEN from the environment, turns
    ``args.url`` (a page path, or a full URL under the base domain) into a
    page path, queries that page in the "en" locale and saves its content to
    SOURCE_FILE. Exits with status 1 when no page is returned.
    """
    base = require_env("WIKI_BASE_DOMAIN")
    token = require_env("WIKI_TOKEN")
    # Strip the base domain only when it is a prefix of the argument. A blind
    # str.replace would also delete matches inside the path and would miss a
    # base that was configured with a trailing slash.
    page_path = args.url
    prefix = base.rstrip("/")
    if page_path.startswith(prefix):
        page_path = page_path[len(prefix):]
    page_path = page_path.lstrip("/")
    print(f"→ Fetching wiki page: /{page_path}")
    query = """
    query ($path: String!) {
      pages {
        singleByPath(path: $path, locale: "en") {
          id
          title
          description
          content
        }
      }
    }
    """
    resp = wiki_graphql(base, token, query, {"path": page_path})
    # A GraphQL error response may carry "data": null (or a null "pages");
    # `or {}` keeps the lookups from raising AttributeError on None so the
    # error branch below can report what the server said.
    page = ((resp.get("data") or {}).get("pages") or {}).get("singleByPath")
    if not page:
        errors = resp.get("errors", resp)
        print(f"ERROR: Page not found at '{page_path}': {errors}", file=sys.stderr)
        sys.exit(1)
    write_file(SOURCE_FILE, page["content"])
def cmd_write(args):
    """Generate a blog post from SOURCE.md using Gemini.

    Combines the contents of INSTRUCTIONS_FILE and SOURCE_FILE into a single
    prompt, asks Gemini for a post in the language given by ORIGINAL_LANG
    (default "Hungarian") and saves the answer to BLOGPOST_FILE.
    """
    gemini_key = require_env("GEMINI_API_KEY")
    language = require_env("ORIGINAL_LANG", "Hungarian")
    guidelines = read_file(INSTRUCTIONS_FILE)
    source_text = read_file(SOURCE_FILE)
    print(f"→ Generating blog post in {language} with Gemini...")
    # Prompt layout: instructions first, then the task, then the raw source.
    sections = [
        "Please read the following instructions carefully and follow them to write a blog post.\n\n",
        "## INSTRUCTIONS\n\n",
        f"{guidelines}\n\n",
        "## TASK\n\n",
        f"Read the source content below and write a blog post from it in {language} language. ",
        "Output only the blog post in Markdown format, with no additional commentary.\n\n",
        "## SOURCE CONTENT\n\n",
        f"{source_text}",
    ]
    post = gemini_generate(gemini_key, "".join(sections))
    write_file(BLOGPOST_FILE, post)
def cmd_translate(args):
    """Translate BLOGPOST.md to TRANSLATED_BLOGPOST.md using Gemini.

    The target language comes from TRANSLATE_LANG (default "English"). The
    prompt is a fixed translation request followed by the full post text.
    """
    gemini_key = require_env("GEMINI_API_KEY")
    target_lang = require_env("TRANSLATE_LANG", "English")
    post = read_file(BLOGPOST_FILE)
    print(f"→ Translating blog post to {target_lang} with Gemini...")
    request_header = (
        f"Translate the following Markdown blog post into {target_lang}. "
        "Preserve all Markdown formatting, headings, links, and code blocks exactly. "
        "Output only the translated Markdown with no additional commentary.\n\n"
    )
    translated = gemini_generate(gemini_key, request_header + post)
    write_file(TRANSLATED_FILE, translated)
def cmd_upload(args):
    """Upload TRANSLATED_BLOGPOST.md to Wiki.js under /blog/{kebab-title}.

    The first H1 heading of the file becomes the page title and is removed
    from the uploaded body; the page path is ``blog/`` plus the kebab-cased
    title. If a page already exists at that path (locale "en") it is
    updated, otherwise a new published page is created. Exits with status 1
    when the heading is missing, no path can be derived, or the API reports
    a failure.
    """
    base = require_env("WIKI_BASE_DOMAIN")
    token = require_env("WIKI_TOKEN")
    content = read_file(TRANSLATED_FILE)

    # Extract H1 title
    match = re.search(r"^#\s+(.+)", content, re.MULTILINE)
    if not match:
        print(f"ERROR: No H1 heading found in {TRANSLATED_FILE}", file=sys.stderr)
        sys.exit(1)
    title = match.group(1).strip()
    # The title is sent as its own field, so drop the heading from the body.
    content = re.sub(r"^#\s+.+\n?", "", content, count=1, flags=re.MULTILINE).lstrip("\n")

    kebab = to_kebab(title)
    if not kebab:
        # A title with no slug-able characters would target the bare "blog/" path.
        print(f"ERROR: Cannot derive a page path from title '{title}'", file=sys.stderr)
        sys.exit(1)
    page_path = f"blog/{kebab}"
    print("→ Uploading to Wiki.js")
    print(f" Title : {title}")
    print(f" Path : /{page_path}")

    # Check if page already exists
    find_query = """
    query ($path: String!) {
      pages {
        singleByPath(path: $path, locale: "en") {
          id
        }
      }
    }
    """
    find_resp = wiki_graphql(base, token, find_query, {"path": page_path})
    # `or {}` guards against "data": null / "pages": null in the response.
    existing = ((find_resp.get("data") or {}).get("pages") or {}).get("singleByPath")
    existing_id = existing.get("id") if existing else None

    if existing_id:
        print(f" Found existing page id={existing_id}, updating...")
        action = "update"
        # Title and isPublished are sent on update as well, so an updated
        # page ends up in the same state a freshly created one would
        # (title in sync with the H1, published).
        mutation = """
        mutation ($id: Int!, $title: String!, $content: String!) {
          pages {
            update(
              id: $id
              title: $title
              content: $content
              isPublished: true
              tags: ["blog"]
            ) {
              responseResult { succeeded message }
            }
          }
        }
        """
        variables = {"id": existing_id, "title": title, "content": content}
    else:
        print(" Page not found, creating new...")
        action = "create"
        mutation = """
        mutation ($path: String!, $title: String!, $content: String!) {
          pages {
            create(
              path: $path
              title: $title
              content: $content
              editor: "markdown"
              locale: "en"
              isPublished: true
              isPrivate: false
              tags: ["blog"]
              description: ""
            ) {
              responseResult { succeeded message }
              page { id }
            }
          }
        }
        """
        variables = {"path": page_path, "title": title, "content": content}

    resp = wiki_graphql(base, token, mutation, variables)
    # Report GraphQL-level errors before touching "data": on failure the
    # server may return null there, which would crash the lookups below.
    errors = resp.get("errors")
    if errors:
        print(f"ERROR: {json.dumps(errors, indent=2)}", file=sys.stderr)
        sys.exit(1)
    pages = (resp.get("data") or {}).get("pages") or {}
    result = (pages.get(action) or {}).get("responseResult") or {}
    if not result.get("succeeded"):
        print(f"ERROR: Operation failed: {result.get('message')}", file=sys.stderr)
        sys.exit(1)
    print(f"✓ Successfully uploaded to {base.rstrip('/')}/{page_path}")
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main():
    """Parse the command line and dispatch to the selected sub-command.

    Registers the ``fetch``, ``write``, ``translate`` and ``upload``
    sub-commands; each stores its handler in ``func``, which is then called
    with the parsed arguments.
    """
    cli = argparse.ArgumentParser(
        description="Wiki.js → Gemini Blog Post Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    commands = cli.add_subparsers(dest="command", required=True)

    # fetch is the only sub-command that takes a positional argument.
    fetch_parser = commands.add_parser("fetch", help="Download a Wiki.js page as Markdown")
    fetch_parser.add_argument(
        "url",
        help="Page path or full URL, e.g. /my-page or https://wiki.example.com/my-page",
    )
    fetch_parser.set_defaults(func=cmd_fetch)

    # The remaining sub-commands take no arguments of their own.
    argless_commands = (
        ("write", f"Generate blog post from {SOURCE_FILE} using Gemini", cmd_write),
        ("translate", f"Translate {BLOGPOST_FILE} using Gemini", cmd_translate),
        ("upload", f"Upload {TRANSLATED_FILE} to Wiki.js", cmd_upload),
    )
    for command_name, help_text, handler in argless_commands:
        commands.add_parser(command_name, help=help_text).set_defaults(func=handler)

    parsed = cli.parse_args()
    parsed.func(parsed)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()