commit 5b5949c05983a8d927bc44346b62fce5058064da Author: Zsolt Tasnadi Date: Wed Mar 4 21:17:36 2026 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..55e02b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.env +BLOGPOST.md +TRANSLATED_BLOGPOST.md +SOURCE.md diff --git a/INSTRUCTIONS.md b/INSTRUCTIONS.md new file mode 100644 index 0000000..2ec75d2 --- /dev/null +++ b/INSTRUCTIONS.md @@ -0,0 +1 @@ +Írj egy kedves, pozitív hangvételű blogpost-ot ami tájékoztat arról, hogy épp min dolgozunk. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..afae97d --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +ENV = export $(shell cat .env | grep -v '^\#' | grep -v '^$$' | xargs) + +.PHONY: fetch write translate upload all + +## Letölt egy wiki oldalt SOURCE.md-be +## Használat: make fetch URL=/path/to/page +fetch: + $(ENV) && python3 generator.py fetch $(URL) + +## Blogposztot ír SOURCE.md-ből → BLOGPOST.md +write: + $(ENV) && python3 generator.py write + +## Lefordítja BLOGPOST.md → TRANSLATED_BLOGPOST.md +translate: + $(ENV) && python3 generator.py translate + +## Feltölti TRANSLATED_BLOGPOST.md-t a wikire +upload: + $(ENV) && python3 generator.py upload + +## Teljes pipeline: write → translate → upload +all: write translate upload \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1e86239 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# Wiki.js Blog Writer Pipeline + +Automated pipeline that fetches a page from a [Wiki.js](https://js.wiki) instance, turns it into a blog post using Google Gemini, translates it, and publishes it back to the wiki under `/blog/`. + +## How it works + +``` +Wiki.js page → SOURCE.md → BLOGPOST.md → TRANSLATED_BLOGPOST.md → Wiki.js /blog/ + fetch write translate upload +``` + +1. **fetch** — Downloads a Wiki.js page as Markdown via GraphQL +2. 
**write** — Gemini reads `INSTRUCTIONS.md` and writes a blog post from `SOURCE.md` in the configured language +3. **translate** — Gemini translates the blog post into the target language +4. **upload** — Publishes the translated post to Wiki.js under `/blog/{kebab-case-title}`; creates the page if it doesn't exist, updates it if it does + +## Requirements + +- Python 3.8+ +- A running Wiki.js instance with API access +- A Google Gemini API key (get one free at [aistudio.google.com](https://aistudio.google.com)) + +## Setup + +**1. Clone the repo and enter the directory** + +```bash +git clone <repository-url> +cd blog_writer +``` + +**2. Copy the example env file and fill in your values** + +```bash +cp env.example .env +``` + +| Variable | Description | +|---|---| +| `WIKI_BASE_DOMAIN` | Base URL of your Wiki.js instance, e.g. `https://wiki.example.com` | +| `WIKI_TOKEN` | Wiki.js API bearer token (Admin → API Access) | +| `GEMINI_API_KEY` | Google Gemini API key | +| `ORIGINAL_LANG` | Language for the generated blog post (default: `Hungarian`) | +| `TRANSLATE_LANG` | Target language for translation (default: `English`) | + +**3. Create an `INSTRUCTIONS.md` file** + +This file tells Gemini how to write the blog post — tone, structure, length, style, etc. Example: + +```markdown +Write an engaging blog post aimed at a general technical audience. +Use a friendly but professional tone. Keep it under 600 words. +Start with a short hook, then summarize the key points, and end with a call to action. +Do not use bullet points — write in flowing paragraphs. +``` + +## Usage + +```bash +# Fetch a wiki page +make fetch URL=/path/to/page + +# Generate the blog post +make write + +# Translate it +make translate + +# Upload to Wiki.js +make upload + +# Run the full pipeline (write → translate → upload) +make all +``` + +> `make fetch` must be run separately with a `URL` argument before `make all`. 
+ +## File overview + +| File | Description | +|---|---| +| `generator.py` | Main Python script | +| `Makefile` | Convenience wrapper around `generator.py` | +| `.env` | Your local environment variables (never commit this) | +| `env.example` | Example env file safe to commit | +| `INSTRUCTIONS.md` | Gemini prompt instructions for blog post style | +| `SOURCE.md` | Raw Markdown fetched from Wiki.js | +| `BLOGPOST.md` | Generated blog post in `ORIGINAL_LANG` | +| `TRANSLATED_BLOGPOST.md` | Translated blog post in `TRANSLATE_LANG` | + +## Notes + +- The `# H1` title is extracted from `TRANSLATED_BLOGPOST.md` and used as both the Wiki.js page title and the URL slug — it is stripped from the content body to avoid duplication. +- If a page already exists at the target path, it will be updated rather than duplicated. +- Add `.env`, `SOURCE.md`, `BLOGPOST.md`, and `TRANSLATED_BLOGPOST.md` to your `.gitignore`. \ No newline at end of file diff --git a/env.example b/env.example new file mode 100644 index 0000000..a06bae0 --- /dev/null +++ b/env.example @@ -0,0 +1,7 @@ +WIKI_BASE_DOMAIN=https://wiki.example.com +WIKI_TOKEN=your_wikijs_bearer_token_here + +GEMINI_API_KEY=your_gemini_api_key_here + +ORIGINAL_LANG=Hungarian +TRANSLATE_LANG=English diff --git a/generator.py b/generator.py new file mode 100644 index 0000000..878af91 --- /dev/null +++ b/generator.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +""" +Wiki.js → Gemini Blog Post Pipeline + +Commands: + fetch Download a Wiki.js page as Markdown via GraphQL + write Generate a blog post from SOURCE.md using Gemini + translate Translate BLOGPOST.md using Gemini + upload Upload TRANSLATED_BLOGPOST.md to Wiki.js under /blog/{kebab-title} + +Required environment variables: + WIKI_BASE_DOMAIN e.g. 
"""
Wiki.js → Gemini Blog Post Pipeline

Commands:
  fetch       Download a Wiki.js page as Markdown via GraphQL
  write       Generate a blog post from SOURCE.md using Gemini
  translate   Translate BLOGPOST.md using Gemini
  upload      Upload TRANSLATED_BLOGPOST.md to Wiki.js under /blog/{kebab-title}

Required environment variables:
  WIKI_BASE_DOMAIN   e.g. https://wiki.example.com
  WIKI_TOKEN         Bearer token for Wiki.js API
  GEMINI_API_KEY     Google Gemini API key
  ORIGINAL_LANG      Language for the blog post (default: Hungarian)
  TRANSLATE_LANG     Target language for translation (default: English)
"""

import argparse
import json
import os
import re
import sys
import unicodedata
import urllib.error
import urllib.request
from typing import Any, NoReturn, Optional, Tuple

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

SOURCE_FILE = "SOURCE.md"
BLOGPOST_FILE = "BLOGPOST.md"
TRANSLATED_FILE = "TRANSLATED_BLOGPOST.md"
INSTRUCTIONS_FILE = "INSTRUCTIONS.md"

GEMINI_MODEL = "gemini-flash-latest"
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models"

# Upper bound for a single HTTP round trip. Generous because text generation
# can be slow, but finite so a stalled connection cannot hang the pipeline.
HTTP_TIMEOUT_SECONDS = 300

# First ATX level-1 heading. "[ \t]+" (not "\s+") keeps the match on one line,
# so a bare "#" line cannot swallow the following line as its title.
_H1_PATTERN = re.compile(r"^#[ \t]+(.+)", re.MULTILINE)

_FETCH_PAGE_QUERY = """
query ($path: String!) {
  pages {
    singleByPath(path: $path, locale: "en") {
      id
      title
      description
      content
    }
  }
}
"""

_FIND_PAGE_QUERY = """
query ($path: String!) {
  pages {
    singleByPath(path: $path, locale: "en") {
      id
    }
  }
}
"""

# NOTE(review): Wiki.js 2.x is understood to treat an omitted `isPublished`
# on update as false (unpublishing the page) and to leave the title untouched
# unless it is passed - confirm against the deployed Wiki.js version.
_UPDATE_PAGE_MUTATION = """
mutation ($id: Int!, $title: String!, $content: String!) {
  pages {
    update(
      id: $id
      title: $title
      content: $content
      isPublished: true
      tags: ["blog"]
    ) {
      responseResult { succeeded message }
    }
  }
}
"""

_CREATE_PAGE_MUTATION = """
mutation ($path: String!, $title: String!, $content: String!) {
  pages {
    create(
      path: $path
      title: $title
      content: $content
      editor: "markdown"
      locale: "en"
      isPublished: true
      isPrivate: false
      tags: ["blog"]
      description: ""
    ) {
      responseResult { succeeded message }
      page { id }
    }
  }
}
"""


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _fail(message: str) -> NoReturn:
    """Print ``ERROR: <message>`` to stderr and exit with status 1."""
    print(f"ERROR: {message}", file=sys.stderr)
    sys.exit(1)


def _dig(obj: Any, *keys: str) -> Any:
    """Walk nested dicts by *keys*; return None as soon as a level is not a dict.

    GraphQL servers may answer ``{"data": null, "errors": [...]}``; chained
    ``.get(key, {})`` calls crash on that explicit null, this does not.
    """
    for key in keys:
        if not isinstance(obj, dict):
            return None
        obj = obj.get(key)
    return obj


def require_env(name: str, default: Optional[str] = None) -> str:
    """Return environment variable *name*, or *default* if unset or empty.

    Exits with status 1 when neither yields a non-empty value.
    """
    value = os.environ.get(name) or default
    if not value:
        _fail(f"Environment variable '{name}' is required.")
    return value


def http_post(url: str, payload: dict, headers: dict) -> dict:
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Exits with status 1 on HTTP errors, connection failures, timeouts, or a
    response body that is not valid UTF-8 JSON.
    """
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(url, data=data, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_SECONDS) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        _fail(f"HTTP {e.code} from {url}\n{body}")
    except urllib.error.URLError as e:
        _fail(f"Could not reach {url}: {e.reason}")
    except OSError as e:
        # Timeouts/resets while reading the body are not wrapped in URLError.
        _fail(f"Could not reach {url}: {e}")

    try:
        return json.loads(raw.decode("utf-8"))
    except ValueError as e:  # JSONDecodeError and UnicodeDecodeError
        _fail(f"Invalid JSON response from {url}: {e}")


def wiki_graphql(base: str, token: str, query: str, variables: dict = None) -> dict:
    """Send a GraphQL *query* to the Wiki.js instance at *base*.

    Returns the raw GraphQL envelope (``data`` and/or ``errors``); callers
    decide how to treat GraphQL-level errors.
    """
    url = f"{base.rstrip('/')}/graphql"  # tolerate a trailing slash in base
    payload = {"query": query}
    if variables:
        payload["variables"] = variables
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    return http_post(url, payload, headers)


def gemini_generate(api_key: str, prompt: str) -> str:
    """Send *prompt* to the configured Gemini model and return its text reply.

    Exits with status 1 if the response has no first candidate text part.
    """
    url = f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent"
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    headers = {"Content-Type": "application/json", "X-goog-api-key": api_key}
    resp = http_post(url, payload, headers)
    try:
        return resp["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        _fail(f"Unexpected Gemini response structure: {resp}")


def to_kebab(text: str) -> str:
    """Convert *text* to a lowercase, ASCII, hyphen-separated slug.

    Accented letters are folded to their base letter (``"Á"`` -> ``"a"``);
    characters with no ASCII decomposition are dropped. May return ``""``.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    ascii_text = decomposed.encode("ascii", "ignore").decode("ascii").lower()
    cleaned = re.sub(r"[^a-z0-9\s-]", "", ascii_text)
    return re.sub(r"[\s-]+", "-", cleaned).strip("-")


def read_file(path: str) -> str:
    """Return the UTF-8 text of *path*; exit with status 1 if it is missing."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        _fail(f"File not found: {path}")


def write_file(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8 and report the saved path."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"✓ Saved to {path}")


def _page_path_from_url(url: str, base: str) -> str:
    """Turn a page path or full page URL into a slash-free-edged wiki path."""
    base = base.rstrip("/")
    # Only strip the base when it is a true prefix, never from the middle.
    if url.startswith(base):
        url = url[len(base):]
    return url.strip("/")


def _split_title(markdown: str) -> Tuple[Optional[str], str]:
    """Split *markdown* into (H1 title, body with that heading line removed).

    Returns ``(None, markdown)`` when no level-1 heading exists.
    """
    match = _H1_PATTERN.search(markdown)
    if match is None:
        return None, markdown
    rest = markdown[match.end():]
    if rest.startswith("\n"):
        rest = rest[1:]
    body = (markdown[:match.start()] + rest).lstrip("\n")
    return match.group(1).strip(), body


# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------

def cmd_fetch(args):
    """Download a Wiki.js page as Markdown via GraphQL."""
    base = require_env("WIKI_BASE_DOMAIN")
    token = require_env("WIKI_TOKEN")

    page_path = _page_path_from_url(args.url, base)
    print(f"→ Fetching wiki page: /{page_path}")

    resp = wiki_graphql(base, token, _FETCH_PAGE_QUERY, {"path": page_path})
    page = _dig(resp, "data", "pages", "singleByPath")

    if not page:
        errors = _dig(resp, "errors") or resp
        _fail(f"Page not found at '{page_path}': {errors}")

    write_file(SOURCE_FILE, page["content"])


def cmd_write(args):
    """Generate a blog post from SOURCE.md using Gemini."""
    api_key = require_env("GEMINI_API_KEY")
    original_lang = require_env("ORIGINAL_LANG", "Hungarian")

    instructions = read_file(INSTRUCTIONS_FILE)
    source = read_file(SOURCE_FILE)

    print(f"→ Generating blog post in {original_lang} with Gemini...")

    prompt = (
        "Please read the following instructions carefully and follow them to write a blog post.\n\n"
        "## INSTRUCTIONS\n\n"
        f"{instructions}\n\n"
        "## TASK\n\n"
        f"Read the source content below and write a blog post from it in {original_lang} language. "
        "Output only the blog post in Markdown format, with no additional commentary.\n\n"
        "## SOURCE CONTENT\n\n"
        f"{source}"
    )

    result = gemini_generate(api_key, prompt)
    write_file(BLOGPOST_FILE, result)


def cmd_translate(args):
    """Translate BLOGPOST.md to TRANSLATED_BLOGPOST.md using Gemini."""
    api_key = require_env("GEMINI_API_KEY")
    translate_lang = require_env("TRANSLATE_LANG", "English")

    blogpost = read_file(BLOGPOST_FILE)

    print(f"→ Translating blog post to {translate_lang} with Gemini...")

    prompt = (
        f"Translate the following Markdown blog post into {translate_lang}. "
        "Preserve all Markdown formatting, headings, links, and code blocks exactly. "
        "Output only the translated Markdown with no additional commentary.\n\n"
        f"{blogpost}"
    )

    result = gemini_generate(api_key, prompt)
    write_file(TRANSLATED_FILE, result)


def cmd_upload(args):
    """Upload TRANSLATED_BLOGPOST.md to Wiki.js under /blog/{kebab-title}.

    The first H1 becomes the page title and slug and is removed from the
    body. An existing page at the target path is updated; otherwise a new
    page is created. Exits with status 1 on any failure.
    """
    base = require_env("WIKI_BASE_DOMAIN").rstrip("/")
    token = require_env("WIKI_TOKEN")

    title, content = _split_title(read_file(TRANSLATED_FILE))
    if not title:
        _fail(f"No H1 heading found in {TRANSLATED_FILE}")

    kebab = to_kebab(title)
    if not kebab:
        # Without this guard the page would be written to the bare "blog/" path.
        _fail(f"Cannot derive a URL slug from title '{title}'")
    page_path = f"blog/{kebab}"

    print("→ Uploading to Wiki.js")
    print(f"  Title : {title}")
    print(f"  Path  : /{page_path}")

    # A lookup that returns errors or a null page is treated as "page absent",
    # so errors on this query are deliberately not fatal.
    find_resp = wiki_graphql(base, token, _FIND_PAGE_QUERY, {"path": page_path})
    existing_id = _dig(find_resp, "data", "pages", "singleByPath", "id")

    if existing_id:
        print(f"  Found existing page id={existing_id}, updating...")
        operation = "update"
        variables = {"id": existing_id, "title": title, "content": content}
        resp = wiki_graphql(base, token, _UPDATE_PAGE_MUTATION, variables)
    else:
        print("  Page not found, creating new...")
        operation = "create"
        variables = {"path": page_path, "title": title, "content": content}
        resp = wiki_graphql(base, token, _CREATE_PAGE_MUTATION, variables)

    # Check transport-level GraphQL errors first: with errors present the
    # "data" member may be null and carries no responseResult.
    errors = _dig(resp, "errors")
    if errors:
        _fail(json.dumps(errors, indent=2))

    result = _dig(resp, "data", "pages", operation, "responseResult") or {}
    if not result.get("succeeded"):
        _fail(f"Operation failed: {result.get('message')}")

    print(f"✓ Successfully uploaded to {base}/{page_path}")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main():
    """Parse the command line and dispatch to the selected sub-command."""
    parser = argparse.ArgumentParser(
        description="Wiki.js → Gemini Blog Post Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # fetch
    p_fetch = subparsers.add_parser("fetch", help="Download a Wiki.js page as Markdown")
    p_fetch.add_argument("url", help="Page path or full URL, e.g. /my-page or https://wiki.example.com/my-page")
    p_fetch.set_defaults(func=cmd_fetch)

    # write
    p_write = subparsers.add_parser("write", help=f"Generate blog post from {SOURCE_FILE} using Gemini")
    p_write.set_defaults(func=cmd_write)

    # translate
    p_translate = subparsers.add_parser("translate", help=f"Translate {BLOGPOST_FILE} using Gemini")
    p_translate.set_defaults(func=cmd_translate)

    # upload
    p_upload = subparsers.add_parser("upload", help=f"Upload {TRANSLATED_FILE} to Wiki.js")
    p_upload.set_defaults(func=cmd_upload)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()