# Source code for proxmox_api.parse_apidoc

"""Parser tool for Proxmox VE API documentation.

Can read from a local apidoc.json file or download the schema directly
from the Proxmox documentation site (apidoc.js).
"""

from __future__ import annotations

import json
import keyword
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import requests

# Default URL of the apidoc.js schema file on the Proxmox documentation site;
# used when no explicit source is supplied.
APIDOC_JS_URL = "https://pve.proxmox.com/pve-docs/api-viewer/apidoc.js"


@dataclass
class Parameter:
    """A single parameter of an API endpoint, as described by its schema entry.

    Attributes mirror the keys read from the schema (``type``, ``description``,
    ``optional``, ``default``, ``enum``, ``minimum``, ``maximum``, ``pattern``,
    ``format``) plus ``is_path_param``, which marks parameters that appear as
    ``{placeholders}`` in the endpoint's URL path.
    """

    name: str
    type: str
    description: str
    optional: bool = True
    default: Any = None
    enum: list[str] | None = None
    minimum: int | float | None = None
    maximum: int | float | None = None
    pattern: str | None = None
    format: str | None = None
    is_path_param: bool = False

    @property
    def python_name(self) -> str:
        """Convert parameter name to valid Python identifier.

        Every character that is not an ASCII letter, digit or underscore is
        replaced by ``_`` (so ``max-workers`` becomes ``max_workers`` and
        ``net[n]`` becomes ``net_n_``). A leading underscore is added when the
        result would be empty or start with a digit, and a trailing underscore
        is appended when the result is a reserved Python keyword.
        """
        ident = re.sub(r"\W", "_", self.name, flags=re.ASCII)
        if not ident or ident[0].isdigit():
            ident = "_" + ident
        if keyword.iskeyword(ident):
            # Keywords pass str.isidentifier() but cannot be used as names.
            ident += "_"
        return ident

    @property
    def python_type(self) -> str:
        """Map JSON-schema type to Python type hint.

        Unknown schema types fall back to ``"Any"``.
        """
        mapping = {
            "string": "str",
            "integer": "int",
            "number": "float",
            "boolean": "bool",
            "array": "list",
            "object": "dict",
            "null": "None",
        }
        return mapping.get(self.type, "Any")
@dataclass
class Endpoint:
    """One API endpoint: a URL path template combined with an HTTP method.

    ``parameters`` holds every parameter of the endpoint; the properties below
    are filtered views over that list and preserve its order.
    """

    path: str
    method: str
    name: str
    description: str
    parameters: list[Parameter] = field(default_factory=list)
    returns_type: str = "Any"
    protected: bool = False
    proxyto: str | None = None

    @property
    def path_params(self) -> list[Parameter]:
        """Parameters flagged as URL path placeholders."""
        return [p for p in self.parameters if p.is_path_param]

    @property
    def query_or_body_params(self) -> list[Parameter]:
        """Parameters that are not URL path placeholders."""
        return [p for p in self.parameters if not p.is_path_param]

    @property
    def required_params(self) -> list[Parameter]:
        """Parameters whose ``optional`` flag is false."""
        return [p for p in self.parameters if not p.optional]

    @property
    def optional_params(self) -> list[Parameter]:
        """Parameters whose ``optional`` flag is true."""
        return [p for p in self.parameters if p.optional]
def _extract_path_param_names(path: str) -> set[str]: """Extract parameter names from URL path template like /nodes/{node}.""" return set(re.findall(r"\{(\w+)\}", path)) def _parse_parameter(name: str, spec: dict, path_params: set[str]) -> Parameter: return Parameter( name=name, type=spec.get("type", "string"), description=spec.get("description", ""), optional=bool(spec.get("optional", 0)) and name not in path_params, default=spec.get("default"), enum=spec.get("enum"), minimum=spec.get("minimum"), maximum=spec.get("maximum"), pattern=spec.get("pattern"), format=spec.get("format") if isinstance(spec.get("format"), str) else None, is_path_param=name in path_params, ) def _parse_endpoint(path: str, method: str, info: dict) -> Endpoint: path_param_names = _extract_path_param_names(path) properties = info.get("parameters", {}).get("properties", {}) params = [ _parse_parameter(name, spec, path_param_names) for name, spec in properties.items() ] # Sort: path params first (in path order), then required, then optional path_order = re.findall(r"\{(\w+)\}", path) path_index = {name: i for i, name in enumerate(path_order)} def sort_key(p: Parameter) -> tuple: if p.is_path_param: return (0, path_index.get(p.name, 99)) if not p.optional: return (1, p.name) return (2, p.name) params.sort(key=sort_key) returns_type = info.get("returns", {}).get("type", "Any") return Endpoint( path=path, method=method, name=info.get("name", ""), description=info.get("description", ""), parameters=params, returns_type=returns_type, protected=bool(info.get("protected", 0)), proxyto=info.get("proxyto"), ) def _walk_tree(node: dict, endpoints: list[Endpoint]) -> None: """Recursively walk the API doc tree and collect endpoints.""" path = node.get("path", "") info = node.get("info", {}) for method, method_info in info.items(): if method in ("GET", "POST", "PUT", "DELETE"): endpoints.append(_parse_endpoint(path, method, method_info)) for child in node.get("children", []): _walk_tree(child, endpoints) def 
_extract_json_from_js(js_content: str) -> str: """Extract the JSON array from the apidoc.js JavaScript source. The file has the form: ``const apiSchema = [ ... ];`` followed by additional JavaScript code. We use incremental JSON parsing to extract just the array. """ match = re.search(r"=\s*(\[)", js_content) if not match: raise ValueError("Could not find JSON array in apidoc.js content") # Use raw_decode to parse only the JSON array, ignoring trailing JS code decoder = json.JSONDecoder() json_start = match.start(1) _, end_idx = decoder.raw_decode(js_content, json_start) return js_content[json_start:end_idx]
def download_apidoc(
    url: str = APIDOC_JS_URL,
    save_to: str | Path | None = None,
) -> list[dict]:
    """Download apidoc.js from the Proxmox docs and extract the schema.

    Args:
        url: URL to the apidoc.js file.
        save_to: If provided, save the extracted JSON to this path. Missing
            parent directories are created.

    Returns:
        The parsed API schema as a list of dicts.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no JSON array can be extracted from the response.
    """
    print(f"Downloading {url} ...")
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    json_text = _extract_json_from_js(resp.text)
    data = json.loads(json_text)

    if save_to:
        save_path = Path(save_to)
        # Create the target directory first so a successful download is not
        # thrown away just because the output folder does not exist yet.
        save_path.parent.mkdir(parents=True, exist_ok=True)
        save_path.write_text(json_text, encoding="utf-8")
        print(f"Saved schema to {save_path}")

    return data
def _load_schema(source: str | Path) -> list[dict]: """Load the API schema from a file path or URL. Supports: - Local .json file (apidoc.json format) - Local .js file (apidoc.js format) - HTTP/HTTPS URL to apidoc.js """ source_str = str(source) if source_str.startswith(("http://", "https://")): return download_apidoc(source_str) filepath = Path(source) content = filepath.read_text(encoding="utf-8").strip() if filepath.suffix == ".js": json_text = _extract_json_from_js(content) return json.loads(json_text) # Legacy apidoc.json format: comma-separated objects without outer brackets if not content.startswith("["): content = "[" + content + "]" return json.loads(content)
def parse_apidoc(source: str | Path) -> list[Endpoint]:
    """Parse an API doc source and return a list of all API endpoints.

    Args:
        source: Path to apidoc.json, apidoc.js, or a URL to apidoc.js.

    Returns:
        Every endpoint found while walking each root node of the schema, in
        traversal order.
    """
    data = _load_schema(source)
    endpoints: list[Endpoint] = []
    for root_node in data:
        _walk_tree(root_node, endpoints)
    return endpoints
def group_by_section(endpoints: list[Endpoint]) -> dict[str, list[Endpoint]]:
    """Group endpoints by top-level API section (cluster, nodes, etc.).

    The section is the first segment of the endpoint path. Endpoints whose
    path has no segment (``""`` or ``"/"``) are grouped under ``"root"``.

    Args:
        endpoints: Endpoints to group.

    Returns:
        Mapping of section name to its endpoints; sections and endpoints keep
        their first-seen order.
    """
    groups: dict[str, list[Endpoint]] = {}
    for ep in endpoints:
        # str.split never returns an empty list ("".split("/") == [""]), so
        # the fallback has to test the first segment itself.
        section = ep.path.strip("/").split("/")[0] or "root"
        groups.setdefault(section, []).append(ep)
    return groups
def main() -> None:
    """Command-line entry point: load an API doc source and print a summary.

    The source is the positional argument, or the default Proxmox docs URL
    when it is omitted or ``--download`` is given. A local source that does
    not exist aborts with exit status 1. With ``--save`` and a URL source the
    schema is downloaded once, written to the given path, and that same
    download is summarized.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Parse and summarize the Proxmox VE API documentation."
    )
    parser.add_argument(
        "source",
        nargs="?",
        default=None,
        help=(
            "Path to apidoc.json or apidoc.js, or a URL. "
            "Defaults to downloading from the Proxmox docs site."
        ),
    )
    parser.add_argument(
        "--download",
        action="store_true",
        help="Download the latest apidoc.js from the Proxmox docs site.",
    )
    parser.add_argument(
        "--save",
        metavar="PATH",
        default=None,
        help="Save the downloaded schema to a local JSON file.",
    )
    args = parser.parse_args()

    if args.download or args.source is None:
        source = APIDOC_JS_URL
    else:
        source = args.source

    is_url = str(source).startswith(("http://", "https://"))
    if not is_url:
        path = Path(source)
        if not path.exists():
            print(f"Error: {path} not found", file=sys.stderr)
            sys.exit(1)

    if args.save and is_url:
        # Download exactly once and reuse the result, instead of fetching the
        # schema a second time just to parse it.
        data = download_apidoc(source, save_to=args.save)
        endpoints: list[Endpoint] = []
        for root_node in data:
            _walk_tree(root_node, endpoints)
    else:
        endpoints = parse_apidoc(source)

    # NOTE(review): print_summary is not defined in the visible part of this
    # module - confirm it is defined or imported elsewhere.
    print_summary(endpoints)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()