synapse/scripts-dev/gen_config_documentation.py

#!/usr/bin/env python3
"""Generate Synapse documentation from JSON Schema file."""

import json
import re
import sys
from typing import Any, Optional

import yaml

# Static Markdown preamble of the generated document. `main()` prints it
# verbatim, immediately followed by the per-option sections.
HEADER = """<!-- Document auto-generated by scripts-dev/gen_config_documentation.py -->

# Configuring Synapse

This is intended as a guide to the Synapse configuration. The behavior of a Synapse instance can be modified
through the many configuration settings documented here — each config option is explained,
including what the default is, how to change the default and what sort of behaviour the setting governs.
Also included is an example configuration for each setting. If you don't want to spend a lot of time
thinking about options, the config as generated sets sensible defaults for all values. Do note however that the
database defaults to SQLite, which is not recommended for production usage. You can read more on this subject
[here](../../setup/installation.md#using-postgresql).

## Config Conventions

Configuration options that take a time period can be set using a number
followed by a letter. Letters have the following meanings:

* `s` = second
* `m` = minute
* `h` = hour
* `d` = day
* `w` = week
* `y` = year

For example, setting `redaction_retention_period: 5m` would remove redacted
messages from the database after 5 minutes, rather than 5 months.

In addition, configuration options referring to size use the following suffixes:

* `K` = KiB, or 1024 bytes
* `M` = MiB, or 1,048,576 bytes
* `G` = GiB, or 1,073,741,824 bytes
* `T` = TiB, or 1,099,511,627,776 bytes

For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes
for a user avatar.

## Config Validation

The configuration file can be validated with the following command:
```bash
python -m synapse.config read <config key to print> -c <path to config>
```

To validate the entire file, omit `read <config key to print>`:
```bash
python -m synapse.config -c <path to config>
```

To see how to set other options, check the help reference:
```bash
python -m synapse.config --help
```

### YAML
The configuration file is a [YAML](https://yaml.org/) file, which means that certain syntax rules
apply if you want your config file to be read properly. A few helpful things to know:
* `#` before any option in the config will comment out that setting and either a default (if available) will
   be applied or Synapse will ignore the setting. Thus, in example #1 below, the setting will be read and
   applied, but in example #2 the setting will not be read and a default will be applied.

   Example #1:
   ```yaml
   pid_file: DATADIR/homeserver.pid
   ```
   Example #2:
   ```yaml
   #pid_file: DATADIR/homeserver.pid
   ```
* Indentation matters! The indentation before a setting
  will determine whether a given setting is read as part of another
  setting, or considered on its own. Thus, in example #1, the `enabled` setting
  is read as a sub-option of the `presence` setting, and will be properly applied.

  However, the lack of indentation before the `enabled` setting in example #2 means
  that when reading the config, Synapse will consider both `presence` and `enabled` as
  different settings. In this case, `presence` has no value, and thus a default applied, and `enabled`
  is an option that Synapse doesn't recognize and thus ignores.

  Example #1:
  ```yaml
  presence:
    enabled: false
  ```
  Example #2:
  ```yaml
  presence:
  enabled: false
  ```
  In this manual, all top-level settings (ones with no indentation) are identified
  at the beginning of their section (i.e. "### `example_setting`") and
  the sub-options, if any, are identified and listed in the body of the section.
  In addition, each setting has an example of its usage, with the proper indentation
  shown.
"""
# Chapter headings of the generated document, keyed by config option name.
# When `section()` renders an option whose name is a key here, it emits a
# "## <title>" heading followed by the description (and a horizontal rule)
# right before that option's own "### `<option>`" heading.
SECTION_HEADERS = {
    "modules": {
        "title": "Modules",
        "description": (
            "Server admins can expand Synapse's functionality with external "
            "modules.\n\n"
            "See [here](../../modules/index.md) for more documentation on how "
            "to configure or create custom modules for Synapse."
        ),
    },
    "server_name": {
        "title": "Server",
        "description": "Define your homeserver name and other base options.",
    },
    "admin_contact": {
        "title": "Homeserver blocking",
        "description": "Useful options for Synapse admins.",
    },
    "tls_certificate_path": {
        "title": "TLS",
        "description": "Options related to TLS.",
    },
    "federation_domain_whitelist": {
        "title": "Federation",
        "description": "Options related to federation.",
    },
    "event_cache_size": {
        "title": "Caching",
        "description": "Options related to caching.",
    },
    "database": {
        "title": "Database",
        "description": "Config options related to database settings.",
    },
    "log_config": {
        "title": "Logging",
        "description": ("Config options related to logging."),
    },
    "rc_message": {
        "title": "Ratelimiting",
        "description": (
            "Options related to ratelimiting in Synapse.\n\n"
            "Each ratelimiting configuration is made of two parameters:\n"
            "- `per_second`: number of requests a client can send per second.\n"
            "- `burst_count`: number of requests a client can send before "
            "being throttled."
        ),
    },
    "enable_authenticated_media": {
        "title": "Media Store",
        "description": "Config options related to Synapse's media store.",
    },
    "recaptcha_public_key": {
        "title": "Captcha",
        "description": (
            "See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha."
        ),
    },
    "turn_uris": {
        "title": "TURN",
        "description": ("Options related to adding a TURN server to Synapse."),
    },
    "enable_registration": {
        "title": "Registration",
        "description": (
            "Registration can be rate-limited using the parameters in the "
            "[Ratelimiting](#ratelimiting) section of this manual."
        ),
    },
    "session_lifetime": {
        "title": "User session management",
        "description": ("Config options related to user session management."),
    },
    "enable_metrics": {
        "title": "Metrics",
        "description": ("Config options related to metrics."),
    },
    "room_prejoin_state": {
        "title": "API Configuration",
        "description": ("Config settings related to the client/server API."),
    },
    "signing_key_path": {
        "title": "Signing Keys",
        "description": ("Config options relating to signing keys."),
    },
    "saml2_config": {
        "title": "Single sign-on integration",
        "description": (
            "The following settings can be used to make Synapse use a single sign-on provider for authentication, instead of its internal password database.\n\n"
            "You will probably also want to set the following options to `false` to disable the regular login/registration flows:\n"
            "* [`enable_registration`](#enable_registration)\n"
            "* [`password_config.enabled`](#password_config)"
        ),
    },
    "push": {
        "title": "Push",
        "description": ("Configuration settings related to push notifications."),
    },
    "encryption_enabled_by_default_for_room_type": {
        "title": "Rooms",
        "description": ("Config options relating to rooms."),
    },
    "opentracing": {
        "title": "Opentracing",
        "description": ("Configuration options related to Opentracing support."),
    },
    "worker_replication_secret": {
        "title": "Coordinating workers",
        "description": (
            "Configuration options related to workers which belong in the main config file (usually called `homeserver.yaml`). A Synapse deployment can scale horizontally by running multiple Synapse processes called _workers_. Incoming requests are distributed between workers to handle higher loads. Some workers are privileged and can accept requests from other workers.\n\n"
            "As a result, the worker configuration is divided into two parts.\n\n"
            "1. The first part (in this section of the manual) defines which shardable tasks are delegated to privileged workers. This allows unprivileged workers to make requests to a privileged worker to act on their behalf.\n"
            "2. [The second part](#individual-worker-configuration) controls the behaviour of individual workers in isolation.\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "worker_app": {
        "title": "Individual worker configuration",
        "description": (
            "These options configure an individual worker, in its worker configuration file. They should not be provided when configuring the main process.\n\n"
            "Note also the configuration above for [coordinating a cluster of workers](#coordinating-workers).\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "background_updates": {
        "title": "Background Updates",
        "description": ("Configuration settings related to background updates."),
    },
    "auto_accept_invites": {
        "title": "Auto Accept Invites",
        "description": (
            "Configuration settings related to automatically accepting invites."
        ),
    },
}
# Indentation unit that `indent()` prepends to lines of nested bullet lists.
INDENT = "  "


# Set to True by `error()`; `main()` checks it to decide the exit status.
has_error = False


def error(text: str) -> None:
    """Record that an error occurred and report `text` on stderr.

    The message is printed with an ``ERROR: `` prefix and the module-level
    `has_error` flag is switched on.
    """
    global has_error
    has_error = True
    print("ERROR:", text, file=sys.stderr)


def indent(text: str, first_line: bool = True) -> str:
    """Prefix every non-empty line of `text` with `INDENT`.

    Empty lines are left untouched. The very first line is only indented when
    `first_line` is true.
    """
    # Insert the indent after each newline that is followed by real content.
    indented = re.sub(r"\n(?=[^\n])", "\n" + INDENT, text)
    if not first_line:
        return indented
    # Without re.MULTILINE, "^" only matches at the start of the whole text.
    return re.sub(r"^(?=[^\n])", INDENT, indented)


def em(s: Optional[str]) -> str:
    """Wrap `s` in Markdown emphasis markers; empty or missing text yields ""."""
    if not s:
        return ""
    return "*{}*".format(s)


def a(s: Optional[str], suffix: str = " ") -> str:
    """Return `s` followed by `suffix`, or "" when `s` is empty or missing."""
    if not s:
        return ""
    return s + suffix


def p(s: Optional[str], prefix: str = " ") -> str:
    """Return `prefix` followed by `s`, or "" when `s` is empty or missing."""
    if not s:
        return ""
    return prefix + s


def resolve_local_refs(schema: dict) -> dict:
    """Return a new schema with local `$ref` keywords replaced by their definitions.

    Every dict that carries a `$ref` of the form `#/$defs/<name>` is merged
    with `schema["$defs"][<name>]`. This is a crude approximation of JSON
    Schema reference resolution: keywords of the definition override keywords
    written next to the `$ref` (a warning is printed to stderr when that
    happens), while `properties` of both sides are merged, with the
    definition's properties taking precedence.

    The input schema is left unmodified. Values taken from a definition are
    shared with the input rather than copied, and a `$ref` nested inside a
    definition is carried over as-is when that definition is substituted.

    Args:
        schema: The parsed JSON Schema. Its `$defs` entry may be absent as
            long as nothing references a definition.

    Returns:
        The schema with all `$ref` keys outside of definitions resolved.

    Raises:
        KeyError: If a `$ref` names a definition that does not exist.
    """
    # A schema without definitions is valid as long as nothing references one.
    defs = schema.get("$defs", {})

    def replace_ref(d: Any) -> Any:
        if isinstance(d, list):
            return [replace_ref(v) for v in d]
        if not isinstance(d, dict):
            return d

        the_def: dict = {}
        def_name = ""
        if "$ref" in d:
            # Found a "$ref" key.
            def_name = d["$ref"].removeprefix("#/$defs/")
            the_def = defs[def_name]

        # Build a fresh dict without "$ref" rather than deleting the key from
        # the caller's data (which would also corrupt the shared definitions).
        new_dict = {k: replace_ref(v) for k, v in d.items() if k != "$ref"}
        if common_keys := (new_dict.keys() & the_def.keys()) - {"properties"}:
            print(
                f"WARN: '{def_name}' overrides keys '{common_keys}'",
                file=sys.stderr,
            )

        new_dict_props = new_dict.get("properties", {})
        the_def_props = the_def.get("properties", {})
        if common_props := new_dict_props.keys() & the_def_props.keys():
            print(
                f"WARN: '{def_name}' overrides properties '{common_props}'",
                file=sys.stderr,
            )

        merged = {**new_dict, **the_def}
        if merged_props := {**new_dict_props, **the_def_props}:
            merged["properties"] = merged_props
        return merged

    return replace_ref(schema)


def sep(values: dict) -> str:
    """Pick the separator used to attach extra text to a description.

    A description that already spans several paragraphs gets the extra text as
    a new paragraph; otherwise the text continues the same paragraph.
    """
    description = values.get("description", "")
    if "\n\n" in description:
        return "\n\n"
    return " "


def type_str(values: dict) -> str:
    """Return the parenthesised type label for a schema entry, or "" if untyped.

    The `io.element.type_name` keyword, when set, replaces the schema `type`
    so the documentation can show a clearer name. A list of types is joined
    with `|`.
    """
    override = values.get("io.element.type_name")
    if override:
        return f"({override})"

    declared = values.get("type")
    if not declared:
        return ""

    names = declared if isinstance(declared, list) else [declared]
    return "(" + "|".join(names) + ")"


def items(values: dict) -> str:
    """Render the bullet list describing the properties of array entries.

    Returns "" when the schema has no `items` keyword or the item schema
    declares no `properties`.
    """
    entry_schema = values.get("items")
    if not entry_schema:
        return ""

    entry_props = entry_schema.get("properties")
    if not entry_props:
        return ""

    bullets = [sub_section(name, spec) for name, spec in entry_props.items()]
    return "\nOptions for each entry include:\n\n" + "\n".join(bullets)


def properties(values: dict) -> str:
    """Render the bullet list describing an object's sub-options.

    Returns "" when the schema declares no `properties`.
    """
    sub_options = values.get("properties")
    if not sub_options:
        return ""

    bullets = [sub_section(name, spec) for name, spec in sub_options.items()]
    return "\nThis setting has the following sub-options:\n\n" + "\n".join(bullets)


def sub_section(prop: str, values: dict) -> str:
    """Render one Markdown bullet documenting the sub-property `prop`.

    The bullet holds the property name, its type label, the description
    (followed by the default, if one is declared) and, indented below it, the
    nested lists produced for array items and object properties. A missing
    description is reported through `error()` and replaced by a placeholder.
    """

    def default_note() -> str:
        if "default" not in values:
            return ""
        encoded = json.dumps(values["default"])
        return f"Defaults to `{encoded}`."

    def body() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        return f"{text}{p(default_note(), sep(values))}\n"

    bullet = f"* `{prop}`{p(type_str(values))}: "
    # Continuation lines of the description align under the bullet text.
    bullet += indent(body(), first_line=False)
    bullet += indent(items(values))
    bullet += indent(properties(values))
    return bullet


def section(prop: str, values: dict) -> str:
    """Render the Markdown section documenting the top-level option `prop`.

    The section consists of, in order: a horizontal rule, the chapter header
    from `SECTION_HEADERS` (if `prop` starts a chapter), the option heading,
    the description with type and default, the lists for array items and
    object properties, the default as a YAML block (for non-empty dict/list
    defaults), the examples, and the optional `io.element.post_description`.
    A missing description is reported through `error()` and replaced by a
    placeholder.
    """
    has_default = "default" in values
    default_value = values.get("default")
    # Only non-empty dicts and lists are too complex for a one-line default.
    complex_default = bool(default_value) and isinstance(default_value, (dict, list))

    def default_sentence() -> str:
        if not has_default:
            declared_type = values.get("type", [])
            if declared_type == "object" or "object" in declared_type:
                # Skip objects as they probably have child defaults.
                return ""
            return "There is no default for this option."
        if complex_default:
            # Complex defaults are shown as a code block instead.
            return ""
        return f"Defaults to `{json.dumps(default_value)}`."

    def chapter_header() -> str:
        try:
            entry = SECTION_HEADERS[prop]
            heading, blurb = entry["title"], entry["description"]
        except KeyError:
            return ""
        return f"## {heading}\n\n{blurb}\n\n---\n"

    def lead_paragraph() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        type_prefix = a(em(type_str(values)))
        default_suffix = p(default_sentence(), sep(values))
        return f"\n{type_prefix}{text}{default_suffix}\n"

    def yaml_block(example: Any) -> str:
        dumped = yaml.dump({prop: example}, sort_keys=False)
        return f"```yaml\n{dumped}```\n"

    def default_block() -> str:
        if not complex_default:
            return ""
        return "\nDefault configuration:\n" + yaml_block(default_value)

    def example_blocks() -> str:
        samples = values.get("examples")
        if not samples:
            return ""
        rendered = "\n".join(yaml_block(sample) for sample in samples)
        label = (
            "Example configurations" if len(samples) >= 2 else "Example configuration"
        )
        return f"\n{label}:\n{rendered}"

    def trailing_text() -> str:
        # Sometimes it's helpful to have a description after the list of
        # fields, e.g. with a subsection that consists only of text.
        extra = values.get("io.element.post_description")
        return f"\n{extra}\n\n" if extra else ""

    parts = [
        "---\n",
        chapter_header(),
        f"### `{prop}`\n",
        lead_paragraph(),
        items(values),
        properties(values),
        default_block(),
        example_blocks(),
        trailing_text(),
    ]
    return "".join(parts)


def main() -> None:
    """Generate the configuration manual for the schema named on the command line.

    Loads the schema file given as the single command-line argument, resolves
    its local `$ref`s and prints `HEADER` followed by one section per
    top-level property to stdout.

    Exits with status 1 on invalid command-line usage and with status 2 when
    any problem was reported through `error()` while rendering.
    """
    # For Windows: reconfigure the terminal to be UTF-8 for `print()` calls.
    if sys.platform == "win32":
        sys.stdout.reconfigure(encoding="utf-8")

    def usage(err_msg: str) -> int:
        """Print `err_msg` and the usage text to stderr; return the exit status."""
        script_name = (sys.argv[:1] or ["__main__.py"])[0]
        print(err_msg, file=sys.stderr)
        print(f"Usage: {script_name} <JSON Schema file>", file=sys.stderr)
        print(f"\n{__doc__}", file=sys.stderr)
        return 1

    def read_json_file_arg() -> Any:
        """Load and return the schema from the file named by the sole argument."""
        # `sys.exit` is used rather than the `exit` builtin: the latter is
        # injected by the `site` module for interactive use and is not
        # guaranteed to exist (e.g. under `python -S`).
        if len(sys.argv) > 2:
            sys.exit(usage("Too many arguments."))
        if not (filepath := (sys.argv[1:] or [""])[0]):
            sys.exit(usage("No schema file provided."))
        with open(filepath, "r", encoding="utf-8") as f:
            # Note: Windows requires that we specify the encoding otherwise it uses
            # things like CP-1251, which can cause explosions.
            # See https://github.com/yaml/pyyaml/issues/123 for more info.
            return yaml.safe_load(f)

    schema = read_json_file_arg()
    schema = resolve_local_refs(schema)

    sections = (section(k, v) for k, v in schema["properties"].items())
    print(HEADER + "".join(sections), end="")

    if has_error:
        print("There were errors.", file=sys.stderr)
        sys.exit(2)


# Only generate the documentation when executed as a script, not on import.
if __name__ == "__main__":
    main()