{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://opengastronomy.org/schema/v0.2/lstring.schema.json",
  "title": "OGS Localizable String (LString)",
  "description": "A reusable localizable-string shape used across OGS entity types and benchmark tasks. See spec/ogs-i18n.md.",
  "oneOf": [
    { "$ref": "#/$defs/bare_string" },
    { "$ref": "#/$defs/language_map" },
    { "$ref": "#/$defs/rich_labels" }
  ],
  "$defs": {
    "bcp47_tag": {
      "type": "string",
      "pattern": "^(?:[A-Za-z]{2,3}|[Ii]-[A-Za-z0-9]+|[Xx]-[A-Za-z0-9]+|zxx|und)(?:-[A-Za-z]{4})?(?:-(?:[A-Za-z]{2}|[0-9]{3}))?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-Za-z0-9](?:-[A-Za-z0-9]{2,8})+)*(?:-[Xx](?:-[A-Za-z0-9]{1,8})+)?$",
      "description": "BCP 47 (RFC 5646) language tag. Common examples: en, en-US, zh-Hans, ja-Latn, sr-Cyrl. Variant subtags may be 5-8 alphanumerics or a digit followed by 3 alphanumerics (e.g. de-1996, de-CH-1901). Extended language subtags (e.g. zh-yue) are not matched by this pattern."
    },
    "iso15924_script": {
      "type": "string",
      "pattern": "^[A-Z][a-z]{3}$",
      "description": "ISO 15924 four-letter script code, e.g. Latn, Cyrl, Hans, Hant, Jpan, Hira, Kana, Arab, Hebr."
    },
    "bare_string": {
      "type": "string",
      "minLength": 1,
      "description": "Form 1: a bare string whose language is inferred from the enclosing entity's primary_language."
    },
    "language_map": {
      "type": "object",
      "minProperties": 1,
      "patternProperties": {
        "^(?:[A-Za-z]{2,3}|[IiXx]-[A-Za-z0-9]{1,8})(?:-[A-Za-z0-9]{1,8})*$": { "type": "string", "minLength": 1 }
      },
      "additionalProperties": false,
      "description": "Form 2: map from BCP 47 tag to NFC-normalized UTF-8 string. Keys are matched by a permissive tag-shaped pattern (a 2-3 letter primary subtag, or an 'x-'/'i-' prefixed tag, followed by any number of 1-8 character alphanumeric subtags), so extension and private-use tags such as en-u-ca-gregory or ja-Latn-x-hepburn are accepted."
    },
    "rich_labels": {
      "type": "object",
      "required": ["labels"],
      "properties": {
        "primary_language": { "$ref": "#/$defs/bcp47_tag" },
        "labels": {
          "type": "array",
          "minItems": 1,
          "items": { "$ref": "#/$defs/label_entry" }
        }
      },
      "additionalProperties": false,
      "description": "Form 3: rich label array with explicit roles, scripts, and per-label provenance."
    },
    "label_entry": {
      "type": "object",
      "description": "One entry of a rich-label array: a non-empty string value tagged with its language, optionally carrying a script code, a role, a transliteration scheme, and translation provenance. No other properties are allowed.",
      "required": ["value", "language"],
      "properties": {
        "value": { "type": "string", "minLength": 1 },
        "language": { "$ref": "#/$defs/bcp47_tag" },
        "script": { "$ref": "#/$defs/iso15924_script" },
        "role": {
          "type": "string",
          "enum": [
            "preferred",
            "alternate",
            "transliteration",
            "descriptive",
            "loan",
            "historical",
            "deprecated",
            "ipa"
          ],
          "default": "preferred"
        },
        "transliteration_scheme": {
          "type": "string",
          "minLength": 1,
          "description": "Transliteration convention name (e.g. hepburn, kunrei, pinyin, wade_giles, iast, iso_9, bgn_pcgn). Must be non-empty. Required when role is 'transliteration'."
        },
        "translation": { "$ref": "#/$defs/translation_provenance" }
      },
      "allOf": [
        {
          "if": { "properties": { "role": { "const": "transliteration" } }, "required": ["role"] },
          "then": { "required": ["transliteration_scheme"] }
        }
      ],
      "additionalProperties": false
    },
    "translation_provenance": {
      "type": "object",
      "description": "Provenance metadata attached to a label via its 'translation' property. All fields are optional; no other properties are allowed.",
      "properties": {
        "method": {
          "type": "string",
          "enum": ["human_expert", "human_native", "machine", "machine_reviewed", "community"]
        },
        "translator": { "type": "string" },
        "reviewed_at": {
          "type": "string",
          "format": "date-time",
          "description": "RFC 3339 date-time string. Note that 'format' is only enforced by validators configured to assert formats."
        },
        "source_language": { "$ref": "#/$defs/bcp47_tag" },
        "source_version": { "type": "string" },
        "confidence": {
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "description": "Confidence score between 0 and 1 inclusive."
        }
      },
      "additionalProperties": false
    }
  }
}
