{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://catalog.lintel.tools/schemas/schemastore/eidolon-resource/_shared/latest--SpacyTextSplitter.json",
  "title": "SpacyTextSplitter",
  "x-lintel": {
    "source": "https://www.eidolonai.com/json_schema/v1/schemas/DocumentTransformer/SpacyTextSplitter.json",
    "sourceSha256": "7f2436134e4cd54a0bc89575f268524c82390e642af86bfa07a1fef2f5edad83"
  },
  "type": "object",
  "properties": {
    "implementation": {
      "const": "SpacyTextSplitter",
      "title": "Implementation"
    },
    "chunk_size": {
      "default": 4000,
      "description": "Maximum size of chunks to return",
      "title": "Chunk Size",
      "type": "integer"
    },
    "chunk_overlap": {
      "default": 200,
      "description": "Overlap in characters between chunks",
      "title": "Chunk Overlap",
      "type": "integer"
    },
    "keep_separator": {
      "default": false,
      "description": "Whether to keep the separator in the chunks",
      "title": "Keep Separator",
      "type": "boolean"
    },
    "strip_whitespace": {
      "default": true,
      "description": "If true, strips whitespace from the start and end of every document",
      "title": "Strip Whitespace",
      "type": "boolean"
    },
    "separator": {
      "default": "\n\n",
      "description": "Separator to split on",
      "title": "Separator",
      "type": "string"
    },
    "pipeline": {
      "default": "en_core_web_sm",
      "description": "Name of the spaCy pipeline to use",
      "title": "Pipeline",
      "type": "string"
    },
    "max_length": {
      "default": 1000000,
      "description": "Maximum number of characters to process",
      "title": "Max Length",
      "type": "integer"
    }
  },
  "additionalProperties": false,
  "required": [
    "implementation"
  ],
  "reference_details": {
    "clz": "eidolon_ai_sdk.agent.doc_manager.transformer.text_splitters.SpacyTextSplitter",
    "groups": [
      "DocumentTransformer"
    ],
    "name": "SpacyTextSplitter",
    "overrides": {}
  }
}
