{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://catalog.lintel.tools/schemas/schemastore/eidolon-resource/_shared/latest--NLTKTextSplitter.json",
  "title": "NLTKTextSplitter",
  "x-lintel": {
    "source": "https://www.eidolonai.com/json_schema/v1/schemas/DocumentTransformer/NLTKTextSplitter.json",
    "sourceSha256": "98d426e855430fafcd53a8298c1906d19c2e053536d6fb17b12b9bf1b2295242"
  },
  "type": "object",
  "properties": {
    "implementation": {
      "const": "NLTKTextSplitter",
      "title": "Implementation"
    },
    "chunk_size": {
      "default": 4000,
      "description": "Maximum size of chunks to return",
      "title": "Chunk Size",
      "type": "integer"
    },
    "chunk_overlap": {
      "default": 200,
      "description": "Overlap in characters between chunks",
      "title": "Chunk Overlap",
      "type": "integer"
    },
    "keep_separator": {
      "default": false,
      "description": "Whether to keep the separator in the chunks",
      "title": "Keep Separator",
      "type": "boolean"
    },
    "strip_whitespace": {
      "default": true,
      "description": "If `true`, strips whitespace from the start and end of every document",
      "title": "Strip Whitespace",
      "type": "boolean"
    },
    "separator": {
      "default": "\n\n",
      "description": "Separator to split on",
      "title": "Separator",
      "type": "string"
    },
    "language": {
      "default": "english",
      "description": "Language to use for tokenization",
      "title": "Language",
      "type": "string"
    }
  },
  "additionalProperties": false,
  "required": [
    "implementation"
  ],
  "reference_details": {
    "clz": "eidolon_ai_sdk.agent.doc_manager.transformer.text_splitters.NLTKTextSplitter",
    "groups": [
      "DocumentTransformer"
    ],
    "name": "NLTKTextSplitter",
    "overrides": {}
  }
}
