{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://catalog.lintel.tools/schemas/schemastore/starlake-data-pipeline/latest.json",
  "title": "Starlake Data Pipeline",
  "description": "JSON Schema for Starlake Data Pipeline",
  "x-lintel": {
    "source": "https://www.schemastore.org/starlake.json",
    "sourceSha256": "63c82227fff46ce5ff5f81a5d2de5ab33cab5128cde61854d59307b568612a72",
    "fileMatch": [
      "*.sl.yml"
    ],
    "parsers": [
      "yaml"
    ]
  },
  "type": "object",
  "properties": {
    "version": {
      "type": "integer",
      "enum": [
        1
      ]
    }
  },
  "allOf": [
    {
      "$ref": "#/$defs/StarlakeV1Base"
    }
  ],
  "$defs": {
    "ConvertibleToString": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "boolean"
        },
        {
          "type": "number"
        },
        {
          "type": "integer"
        },
        {
          "type": "null"
        }
      ]
    },
    "MergeOnV1": {
      "oneOf": [
        {
          "const": "TARGET",
          "description": "Merge operation will be applied on the target table only but not on the incoming data"
        },
        {
          "const": "SOURCE_AND_TARGET",
          "description": "Merge operation will be applied on the incoming source data and target tables"
        }
      ]
    },
    "PrimitiveTypeV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Define the value type",
      "oneOf": [
        {
          "const": "string",
          "description": "Any string that match the '.*' regex"
        },
        {
          "const": "long",
          "description": "Any whole number that match the '[-|+|0-9][0-9]*' regex.\nints are mapped to as 'int' in some database whereas 'longs are mapped as 'bigint' and shorts as 'smallint'"
        },
        {
          "const": "int",
          "description": "Any whole number that match the '[-|+|0-9][0-9]*' regex.\nints are mapped to as 'int' in some database whereas 'longs are mapped as 'bigint' and shorts as 'smallint'"
        },
        {
          "const": "short",
          "description": "Any whole number that match the '[-|+|0-9][0-9]*' regex.\nints are mapped to as 'int' in some database whereas 'longs are mapped as 'bigint' and shorts as 'smallint'"
        },
        {
          "const": "double",
          "description": "Any decimal number that match the '[-+]?\\d*\\.?\\d+[Ee]?[-+]?\\d*' regex"
        },
        {
          "const": "boolean",
          "description": "Any string that match the '(?i)true|yes|[y1]<-TF->(?i)false|no|[n0]' regex,\nwhere the value on the left of '<-T' represent true and values on the right of 'F->' represent the false"
        },
        {
          "const": "byte",
          "description": "Any single char"
        },
        {
          "const": "date",
          "description": "Any date that match the 'yyyy-MM-dd' regex (2023-12-31)"
        },
        {
          "const": "timestamp",
          "description": "date/time that match the 'yyyy-MM-dd HH:mm:ss' regex s (2019-12-31 23:59:02).\nFor epoch timestamp, set pattern attribute to 'epoch_second' or 'epoch_milli'"
        },
        {
          "const": "decimal",
          "description": "Any floating value that match the '-?\\d*\\.{0,1}\\d+' regex"
        },
        {
          "const": "variant",
          "description": "Semi structured data type eq. JSON / XML"
        },
        {
          "const": "struct",
          "description": "Any attribute that has children. Set the array to true if this attribute is made of a list of attributes"
        }
      ]
    },
    "TrimV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "How to trim the input string",
      "oneOf": [
        {
          "const": "LEFT",
          "description": "Remove all leading space chars from the input"
        },
        {
          "const": "RIGHT",
          "description": "Remove all trailing spaces from the input"
        },
        {
          "const": "BOTH",
          "description": "Remove all leading and trailing spaces from the input"
        },
        {
          "const": "NONE",
          "description": "Do not remove leading or trailing spaces from the input"
        }
      ]
    },
    "TableSync": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Should this YAML table schema be synchronized with the source table ?",
      "oneOf": [
        {
          "const": "NONE",
          "description": "Do not synchronize this table schema with the source table"
        },
        {
          "const": "ADD",
          "description": "Add missing attributes to the table schema, but do not remove any existing attributes"
        },
        {
          "const": "ALL",
          "description": "Synchronize this table schema with the source table, removing any attributes that are not present in the source table and adding any missing attributes"
        }
      ]
    },
    "TableDdlV1": {
      "description": "DDL used to create a table",
      "type": "object",
      "properties": {
        "createSql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "SQL CREATE DDL statement"
        },
        "pingSql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "How to test if the table exist.\nUse the following statement by default: 'select count(*) from tableName where 1=0'"
        },
        "selectSql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Override the default select defined by Starlake"
        }
      },
      "required": [
        "createSql"
      ]
    },
    "Materialization": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Table types supported by the Sink option",
      "oneOf": [
        {
          "const": "TABLE",
          "description": "SQL Table"
        },
        {
          "const": "VIEW",
          "description": "SQL View"
        },
        {
          "const": "MATERIALIZED_VIEW",
          "description": "SQL Materialized View"
        },
        {
          "const": "HYBRID",
          "description": "Snowflake OLTP tables"
        }
      ]
    },
    "TableTypeBase": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Table types supported by the Extract module",
      "oneOf": [
        {
          "const": "TABLE",
          "description": "SQL Table"
        },
        {
          "const": "VIEW",
          "description": "SQL View"
        },
        {
          "const": "SYSTEM TABLE",
          "description": "Database specific system table"
        },
        {
          "const": "MATERIALIZED VIEW",
          "description": "SQL Materialized View"
        },
        {
          "const": "GLOBAL TEMPORARY",
          "description": "Global temporary table visible across sessions until explicitly dropped"
        },
        {
          "const": "LOCAL TEMPORARY",
          "description": "Local temporary table visible only within the current session"
        },
        {
          "const": "ALIAS",
          "description": "Table alias"
        },
        {
          "const": "SYNONYM",
          "description": "Table synonym"
        }
      ]
    },
    "TableTypeV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Table types supported by the Extract module",
      "oneOf": [
        {
          "$ref": "#/$defs/TableTypeBase"
        },
        {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Exclude the these table types",
          "not": {
            "$ref": "#/$defs/TableTypeBase"
          }
        }
      ]
    },
    "TypeV1": {
      "type": "object",
      "description": "Custom type definition. Custom types are defined in the types/types.sl.yml file",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "unique id for this type"
        },
        "pattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Regex used to validate the input field"
        },
        "primitiveType": {
          "$ref": "#/$defs/PrimitiveTypeV1",
          "description": "To what primitive type should this type be mapped.\n This is the memory representation of the type, When saving, this primitive type is mapped to the database specific type. Default: string"
        },
        "zone": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Context-specific parsing configuration:\n- For double: locale for decimal parsing (e.g., 'fr_FR' for comma as decimal separator)\n- For decimal: precision and scale as 'precision,scale' (e.g., '38,9')\n- For timestamp: timezone (e.g., 'UTC', 'Europe/Paris')"
        },
        "sample": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "This field makes sure that the pattern matches the value you want to match. This will be checked on startup"
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Describes this type"
        },
        "ddlMapping": {
          "$ref": "#/$defs/MapString",
          "description": "Configure here the type mapping for each datawarehouse.\\nWill be used when inferring DDL from schema."
        }
      },
      "required": [
        "name",
        "pattern"
      ]
    },
    "PositionV1": {
      "description": "First and last char positions of an attribute in a fixed length record",
      "type": "object",
      "properties": {
        "first": {
          "type": "number",
          "description": "Zero based position of the first character for this attribute"
        },
        "last": {
          "type": "number",
          "description": "Zero based position of the last character to include in this attribute"
        }
      },
      "required": [
        "first",
        "last"
      ]
    },
    "ConnectionV1": {
      "description": "Connection properties to a datawarehouse.",
      "type": "object",
      "properties": {
        "type": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Connection type: jdbc, bigquery, snowflake, redshift, databricks, duckdb, or any Spark-supported format"
        },
        "sparkFormat": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Spark data source format to use (e.g., 'jdbc', 'bigquery', 'parquet'). Required when using Spark engine"
        },
        "loader": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Loader we should use with this connection. Superseded by the loader defined in the YAML table metadata"
        },
        "quote": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Character to use when quoting column and table names"
        },
        "separator": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Catalog/schema separator character used in fully qualified table names. Default is '.'"
        },
        "options": {
          "$ref": "#/$defs/MapString",
          "description": "Connection options"
        }
      },
      "required": [
        "type"
      ]
    },
    "DagGenerationConfigV1": {
      "description": "Dag configuration.",
      "type": "object",
      "properties": {
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Dag config description"
        },
        "template": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Dag template to use for this config. Usually a .py.j2 file"
        },
        "filename": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "{schedule}, {domain}, {table} in the file name are used for DAG generation purposes"
        },
        "options": {
          "$ref": "#/$defs/MapString",
          "description": "DAG generation options"
        }
      },
      "required": [
        "template",
        "filename"
      ]
    },
    "RowLevelSecurityV1": {
      "description": "Row level security policy to apply to the output data.",
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "This Row Level Security unique name"
        },
        "predicate": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "The condition that goes to the WHERE clause and limit the visible rows."
        },
        "grants": {
          "description": "user / groups / service accounts to which this security level is applied.\nex : user:me@mycompany.com,group:group@mycompany.com,serviceAccount:mysa@google-accounts.com",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "description": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Description for this access policy"
        }
      },
      "required": [
        "name",
        "grants"
      ]
    },
    "AccessControlEntryV1": {
      "description": "Column level security policy to apply to the attribute.",
      "type": "object",
      "properties": {
        "role": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "This role to give to the granted users"
        },
        "grants": {
          "description": "user / groups / service accounts to which this security level is applied.\nex : user:me@mycompany.com,group:group@mycompany.com,serviceAccount:mysa@google-accounts.com",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "This Access Control Entry unique name"
        }
      },
      "required": [
        "role",
        "grants"
      ]
    },
    "FormatV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "DSV by default. Supported file formats are :\\n- DSV : Delimiter-separated values file. Delimiter value is specified in the \"separator\" field.\\n- POSITION : FIXED format file where values are located at an exact position in each line.\\n- JSON_FLAT : For optimisation purpose, we differentiate JSON with top level values from JSON\\n  with deep level fields. JSON_FLAT are JSON files with top level fields only.\\n- JSON :  Deep JSON file. Use only when your json documents contain sub-documents, otherwise prefer to\\n  use JSON_FLAT since it is much faster.\\n- XML : XML files",
      "oneOf": [
        {
          "const": "DATAFRAME",
          "description": "Loader is developer responsibility. loader attribute references a python file that returns a dataframe"
        },
        {
          "const": "DSV",
          "description": "any single or multiple character delimited file. Separator is specified in the separator field"
        },
        {
          "const": "POSITION",
          "description": "any fixed position file. Positions are specified in the position field"
        },
        {
          "const": "JSON",
          "description": "any deep json file.\nTo improve performance, prefer the JSON_FLAT format if your json documents are flat"
        },
        {
          "const": "JSON_ARRAY",
          "description": "any json file containing an array of json objects."
        },
        {
          "const": "JSON_FLAT",
          "description": "any flat json file.\nTo improve performance, prefer this format if your json documents are flat"
        },
        {
          "const": "XML",
          "description": "any xml file. Use the metadata.xml.rowTag field to specify the root tag of your xml file"
        },
        {
          "const": "TEXT_XML",
          "description": "Used for privacy jobs"
        },
        {
          "const": "KAFKA",
          "description": "Kafka topic ingestion. Configure the Kafka connection in the application settings"
        },
        {
          "const": "KAFKASTREAM",
          "description": "Kafka streaming ingestion for real-time data processing"
        },
        {
          "const": "GENERIC",
          "description": "Generic format for custom file types. Requires a custom loader implementation"
        },
        {
          "const": "PARQUET",
          "description": "Any parquet file"
        }
      ]
    },
    "MapString": {
      "type": "object",
      "description": "Map of string",
      "additionalProperties": {
        "$ref": "#/$defs/ConvertibleToString"
      }
    },
    "MapConnectionV1": {
      "type": "object",
      "description": "Map of jdbc engines",
      "additionalProperties": {
        "$ref": "#/$defs/ConnectionV1"
      }
    },
    "MapJdbcEngineV1": {
      "type": "object",
      "description": "Map of jdbc engines",
      "additionalProperties": {
        "$ref": "#/$defs/JdbcEngineV1"
      }
    },
    "MapTableDdlV1": {
      "type": "object",
      "description": "Map of table ddl",
      "additionalProperties": {
        "$ref": "#/$defs/TableDdlV1"
      }
    },
    "JdbcEngineV1": {
      "type": "object",
      "description": "Jdbc engine",
      "properties": {
        "tables": {
          "$ref": "#/$defs/MapTableDdlV1",
          "description": "List of all SQL create statements used to create audit tables for this JDBC engine.\nTables are created only if the execution of the pingSQL statement fails"
        },
        "quote": {
          "type": "string",
          "description": "How to quote identifiers"
        },
        "viewPrefix": {
          "type": "string",
          "description": "When creating views, how they should be prefixed. Some databases like redshift require view name to be prefixed by the character '#'.\nThis is not required for other databases like snowflake or bigquery.\nDefault is empty string"
        },
        "preActions": {
          "type": "string",
          "description": "SQL statements to execute immediately after the database connection is opened (e.g., SET commands)"
        },
        "partitionBy": {
          "type": "string",
          "description": "keyword used to partition the table. Default is PARTITION BY"
        },
        "clusterBy": {
          "type": "string",
          "description": "keyword used to cluster the table. Default is CLUSTER BY"
        },
        "strategyBuilder": {
          "type": "string",
          "description": "Override the default strategy builder used to write data. A strategy is a folder located under metadata/templates/write-strategies/[strategyBuilder]"
        },
        "columnRemarks": {
          "type": "string",
          "description": "How to get column remarks"
        },
        "tableRemarks": {
          "type": "string",
          "description": "How to get table remarks"
        }
      },
      "required": [
        "tables",
        "quote",
        "strategyBuilder"
      ]
    },
    "PrivacyV1": {
      "type": "object",
      "properties": {
        "options": {
          "$ref": "#/$defs/MapString",
          "description": "Privacy strategies. The following default strategies are defined by default:\n- none: Leave the data as is\n- hide: replace the data with an empty string\n- hideX(\"s\", n): replace the string with n occurrences of the string 's'\n- md5: Redact the data using the MD5 algorithm\n- sha1: Redact the data using the SHA1 algorithm\n- sha256: Redact the data using the SHA256 algorithm\n - sha512: Redact the data using the SHA512 algorithm\n- initials: keep only the first char of each word in the data"
        }
      }
    },
    "InternalV1": {
      "type": "object",
      "description": "configure Spark internal options",
      "properties": {
        "cacheStorageLevel": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "How the dataframe are cached. Default is MEMORY_AND_DISK_SER.\nAvailable options are (https://spark.apache.org/docs/latest/api/java/index.html?org/apache/spark/storage/StorageLevel.html):\n- MEMORY_ONLY\n- MEMORY_AND_DISK\n- MEMORY_ONLY_SER\n- MEMORY_AND_DISK_SER\n- DISK_ONLY\n- OFF_HEAP"
        },
        "intermediateBigqueryFormat": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "May be parquet or ORC. Default is parquet. Used for BigQuery intermediate storage. Use ORC for for JSON files to keep the original data structure.\n<https://stackoverflow.com/questions/53674838/spark-writing-parquet-arraystring-converts-to-a-different-datatype-when-loadin>"
        },
        "temporaryGcsBucket": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "The GCS bucket that temporarily holds the data before it is loaded to BigQuery."
        },
        "substituteVars": {
          "description": "Internal use. Do not modify.",
          "type": "boolean"
        },
        "bqAuditSaveInBatchMode": {
          "description": "Should audit logs when using BigQuery be saved in batch or interactive mode ? Interactive by default (false)",
          "type": "boolean"
        }
      }
    },
    "AccessPoliciesV1": {
      "type": "object",
      "properties": {
        "apply": {
          "description": "Should access policies be enforced ?",
          "type": "boolean"
        },
        "location": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "GCP project location. Required if apply is true."
        },
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "GCP Project id. Required if apply is true."
        },
        "taxonomy": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Taxonomy name. Required if apply is true."
        }
      }
    },
    "SparkSchedulingV1": {
      "type": "object",
      "properties": {
        "maxJobs": {
          "description": "Max number of Spark jobs to run in parallel, default is 1",
          "type": "integer"
        },
        "poolName": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Pool name to use for Spark jobs, default is 'default'"
        },
        "mode": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "This can be FIFO or FAIR, to control whether jobs within the pool queue up behind each other (the default) or share the pool’s resources fairly."
        },
        "file": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Scheduler filename in the metadata folder. If not set, defaults to fairscheduler.xml."
        }
      }
    },
    "ExpectationsConfigV1": {
      "type": "object",
      "properties": {
        "path": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "When using filesystem storage, the path to the expectations file"
        },
        "active": {
          "description": "should expectations be executed ?",
          "type": "boolean"
        },
        "failOnError": {
          "description": "should load / transform fail on expectation error ?",
          "type": "boolean"
        }
      }
    },
    "ExpectationItemV1": {
      "type": "object",
      "properties": {
        "expect": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "SQL expression that must evaluate to true for the expectation to pass. Use {{table}} to reference the output table"
        },
        "failOnError": {
          "description": "should load / transform fail on expectation error ?",
          "type": "boolean"
        }
      }
    },
    "MetricsV1": {
      "type": "object",
      "properties": {
        "path": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "When using filesystem storage, the path to the metrics file"
        },
        "discreteMaxCardinality": {
          "description": "Max number of unique values accepted for a discrete column. Default is 10",
          "type": "integer"
        },
        "active": {
          "description": "Should metrics be computed ?",
          "type": "boolean"
        }
      }
    },
    "AllSinksV1": {
      "type": "object",
      "properties": {
        "connectionRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "JDBC: Connection String"
        },
        "clustering": {
          "description": "FS or BQ: List of attributes to use for clustering",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "days": {
          "type": "number",
          "description": "BQ: Number of days before this table is set as expired and deleted. Never by default."
        },
        "requirePartitionFilter": {
          "type": "boolean",
          "description": "BQ: Should be require a partition filter on every request ? No by default."
        },
        "materializedView": {
          "$ref": "#/$defs/Materialization",
          "description": "Should we materialize as a table or as a view when saving the results ? TABLE by default."
        },
        "enableRefresh": {
          "type": "boolean",
          "description": "BQ: Enable automatic refresh of materialized view ? false by default."
        },
        "refreshIntervalMs": {
          "type": "number",
          "description": "BQ: Refresh interval in milliseconds. Default to BigQuery default value"
        },
        "id": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "ES: Attribute to use as id of the document. Generated by Elasticsearch if not specified."
        },
        "format": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "FS: File format"
        },
        "extension": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "FS: File extension"
        },
        "sharding": {
          "description": "columns to use for sharding. table will be named table_{sharding(0)}_{sharding(1)}",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "partition": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "FS or BQ: List of partition attributes"
        },
        "coalesce": {
          "type": "boolean",
          "description": "When outputting files, should we coalesce it to a single file. Useful when CSV is the output format."
        },
        "path": {
          "type": "string",
          "description": "Optional path attribute if you want to save the file outside of the default location (datasets folder)"
        },
        "options": {
          "$ref": "#/$defs/MapString",
          "description": "Additional Spark writer options (e.g., compression, partitionOverwriteMode)"
        }
      }
    },
    "WriteStrategyTypeBase": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Write strategy type that determines how data is written to the target table",
      "oneOf": [
        {
          "const": "OVERWRITE",
          "description": "Truncate table before writing new data"
        },
        {
          "const": "APPEND",
          "description": "Append incoming data to the target table"
        },
        {
          "const": "UPSERT_BY_KEY",
          "description": "Append incoming data to the target table and update existing records using the column key(s)"
        },
        {
          "const": "UPSERT_BY_KEY_AND_TIMESTAMP",
          "description": "Append incoming data to the target table and update existing records using the column key(s) and timestamp"
        },
        {
          "const": "DELETE_THEN_INSERT",
          "description": "Delete all records in the target table and insert incoming data"
        },
        {
          "const": "SCD2",
          "description": "Slowly changing dimension type 2: Append incoming data to the target table and update existing records using the column key(s) and timestamp.\n  If the target table does not have a start and end date columns, they will be added automatically.\n  If the target table does not have a current flag column, it will be added automatically.\n  If the target table does not have a version column, it will be added automatically.\n  If the target table does not have a hash column, it will be added automatically.\n  If the target table does not have a hash column, it will be added automatically."
        },
        {
          "const": "OVERWRITE_BY_PARTITION",
          "description": "Overwrite the partition(s) of the target table with the incoming data and append new ones"
        }
      ]
    },
    "WriteStrategyTypeV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Write strategy type that determines how data is written to the target table",
      "oneOf": [
        {
          "$ref": "#/$defs/WriteStrategyTypeBase"
        }
      ]
    },
    "OpenWriteStrategyTypeV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Write strategy type including custom strategies. Allows predefined strategies or custom strategy names",
      "oneOf": [
        {
          "$ref": "#/$defs/WriteStrategyTypeBase"
        },
        {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Any valid table types",
          "not": {
            "$ref": "#/$defs/WriteStrategyTypeBase"
          }
        }
      ]
    },
    "WriteStrategyV1": {
      "type": "object",
      "properties": {
        "type": {
          "$ref": "#/$defs/OpenWriteStrategyTypeV1",
          "description": "Write strategy type: OVERWRITE, APPEND, UPSERT_BY_KEY, UPSERT_BY_KEY_AND_TIMESTAMP, DELETE_THEN_INSERT, SCD2, or OVERWRITE_BY_PARTITION"
        },
        "types": {
          "type": "object",
          "description": "Map of connection type to write strategy. Allows different strategies per target database",
          "additionalProperties": {
            "$ref": "#/$defs/OpenWriteStrategyTypeV1"
          }
        },
        "key": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "List of columns to use as key(s) for the target table.\n  This is used to update existing records in the target table."
        },
        "timestamp": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "timestamp column to use for the target table.\n This is used to update existing records in the target table by strategies UPSERT_BY_KEY_AND_TIMESTAMP and SCD2."
        },
        "queryFilter": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "SQL WHERE clause filter applied when reading from the target table during merge operations"
        },
        "on": {
          "$ref": "#/$defs/MergeOnV1",
          "description": "Merge strategy: TARGET or SOURCE_AND_TARGET.\n  TARGET means that the merge operation will be applied on the target table only.\n  SOURCE_AND_TARGET means that the merge operation will be applied on the incoming source data and target tables."
        },
        "startTs": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "column name to use for the start timestamp.\n This is used to update existing records in the target table by strategy SCD2."
        },
        "endTs": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "column name to use for the end timestamp.\n This is used to update existing records in the target table by strategy SCD2."
        }
      }
    },
    "MetadataV1": {
      "type": "object",
      "properties": {
        "format": {
          "$ref": "#/$defs/FormatV1"
        },
        "encoding": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "UTF-8 if not specified."
        },
        "multiline": {
          "type": "boolean",
          "description": "Are JSON objects on a single line or spread over multiple lines? Single line by default: false means single-line, which is also faster."
        },
        "array": {
          "type": "boolean",
          "description": "Is the json stored as a single object array ? false by default. This means that by default we have one json document per line."
        },
        "withHeader": {
          "type": "boolean",
          "description": "Does the dataset have a header? true by default"
        },
        "separator": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "The values delimiter, ';' by default. The value may be a multi-char string starting from Spark 3"
        },
        "quote": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "The String quote char, '\"' by default"
        },
        "escape": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "escaping char '\\' by default"
        },
        "sink": {
          "$ref": "#/$defs/AllSinksV1"
        },
        "directory": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Folder on the local filesystem where incoming files are stored.\n Typically, this folder will be scanned periodically to move the dataset to the cluster for ingestion.\n                     Files located in this folder are moved to the stage folder for ingestion by the \"import\" command."
        },
        "extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "recognized filename extensions. json, csv, dsv, psv are recognized by default.\nOnly files with these extensions will be moved to the stage folder."
        },
        "ack": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Ack extension used for each file. \".ack\" if not specified.\nFiles are moved to the stage folder only once a file with the same name as the source file and with this extension is present.\nTo move a file without requiring an ack file to be present, set explicitly this property to the empty string value \"\"."
        },
        "options": {
          "$ref": "#/$defs/MapString",
          "description": "Options to add to the spark reader"
        },
        "loader": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Loader to use, 'spark' or 'native'. Defaults to 'spark' unless the SL_LOADER env variable is set to 'native'"
        },
        "emptyIsNull": {
          "description": "Treat empty columns as null in DSV files. Default to false",
          "type": "boolean"
        },
        "dagRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Reference to the DAG configuration to use for this domain/table"
        },
        "freshness": {
          "$ref": "#/$defs/FreshnessV1",
          "description": "Configure freshness checks on this dataset"
        },
        "nullValue": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Treat a specific input string as a null value indicator"
        },
        "schedule": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Cron expression to use for this domain/table"
        },
        "writeStrategy": {
          "$ref": "#/$defs/WriteStrategyV1",
          "description": "Write strategy to use when writing the data. Default is APPEND"
        }
      }
    },
    "AreaV1": {
      "type": "object",
      "properties": {
        "incoming": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Files are read from  this folder for ingestion by the \"import\" command."
        },
        "stage": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Files recognized by the extensions property are moved to this folder for ingestion by the \"import\" command."
        },
        "unresolved": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Files that cannot be ingested (do not match by any table pattern) are moved to this folder."
        },
        "archive": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Files that have been ingested are moved to this folder if SL_ARCHIVE is set to true."
        },
        "ingesting": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Files that are being ingested are moved to this folder."
        },
        "replay": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Invalid records are stored in this folder in source format when SL_SINK_REPLAY_TO_FILE is set to true."
        },
        "hiveDatabase": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Hive database name to use when running on Spark with Hive support enabled"
        }
      }
    },
    "FreshnessV1": {
      "type": "object",
      "properties": {
        "warn": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "How old may be the data before a warning is raised. Use syntax like '3 day' or '2 hour' or '30 minute'"
        },
        "error": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "How old may be the data before an error is raised. Use syntax like '3 day' or '2 hour' or '30 minute'"
        }
      }
    },
    "TableV1": {
      "type": "object",
      "description": "Table Schema definition.",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Schema name, must be unique among all the schemas belonging to the same domain.\nWill become the hive table name on premise or the BigQuery table name on GCP."
        },
        "pattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Filename pattern to which this schema must be applied.\nThis instructs the framework to use this schema to parse any file with a filename that matches this pattern."
        },
        "attributes": {
          "description": "Attributes parsing rules.",
          "type": "array",
          "items": {
            "$ref": "#/$defs/AttributeV1"
          }
        },
        "metadata": {
          "$ref": "#/$defs/MetadataV1",
          "description": "Dataset metadata"
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Table description that will be stored as table comment in the target database"
        },
        "streams": {
          "type": "array",
          "description": "attach streams to table (Snowflake only)",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "presql": {
          "type": "array",
          "description": "Reserved for future use.",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "postsql": {
          "type": "array",
          "description": "List of SQL requests to execute after the table has been loaded.",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "tags": {
          "description": "Set of string to attach to this Schema",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "rls": {
          "description": "Row level security on this schema.",
          "type": "array",
          "items": {
            "$ref": "#/$defs/RowLevelSecurityV1"
          }
        },
        "expectations": {
          "description": "Expectations to check after Load / Transform has succeeded",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ExpectationItemV1"
          }
        },
        "primaryKey": {
          "description": "List of columns that make up the primary key",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "acl": {
          "description": "Map of rolename -> List[Users].",
          "type": "array",
          "items": {
            "$ref": "#/$defs/AccessControlEntryV1"
          }
        },
        "rename": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "If present, the table is renamed with this name. Useful when use in conjunction with the 'extract' module"
        },
        "sample": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Store here a couple of records illustrating the table data."
        },
        "filter": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "remove all records that do not match this condition"
        },
        "patternSample": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Sample of filename matching this schema"
        }
      },
      "required": [
        "name",
        "pattern",
        "attributes"
      ]
    },
    "MetricTypeV1": {
      "$ref": "#/$defs/ConvertibleToString",
      "description": "Used to compute metrics on column values.",
      "oneOf": [
        {
          "const": "DISCRETE",
          "description": "Compute metrics for categorical/discrete values: count, distinct count, frequency distribution"
        },
        {
          "const": "CONTINUOUS",
          "description": "Compute metrics for numeric/continuous values: min, max, mean, median, standard deviation, percentiles"
        },
        {
          "const": "TEXT",
          "description": "Compute metrics for text values: min/max length, pattern analysis, null ratio"
        },
        {
          "const": "NONE",
          "description": "Do not compute any metrics for this column"
        }
      ]
    },
    "AttributeV1": {
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Attribute name as defined in the source dataset and as received in the file"
        },
        "type": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Semantic type of the attribute. Can be a primitive type (string, long, int, double, boolean, date, timestamp, decimal) or a custom type defined in types.sl.yml. Default is 'string'"
        },
        "array": {
          "type": "boolean",
          "description": "Is this attribute an array/list of values? Default is false"
        },
        "required": {
          "type": "boolean",
          "description": "Should this attribute always be present in the source. Default to true."
        },
        "privacy": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Should this attribute be applied a privacy transformation at ingestion time"
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Column description that will be stored as column comment in the target database"
        },
        "rename": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "If present, the attribute is renamed with this name"
        },
        "sample": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Sample data for this attribute"
        },
        "metricType": {
          "$ref": "#/$defs/MetricTypeV1",
          "description": "If present, what kind of stat should be computed for this field"
        },
        "attributes": {
          "type": "array",
          "description": "List of sub-attributes (valid for JSON and XML files only)",
          "items": {
            "$ref": "#/$defs/AttributeV1"
          }
        },
        "position": {
          "$ref": "#/$defs/PositionV1"
        },
        "default": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default value for this attribute when it is not present."
        },
        "tags": {
          "type": "array",
          "description": "Tags associated with this attribute",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "trim": {
          "$ref": "#/$defs/TrimV1"
        },
        "script": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Scripted field : SQL request on renamed column"
        },
        "foreignKey": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "If this attribute is a foreign key, reference to [domain.]table[.attribute]"
        },
        "ignore": {
          "type": "boolean",
          "description": "Should this attribute be ignored on ingestion. Default to false"
        },
        "accessPolicy": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Policy tag to assign to this attribute. Used for column level security"
        }
      },
      "required": [
        "name"
      ]
    },
    "AutoTaskDescV1": {
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Task name. If not specified, defaults to the filename without extension"
        },
        "sql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Used internally. The SQL should always be written in a separate file named after the transform name. If the transform is named transf.sl.yml then the sql should be stored in the file transf.sql. It will contain the main SQL request to execute"
        },
        "streams": {
          "type": "array",
          "description": "attach streams to task (Snowflake only)",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "primaryKey": {
          "type": "array",
          "description": "List of columns that make up the primary key for the output table",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Output Database (refer to a project id in BigQuery). Default to SL_DATABASE env var if set."
        },
        "domain": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Output domain in output Area (Will be the Database name in Hive or Dataset in BigQuery)"
        },
        "table": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Dataset Name in output Area (Will be the Table name in Hive & BigQuery)"
        },
        "partition": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "List of columns used for partitioning the output."
        },
        "presql": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "List of SQL requests to execute before the main SQL request is run"
        },
        "postsql": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "List of SQL requests to execute after the main SQL request is run"
        },
        "sink": {
          "$ref": "#/$defs/AllSinksV1"
        },
        "rls": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/RowLevelSecurityV1"
          }
        },
        "expectations": {
          "description": "Expectations to check after Load / Transform has succeeded",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ExpectationItemV1"
          }
        },
        "acl": {
          "description": "Map of rolename -> List[Users].",
          "type": "array",
          "items": {
            "$ref": "#/$defs/AccessControlEntryV1"
          }
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Output table description"
        },
        "freshness": {
          "$ref": "#/$defs/FreshnessV1",
          "description": "Configure freshness checks on the output table"
        },
        "attributes": {
          "description": "Attributes",
          "type": "array",
          "items": {
            "$ref": "#/$defs/AttributeV1"
          }
        },
        "python": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Python script URI to execute instead of the SQL request"
        },
        "tags": {
          "description": "Set of string to attach to the output table",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "writeStrategy": {
          "$ref": "#/$defs/WriteStrategyV1",
          "description": "Write strategy to use when writing the data. Default is APPEND"
        },
        "schedule": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Cron expression to use for this task"
        },
        "dagRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Reference to the DAG configuration to use for this task"
        },
        "taskTimeoutMs": {
          "type": "integer",
          "description": "Number of milliseconds before a communication timeout."
        },
        "parseSQL": {
          "type": "boolean",
          "description": "Should we parse this SQL and make it update the table according to the write strategy, or just execute it as is?"
        },
        "connectionRef": {
          "type": "string",
          "description": "Used when the default connection ref present in the application.sl.yml file is not the one to use to run the SQL request for this task."
        },
        "syncStrategy": {
          "$ref": "#/$defs/TableSync",
          "description": "Schema synchronization strategy: NONE (no sync), ADD (add missing columns), or ALL (full sync with source)"
        },
        "dataset_triggering_strategy": {
          "type": "string",
          "description": "Dataset triggering strategy to determine when this task should be executed based on dataset changes: & and | operators are allowed (dataset1 & dataset2) | dataset3"
        }
      },
      "required": []
    },
    "LockV1": {
      "type": "object",
      "properties": {
        "path": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Name of the lock"
        },
        "timeout": {
          "type": "integer",
          "description": "reserved"
        },
        "pollTime": {
          "type": "integer",
          "description": "Default 5 seconds"
        },
        "refreshTime": {
          "type": "integer",
          "description": "Default 5 seconds"
        }
      }
    },
    "AuditV1": {
      "type": "object",
      "properties": {
        "path": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Path where audit logs are stored when using filesystem storage"
        },
        "sink": {
          "$ref": "#/$defs/AllSinksV1",
          "description": "Sink configuration for audit logs storage"
        },
        "maxErrors": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Maximum number of errors to tolerate before failing the job. Default is unlimited"
        },
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Database name where audit tables are stored (project ID in BigQuery)"
        },
        "domain": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain/dataset name for audit tables. Default is 'audit'"
        },
        "domainExpectation": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain/dataset name for expectation results. Default is 'expectations'"
        },
        "domainRejected": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain/dataset name for rejected records. Default is 'rejected'"
        },
        "detailedLoadAudit": {
          "type": "boolean",
          "description": "Create individual entry for each ingested file instead of a global one. Default: false"
        },
        "active": {
          "type": "boolean",
          "description": "Enable or disable audit logging. Default is true"
        },
        "sql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Custom SQL query to use for audit table creation or data insertion"
        }
      },
      "required": []
    },
    "DomainV1": {
      "type": "object",
      "description": "A schema in JDBC database or a folder in HDFS or a dataset in BigQuery.",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain name. Make sure you use a name that may be used as a folder name on the target storage.\n                   - When using HDFS or Cloud Storage,  files once ingested are stored in a sub-directory named after the domain name.\n                   - When used with BigQuery, files are ingested and sorted in tables under a dataset named after the domain name."
        },
        "metadata": {
          "$ref": "#/$defs/MetadataV1"
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain Description (free text)"
        },
        "tags": {
          "description": "Set of string to attach to this domain",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "rename": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "If present, the domain is renamed to this name in the target database"
        },
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Output Database (refer to a project id in BigQuery). Default to SL_DATABASE env var if set."
        }
      }
    },
    "AutoJobDescV1": {
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Optional name. If not specified, the name of the file without the extension is used."
        },
        "tasks": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/AutoTaskDescV1",
            "description": "List of transform tasks to execute"
          }
        },
        "comment": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Optional description."
        },
        "default": {
          "$ref": "#/$defs/AutoTaskDescV1",
          "description": "Default task properties to apply to all tasks defined in tasks section and in included files"
        }
      }
    },
    "JDBCTableV1": {
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Table name. Set to '*' to extract all tables. Scope: Schema and Data extraction."
        },
        "sql": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Optional SQL SELECT statement used to extract schema and data. Ignore columns attribute if set. Generated by default"
        },
        "columns": {
          "description": "List of columns to extract. All columns by default.",
          "type": "array",
          "minItems": 1,
          "items": {
            "oneOf": [
              {
                "$ref": "#/$defs/ConvertibleToString",
                "description": "Column name to extract. Scope: Schema and Data extraction."
              },
              {
                "type": "object",
                "properties": {
                  "name": {
                    "$ref": "#/$defs/ConvertibleToString",
                    "description": "Column name to extract. Scope: Schema and Data extraction."
                  },
                  "rename": {
                    "$ref": "#/$defs/ConvertibleToString",
                    "description": "Rename database column name. Scope: Schema and Data extraction."
                  }
                },
                "required": [
                  "name"
                ]
              }
            ]
          }
        },
        "partitionColumn": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Column to use in order to parallelize data extraction. Scope: Data extraction."
        },
        "numPartitions": {
          "type": "integer",
          "description": "Number of data partitions to create. Scope: Data extraction."
        },
        "connectionOptions": {
          "$ref": "#/$defs/MapString",
          "description": "Options to set on database connection, only when connectionRef is not defined. Scope: Data extraction."
        },
        "fetchSize": {
          "type": "integer",
          "description": "Number of rows to be fetched from the database when additional rows are needed. By default, most JDBC drivers use a fetch size of 10, so if you are reading 1000 objects, increasing the fetch size to 256 can significantly reduce the time required to fetch the query's results. The optimal fetch size is not always obvious. Scope: Data extraction."
        },
        "fullExport": {
          "type": "boolean",
          "description": "If true, extract all data from the table. Scope: Data extraction."
        },
        "filter": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Optional SQL WHERE clause to filter data. This is used to exclude part of the data. This is not related to incremental extraction which is handled automatically by Starlake. Scope: Data extraction."
        },
        "stringPartitionFunc": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "SQL template used on partition columns of type String. Some implementations are already defined, see ai.starlake.extract.JdbcDbUtils.getStringPartitionFunc. Mandatory variables: col, nb_partitions. Scope: Data extraction."
        }
      }
    },
    "OutputV1": {
      "type": "object",
      "description": "Output configuration for a domain",
      "properties": {
        "encoding": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Specifies encoding (charset) of saved CSV files."
        },
        "withHeader": {
          "type": "boolean",
          "description": "If true, writes the names of columns as the first line."
        },
        "separator": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Character used as a separator for each field and value."
        },
        "quote": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Character used for escaping quoted values where the separator can be part of the value."
        },
        "escape": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Character used for escaping quotes inside an already quoted value."
        },
        "nullValue": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "String representation of a null value."
        },
        "datePattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Java date pattern to apply on date object. Have a look at <https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html>"
        },
        "timestampPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Java timestamp pattern to apply on timestamp object. Have a look at <https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html>"
        }
      }
    },
    "JDBCSchemaBase": {
      "type": "object",
      "properties": {
        "catalog": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Optional catalog name in the source database. Scope: Schema and Data extraction."
        },
        "schema": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Database schema where source tables are located. For mysql use this instead of catalog. Scope: Schema and Data extraction."
        },
        "tableRemarks": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Query template used to retrieve table remarks. Available variables: catalog, schema, table. Scope: Schema extraction."
        },
        "columnRemarks": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Query template used to retrieve all columns' remark of a table. Available variables: catalog, schema, table. Scope: Schema extraction."
        },
        "tableTypes": {
          "description": "One or many of the predefined table types. Scope: Schema and Data extraction.",
          "type": "array",
          "items": {
            "$ref": "#/$defs/TableTypeV1"
          }
        },
        "template": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Template used during schema extraction in order to generate load files (domain and tables). Scope: Schema extraction."
        },
        "pattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Pattern template used to define load tables' file pattern. Available variables: catalog, schema, table. Scope: Schema extraction."
        },
        "numericTrim": {
          "$ref": "#/$defs/TrimV1",
          "description": "Trim strategies applied to numeric fields set on load table's definition. Scope: Schema extraction."
        },
        "partitionColumn": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Column to use in order to parallelize data extraction. Scope: Data extraction."
        },
        "numPartitions": {
          "type": "integer",
          "description": "Number of data partitions to create. Scope: Data extraction."
        },
        "connectionOptions": {
          "$ref": "#/$defs/MapString",
          "description": "Options to set on database connection if no connectionRef is provided. Scope: Data extraction."
        },
        "fetchSize": {
          "type": "integer",
          "description": "Number of rows to be fetched from the database when additional rows are needed. By default, most JDBC drivers use a fetch size of 10, so if you are reading 1000 objects, increasing the fetch size to 256 can significantly reduce the time required to fetch the query's results. The optimal fetch size is not always obvious. Scope: Data extraction."
        },
        "stringPartitionFunc": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "SQL template used on partition columns of type String. Some implementations are already defined, see ai.starlake.extract.JdbcDbUtils.getStringPartitionFunc. Mandatory variables: col, nb_partitions. Scope: Data extraction."
        },
        "fullExport": {
          "type": "boolean",
          "description": "Define whether we should fetch the entire table's data or not. If not, the maximum value of partitionColumn seen during the last extraction is used in order to fetch incremental data. Scope: Data extraction."
        },
        "sanitizeName": {
          "type": "boolean",
          "description": "Sanitize domain's name by keeping alpha numeric characters only. Scope: Schema and Data extraction."
        }
      }
    },
    "DefaultJDBCSchemaV1": {
      "$ref": "#/$defs/JDBCSchemaBase"
    },
    "JDBCSchemaV1": {
      "type": "object",
      "allOf": [
        {
          "$ref": "#/$defs/JDBCSchemaBase"
        },
        {
          "properties": {
            "tables": {
              "type": "array",
              "description": "List of tables to extract. Scope: Schema and Data extraction.",
              "items": {
                "$ref": "#/$defs/JDBCTableV1"
              }
            },
            "exclude": {
              "type": "array",
              "description": "List of tables to exclude. Applied on tables list. Scope: Schema and Data extraction.",
              "items": {
                "$ref": "#/$defs/ConvertibleToString"
              }
            }
          }
        }
      ]
    },
    "JDBCSchemasV1": {
      "type": "object",
      "allOf": [
        {
          "$ref": "#/$defs/ExtractV1Base"
        },
        {
          "properties": {
            "jdbcSchemas": {
              "description": "Describe what to fetch from a database connection. Scope: Schema and Data extraction.",
              "type": "array",
              "items": {
                "$ref": "#/$defs/JDBCSchemaV1"
              }
            },
            "auditConnectionRef": {
              "$ref": "#/$defs/ConvertibleToString",
              "description": "Connection used to read/store audit from it. If not defined, fallbacks to connectionRef. Expected connection name as defined in the connections section of the application.conf file. Scope: Data extraction."
            },
            "output": {
              "$ref": "#/$defs/OutputV1",
              "description": "Define the output format of data extraction. Scope: Data extraction."
            },
            "default": {
              "$ref": "#/$defs/DefaultJDBCSchemaV1",
              "description": "Configuration merged into each of the jdbcSchemas defined in the YAML file. Scope: Schema and Data extraction."
            }
          },
          "not": {
            "required": [
              "openAPI"
            ]
          },
          "type": "object"
        }
      ]
    },
    "OpenAPIObjectSchemasV1": {
      "type": "object",
      "properties": {
        "include": {
          "description": "List of regex used to include open api schemas (#/components/schemas). Defaults to ['.*']. 'Includes' is evaluated before 'excludes'",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "exclude": {
          "description": "List of regex used to exclude open api schemas (#/components/schemas). Defaults to [].",
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        }
      }
    },
    "OpenAPIRouteObjectExplosionV1": {
      "type": "object",
      "properties": {
        "on": {
          "description": "Explode route's object to more object definition. Use object's path with route path as final name. Defaults to ALL",
          "type": "string",
          "anyOf": [
            {
              "const": "ALL",
              "title": "Keep properties of type object or array."
            },
            {
              "const": "OBJECT",
              "title": "Keep properties of type object. Don't dive on array type."
            },
            {
              "const": "ARRAY",
              "title": "Keep properties of type array. If encounters an object, dive deeper."
            }
          ]
        },
        "exclude": {
          "type": "array",
          "description": "filter out on field path. Each field is separated by _. Default to []",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "rename": {
          "type": "object",
          "description": "Regex applied on object path. If matches, use the given name otherwise fallback to route_path + object path as final name",
          "additionalProperties": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        }
      }
    },
    "OpenAPIRoutesV1": {
      "type": "object",
      "properties": {
        "paths": {
          "description": "List of regex used to include open api path '.*'",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "as": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Force all routes matching the pattern to be saved as the given name if they don't conflict"
        },
        "operations": {
          "description": "List of operations to retrieve schema from. Defaults to ['GET']. Supported values are GET and POST.",
          "type": "array",
          "minItems": 1,
          "items": {
            "anyOf": [
              {
                "const": "GET",
                "description": "Retain operation of type GET in openAPI"
              },
              {
                "const": "POST",
                "title": "Retain operation of type POST in openAPI"
              }
            ]
          }
        },
        "exclude": {
          "description": "List of regex used to excludes api path []",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "excludeFields": {
          "description": "List of regex used to excludes fields. Fields and their subfields are separated by _.",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          }
        },
        "explode": {
          "$ref": "#/$defs/OpenAPIRouteObjectExplosionV1",
          "description": "Explodes on route's object and split the schema."
        }
      }
    },
    "OpenAPIDomainV1": {
      "type": "object",
      "properties": {
        "name": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain name used to group tables extracted from openAPI spec"
        },
        "basePath": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Common base path used to remove from path in order to generate final table name."
        },
        "schemas": {
          "$ref": "#/$defs/OpenAPIObjectSchemasV1",
          "description": "Describe what to fetch from data connection. Scope: Schema and Data extraction."
        },
        "routes": {
          "description": "Describe what to fetch from data connection. Scope: Schema and Data extraction.",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/OpenAPIRoutesV1"
          }
        }
      },
      "required": [
        "name"
      ]
    },
    "OpenAPIV1": {
      "type": "object",
      "properties": {
        "basePath": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Common base path used to remove from path in order to generate final table name."
        },
        "formatTypeMapping": {
          "$ref": "#/$defs/MapString",
          "description": "mapping a format used for string and the starlake attribute type"
        },
        "domains": {
          "description": "Describe what to fetch from data connection. Scope: Schema and Data extraction.",
          "type": "array",
          "minItems": 1,
          "items": {
            "$ref": "#/$defs/OpenAPIDomainV1"
          }
        }
      }
    },
    "OpenAPIsV1": {
      "type": "object",
      "allOf": [
        {
          "$ref": "#/$defs/ExtractV1Base"
        },
        {
          "properties": {
            "openAPI": {
              "$ref": "#/$defs/OpenAPIV1",
              "description": "Describe how to extract domains and tables from OpenAPI spec"
            }
          },
          "required": [
            "openAPI"
          ],
          "type": "object"
        }
      ]
    },
    "ExtractV1Base": {
      "type": "object",
      "properties": {
        "sanitizeAttributeName": {
          "type": "string",
          "anyOf": [
            {
              "const": "ON_EXTRACT",
              "description": "attribute name is sanitized and stored as field name. Default."
            },
            {
              "const": "ON_LOAD",
              "title": "attribute name is sanitized and stored as rename property when attribute's name differs from sanitized name"
            }
          ]
        },
        "connectionRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "With OpenAPI: connection used to reference OpenAPI spec. Supports file (local, storage) or public http url. With others, connection used to retrieve schema and then data. If not defined, fallback to application settings."
        }
      }
    },
    "InputRefV1": {
      "description": "Input for ref object",
      "type": "object",
      "properties": {
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Database pattern to match, none if any database"
        },
        "domain": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Domain pattern to match, none if any domain match"
        },
        "table": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Table pattern to match"
        }
      },
      "required": [
        "table"
      ]
    },
    "OutputRefV1": {
      "description": "Output for ref object",
      "type": "object",
      "properties": {
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Target database name for the resolved reference"
        },
        "domain": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Target domain/dataset name for the resolved reference"
        },
        "table": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Target table name for the resolved reference"
        }
      },
      "required": [
        "table",
        "domain",
        "database"
      ]
    },
    "RefV1": {
      "description": "Describe how to resolve a reference in a transform task",
      "type": "object",
      "properties": {
        "input": {
          "$ref": "#/$defs/InputRefV1",
          "description": "The input table to resolve"
        },
        "output": {
          "$ref": "#/$defs/OutputRefV1",
          "description": "The output table resolved with the domain and database"
        }
      },
      "required": [
        "input",
        "output"
      ]
    },
    "KafkaTopicConfigV1": {
      "properties": {
        "topicName": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Kafka topic name to consume from or produce to"
        },
        "maxRead": {
          "type": "integer",
          "description": "Maximum number of records to read from the topic in a single batch. Default is unlimited"
        },
        "fields": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ConvertibleToString"
          },
          "description": "List of fields to extract from Kafka messages"
        },
        "partitions": {
          "type": "integer",
          "description": "Number of partitions for the Kafka topic when creating it"
        },
        "replicationFactor": {
          "type": "integer",
          "description": "Replication factor for the Kafka topic when creating it"
        },
        "createOptions": {
          "$ref": "#/$defs/MapString",
          "description": "Additional options passed when creating the Kafka topic"
        },
        "accessOptions": {
          "$ref": "#/$defs/MapString",
          "description": "Kafka consumer/producer configuration options (e.g., security settings, serializers)"
        },
        "headers": {
          "type": "object",
          "description": "HTTP headers to include when accessing Kafka via HTTP proxy",
          "additionalProperties": {
            "$ref": "#/$defs/MapString"
          }
        }
      }
    },
    "KafkaConfigV1": {
      "type": "object",
      "properties": {
        "serverOptions": {
          "$ref": "#/$defs/MapString",
          "description": "Kafka server connection options (e.g., bootstrap.servers, security.protocol)"
        },
        "topics": {
          "type": "object",
          "description": "Map of topic name to topic configuration",
          "additionalProperties": {
            "$ref": "#/$defs/KafkaTopicConfigV1"
          }
        },
        "cometOffsetsMode": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Offset management mode: 'STREAM' for Spark streaming checkpoints or 'FILE' for file-based offset tracking"
        },
        "customDeserializers": {
          "$ref": "#/$defs/MapString",
          "description": "Map of custom deserializer class names for specific data formats (e.g., Avro, Protobuf)"
        }
      }
    },
    "DagRefV1": {
      "type": "object",
      "properties": {
        "load": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Dag config to use for load tasks. May be redefined at the table level"
        },
        "transform": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Dag config to use for transform tasks. May be redefined at the task level"
        }
      }
    },
    "GizmoV1": {
      "type": "object",
      "properties": {
        "url": {
          "type": "string",
          "description": "Gizmo server URL. Default is '<http://localhost:10900>'"
        },
        "apiKey": {
          "type": "string",
          "description": "API key for authenticating with the Gizmo server"
        }
      }
    },
    "HttpV1": {
      "type": "object",
      "properties": {
        "interface": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Network interface to bind the HTTP server to. Default is '0.0.0.0' (all interfaces)"
        },
        "port": {
          "type": "integer",
          "description": "Port number for the HTTP server. Default is 8080"
        }
      }
    },
    "AppConfigV1": {
      "type": "object",
      "properties": {
        "env": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default environment to use. May be also set using the SL_ENV environment variable"
        },
        "datasets": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "When using filesystem storage, default path to store the datasets"
        },
        "incoming": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Incoming folder to use during autoload"
        },
        "dags": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "DAG generation config folder. metadata/dags by default"
        },
        "types": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "types config folder. metadata/types by default"
        },
        "macros": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Macros config folder. metadata/macros by default"
        },
        "tests": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Path to tests folder. Default is ${metadata}/tests"
        },
        "prunePartitionOnMerge": {
          "type": "boolean",
          "description": "Pre-compute incoming partitions to prune partitions on merge statement"
        },
        "writeStrategies": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Location where are located user defined write strategies; Default is ${metadata}/write-strategies"
        },
        "loadStrategies": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Location where are located user defined load strategies; Default is ${metadata}/load-strategies"
        },
        "metadata": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "default metadata folder name. May be also set using the SL_METADATA environment variable"
        },
        "metrics": {
          "$ref": "#/$defs/MetricsV1"
        },
        "validateOnLoad": {
          "type": "boolean",
          "description": "Validate the YAML file when loading it. If set to true fails on any error"
        },
        "rejectWithValue": {
          "type": "boolean",
          "description": "Add value along with the rejection error. Not enabled by default for security reason. Default: false"
        },
        "audit": {
          "$ref": "#/$defs/AuditV1"
        },
        "archive": {
          "type": "boolean",
          "description": "Should ingested files be archived after ingestion ?"
        },
        "sinkReplayToFile": {
          "type": "boolean",
          "description": "Should invalid records be stored in a replay file ?"
        },
        "lock": {
          "$ref": "#/$defs/LockV1"
        },
        "defaultWriteFormat": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default write format in Spark. parquet is the default"
        },
        "defaultRejectedWriteFormat": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default write format in Spark for rejected records. parquet is the default"
        },
        "defaultAuditWriteFormat": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default write format in Spark for audit records. parquet is the default"
        },
        "csvOutput": {
          "type": "boolean",
          "description": "output files in CSV format ? Default is false"
        },
        "csvOutputExt": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "CSV file extension when csvOutput is true. Default is .csv"
        },
        "privacyOnly": {
          "type": "boolean",
          "description": "Only generate privacy tasks. Reserved for internal use"
        },
        "emptyIsNull": {
          "type": "boolean",
          "description": "Should empty strings be considered as null values ?"
        },
        "loader": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default loader to use when none is specified in the schema. Valid values are 'spark' or 'native'. Default is 'spark'"
        },
        "rowValidatorClass": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Custom row validator class for advanced validation logic. Must implement RowValidator interface"
        },
        "loadStrategyClass": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "In what order should the files for a same table be loaded ? By time (default) or by or name ?\n",
          "anyOf": [
            {
              "const": "ai.starlake.job.load.IngestionNameStrategy",
              "description": "Order pending files by name"
            },
            {
              "const": "ai.starlake.job.load.IngestionTimeStrategy",
              "title": "Order pending files by creation date time"
            }
          ]
        },
        "grouped": {
          "type": "boolean",
          "description": "Should we load of the files to be stored in the same table in a single task or one by one ?"
        },
        "groupedMax": {
          "type": "integer",
          "description": "Maximum number of files to be stored in the same table in a single task"
        },
        "scd2StartTimestamp": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Column name to use for SCD2 start timestamp.@"
        },
        "scd2EndTimestamp": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Column name to use for SCD2 end timestamp."
        },
        "area": {
          "$ref": "#/$defs/AreaV1",
          "description": "stage, ingesting ... areas configuration"
        },
        "hadoop": {
          "$ref": "#/$defs/MapString",
          "description": "Hadoop configuration if applicable"
        },
        "connections": {
          "$ref": "#/$defs/MapConnectionV1",
          "description": "Connections configurations"
        },
        "jdbcEngines": {
          "$ref": "#/$defs/MapJdbcEngineV1",
          "description": "JDBC engine configurations"
        },
        "privacy": {
          "$ref": "#/$defs/PrivacyV1",
          "description": "Privacy algorithms"
        },
        "root": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Root folder for the application. May be also set using the SL_ROOT environment variable"
        },
        "internal": {
          "$ref": "#/$defs/InternalV1",
          "description": "Internal configuration"
        },
        "accessPolicies": {
          "$ref": "#/$defs/AccessPoliciesV1",
          "description": "Access policies configuration"
        },
        "sparkScheduling": {
          "$ref": "#/$defs/SparkSchedulingV1",
          "description": "Spark Job scheduling configuration"
        },
        "udfs": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Coma separated list of UDF to register in Spark jobs. May be also set using the SL_UDFS environment variable"
        },
        "expectations": {
          "$ref": "#/$defs/ExpectationsConfigV1",
          "description": "Expectations configuration"
        },
        "sqlParameterPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Pattern to use to replace parameters in SQL queries in addition to the jinja syntax {{param}}. Default is ${param}"
        },
        "rejectAllOnError": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Should we reject all records when an error occurs ? Default is false"
        },
        "rejectMaxRecords": {
          "type": "integer",
          "description": "Maximum number of records to reject when an error occurs. Default is 100"
        },
        "maxParCopy": {
          "type": "integer",
          "description": "Maximum number of parallel file copy operations during import. Default is 1"
        },
        "kafka": {
          "$ref": "#/$defs/KafkaConfigV1",
          "description": "Kafka configuration for streaming ingestion and message processing"
        },
        "dsvOptions": {
          "$ref": "#/$defs/MapString",
          "description": "DSV ingestion extra options"
        },
        "forceViewPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "forceDomainPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "forceTablePattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "forceJobPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "forceTaskPattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "useLocalFileSystem": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "sessionDurationServe": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "database": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default target database (projectId in GCP). May be also set using the SL_DATABASE environment variable"
        },
        "tenant": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "reserved"
        },
        "connectionRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default connection to use when loading / transforming data"
        },
        "loadConnectionRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default connection to use when loading / transforming data"
        },
        "transformConnectionRef": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default connection to use when loading / transforming data"
        },
        "schedulePresets": {
          "$ref": "#/$defs/MapString",
          "description": "Map of schedule preset names to cron expressions for reusable scheduling patterns"
        },
        "maxParTask": {
          "type": "integer",
          "description": "How many job to run simultaneously in dev mode (experimental)"
        },
        "refs": {
          "type": "array",
          "description": "Reference mappings for resolving table references in SQL queries across different environments",
          "items": {
            "$ref": "#/$defs/RefV1"
          }
        },
        "dagRef": {
          "$ref": "#/$defs/DagRefV1",
          "description": "Default DAG configuration references for load and transform tasks"
        },
        "forceHalt": {
          "type": "boolean",
          "description": "Force application to stop even when there is some pending thread."
        },
        "jobIdEnvName": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Environment variable name containing the job ID for tracking purposes"
        },
        "archiveTablePattern": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Pattern for naming archive tables. Use {table} as placeholder for the original table name"
        },
        "archiveTable": {
          "type": "boolean",
          "description": "Enable table archiving before overwrite operations. Default is false"
        },
        "version": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Application configuration version for compatibility tracking"
        },
        "autoExportSchema": {
          "type": "boolean",
          "description": "Automatically export table schemas after load/transform operations. Default is false"
        },
        "longJobTimeoutMs": {
          "type": "integer",
          "description": "Timeout in milliseconds for long-running jobs. Default is 3600000 (1 hour)"
        },
        "shortJobTimeoutMs": {
          "type": "integer",
          "description": "Timeout in milliseconds for short-running jobs. Default is 300000 (5 minutes)"
        },
        "createSchemaIfNotExists": {
          "type": "boolean",
          "description": "Automatically create database schema/dataset if it does not exist. Default is true"
        },
        "http": {
          "$ref": "#/$defs/HttpV1",
          "description": "HTTP server configuration for the Starlake REST API"
        },
        "timezone": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Default timezone for date/time operations. Default is UTC"
        },
        "maxInteractiveRecords": {
          "type": "integer",
          "description": "Maximum number of records to return in interactive query mode. Default is 1000"
        },
        "duckdbMode": {
          "type": "boolean",
          "description": "is duckdb mode active"
        },
        "duckdbExtensions": {
          "type": "string",
          "description": "Comma separated list of duckdb extensions to load. Default is spatial, json, httpfs"
        },
        "duckdbPath": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Where to store duckdb files if not using default"
        },
        "testCsvNullString": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "null string value in tests"
        },
        "hiveInTest": {
          "$ref": "#/$defs/ConvertibleToString",
          "description": "Internal use only"
        },
        "spark": {
          "type": "object",
          "description": "Map of string",
          "additionalProperties": true
        },
        "extra": {
          "type": "object",
          "description": "Map of string",
          "additionalProperties": true
        },
        "duckDbEnableExternalAccess": {
          "description": "Allow DuckDB to load / Save data from / to external sources. Default to true",
          "type": "boolean"
        },
        "syncSqlWithYaml": {
          "description": "Update attributes in YAMl file when SQL is updated. Default to true",
          "type": "boolean"
        },
        "syncYamlWithDb": {
          "description": "Update database with YAML transform is run. Default to true",
          "type": "boolean"
        },
        "onExceptionRetries": {
          "description": "Number of retries on transient exceptions",
          "type": "integer"
        },
        "pythonLibsDir": {
          "description": "Directory containing python libraries to use instead of pip install",
          "type": "string"
        },
        "gizmosql": {
          "$ref": "#/$defs/GizmoV1",
          "description": "Gizmo server configuration"
        }
      }
    },
    "StarlakeV1Base": {
      "type": "object",
      "properties": {
        "types": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/TypeV1"
          }
        },
        "dag": {
          "$ref": "#/$defs/DagGenerationConfigV1"
        },
        "extract": {
          "oneOf": [
            {
              "$ref": "#/$defs/JDBCSchemasV1"
            },
            {
              "$ref": "#/$defs/OpenAPIsV1",
              "description": "Defines OpenAPI schemas extraction"
            }
          ]
        },
        "load": {
          "$ref": "#/$defs/DomainV1"
        },
        "transform": {
          "$ref": "#/$defs/AutoJobDescV1"
        },
        "task": {
          "$ref": "#/$defs/AutoTaskDescV1"
        },
        "env": {
          "$ref": "#/$defs/MapString"
        },
        "table": {
          "$ref": "#/$defs/TableV1"
        },
        "refs": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/RefV1"
          }
        },
        "application": {
          "$ref": "#/$defs/AppConfigV1"
        }
      },
      "oneOf": [
        {
          "required": [
            "extract"
          ]
        },
        {
          "required": [
            "load"
          ]
        },
        {
          "required": [
            "transform"
          ]
        },
        {
          "required": [
            "env"
          ]
        },
        {
          "required": [
            "types"
          ]
        },
        {
          "required": [
            "tables"
          ]
        },
        {
          "required": [
            "table"
          ]
        },
        {
          "required": [
            "task"
          ]
        },
        {
          "required": [
            "application"
          ]
        },
        {
          "required": [
            "refs"
          ]
        },
        {
          "required": [
            "dag"
          ]
        }
      ]
    }
  },
  "required": [
    "version"
  ]
}
