{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://catalog.lintel.tools/schemas/schemastore/pgap-yaml-input-reader/latest.json",
  "title": "NCBI PGAP submol YAML",
  "description": "NCBI Prokaryotic Genome Annotation Pipeline (PGAP) input metadata (submol) JSON/YAML configuration file",
  "x-lintel": {
    "source": "https://www.schemastore.org/pgap_yaml_input_reader.json",
    "sourceSha256": "f1002fc4052fdbf4251bfb238c699874bcb3dad60f900eff960cf90965141e5e",
    "fileMatch": [
      "submol*.json",
      "submol*.yml",
      "submol*.yaml"
    ],
    "parsers": [
      "json",
      "yaml"
    ]
  },
  "type": "object",
  "properties": {
    "$schema": {
      "type": "string",
      "title": "Schema",
      "description": "The value of this keyword MUST be a URI (containing a scheme) and this URI MUST be normalized.",
      "default": "",
      "examples": [
        "https://www.schemastore.org/pgap_yaml_input_reader"
      ]
    },
    "consortium": {
      "type": "string",
      "title": "Consortium",
      "description": "Name of the project that generated the genome assembly",
      "default": "",
      "examples": [
        "SkyNet"
      ]
    },
    "comment": {
      "type": "string",
      "title": "Free text comment about the genome assembly",
      "description": "Appears in the COMMENT section of each GenBank sequence record.",
      "default": "",
      "examples": [
        "This draft WGS assembly was generated by running SKESA to generate a de-novo assembly. The de-novo assembly was then concatenated with contigs generated using a guided assembler using antimicrobial resistance genes as baits to comprehensively catalog the set of resistance genes in the isolate. Note, some parts of the contigs derived from the guided assembler may overlap de-novo contigs, and other guided assembler contigs. De-novo contigs can be differentiated from guided assembler contigs by their names, which include either 'denovo' or 'guided'."
      ]
    },
    "tp_assembly": {
      "type": "boolean",
      "title": "Reserved",
      "description": "NCBI internal flag used for testing.",
      "default": false,
      "examples": [
        false
      ]
    },
    "sra": {
      "type": "array",
      "title": "SRA assembly data",
      "description": "Sequence reads used to build the assembly",
      "items": {
        "type": "object",
        "properties": {
          "accession": {
            "type": "string",
            "title": "SRA Accession",
            "description": "Sequence Read Archive (SRA) accession for the run (with SRR, ERR or DRR prefix)",
            "default": "",
            "examples": [
              "SRR8796989"
            ]
          }
        },
        "required": [
          "accession"
        ],
        "additionalProperties": false
      }
    },
    "authors": {
      "type": "array",
      "title": "Author(s) of the genome assembly",
      "description": "Optional, but include if intending to submit to GenBank. Authors can be different from the contact.",
      "items": {
        "type": "object",
        "properties": {
          "author": {
            "type": "object",
            "properties": {
              "first_name": {
                "type": "string",
                "title": "First name",
                "default": "",
                "examples": [
                  "Arnold"
                ]
              },
              "last_name": {
                "type": "string",
                "title": "Last name",
                "default": "",
                "examples": [
                  "Schwarzenegger"
                ]
              },
              "middle_initial": {
                "type": "string",
                "title": "First letter of middle name",
                "default": "",
                "examples": [
                  "T800"
                ]
              }
            },
            "required": [
              "first_name",
              "last_name"
            ],
            "additionalProperties": false
          }
        },
        "required": [
          "author"
        ],
        "additionalProperties": false
      }
    },
    "bioproject": {
      "type": "string",
      "title": "BioProject ID (PRJXX) for the project, if available",
      "default": "",
      "examples": [
        "PRJ9999999"
      ]
    },
    "biosample": {
      "type": "string",
      "title": "BioSample ID (SAMXXX) for the sequenced sample, if available",
      "default": "",
      "examples": [
        "SAMN99999999"
      ]
    },
    "contact_info": {
      "type": "object",
      "title": "Submitter contact information",
      "description": "Optional, but include if intending to submit to GenBank. The main contact for this genome assembly.",
      "properties": {
        "state": {
          "type": "string",
          "title": "State or region",
          "default": "",
          "examples": [
            "MD",
            "Florida"
          ]
        },
        "fax": {
          "type": "string",
          "title": "Fax number",
          "default": "",
          "examples": [
            "301-555-1234",
            "+7 095 555 1234"
          ]
        },
        "city": {
          "type": "string",
          "title": "City",
          "default": "",
          "examples": [
            "Docker"
          ]
        },
        "country": {
          "type": "string",
          "title": "Country",
          "default": "",
          "examples": [
            "Lappland"
          ]
        },
        "department": {
          "type": "string",
          "title": "Department or division submitting the genome assembly",
          "default": "",
          "examples": [
            "Department of Using NCBI"
          ]
        },
        "email": {
          "type": "string",
          "title": "Email address",
          "default": "",
          "examples": [
            "jane_doe@gmail.com"
          ]
        },
        "first_name": {
          "type": "string",
          "title": "First name",
          "default": "",
          "examples": [
            "Jane"
          ]
        },
        "middle_initial": {
          "type": "string",
          "title": "First letter of middle name",
          "default": "",
          "examples": [
            "N"
          ]
        },
        "last_name": {
          "type": "string",
          "title": "Last name",
          "default": "",
          "examples": [
            "Doe"
          ]
        },
        "organization": {
          "type": "string",
          "title": "Organization or consortium submitting the genome assembly",
          "default": "",
          "examples": [
            "Institute of Klebsiella foobarensis research"
          ]
        },
        "phone": {
          "type": "string",
          "title": "Phone number",
          "default": "",
          "examples": [
            "301-555-0245"
          ]
        },
        "postal_code": {
          "type": "string",
          "title": "Postal code",
          "default": "",
          "examples": [
            "12345"
          ]
        },
        "street": {
          "type": "string",
          "title": "Street address",
          "default": "",
          "examples": [
            "1234 Main St"
          ]
        }
      },
      "required": [
        "organization",
        "department",
        "city",
        "country",
        "street",
        "email",
        "first_name",
        "last_name",
        "postal_code"
      ],
      "additionalProperties": false
    },
    "fasta": {
      "type": "object",
      "properties": {
        "class": {
          "type": "string",
          "title": "Class of input type",
          "default": "",
          "examples": [
            "File"
          ]
        },
        "location": {
          "type": "string",
          "title": "Location of input file",
          "default": "",
          "examples": [
            "sample_fasta_input.fasta"
          ]
        }
      },
      "additionalProperties": false
    },
    "locus_tag_prefix": {
      "type": "string",
      "title": "Locus tag prefix",
      "description": "Prefix of one to nine letters used for naming genes on this genome assembly. If an official locus tag prefix was already reserved with an INSDC organization (GenBank, ENA or DDBJ) for the given BioSample and BioProject pair, provide it here. Otherwise, provide a string of your choice. If no value is provided, the prefix 'pgaptmp' will be used. For more details, see the note about locus tags at: <https://github.com/ncbi/pgap/wiki/Input-Files#Note-about-locus-tags>",
      "default": "",
      "examples": [
        "tmp"
      ]
    },
    "organism": {
      "type": "object",
      "properties": {
        "strain": {
          "type": "string",
          "title": "Strain",
          "description": "Strain of the sequenced organism",
          "default": "",
          "examples": [
            "my_strain"
          ]
        },
        "genus_species": {
          "type": "string",
          "title": "Genus and species",
          "description": "Binomial name or, if the species is unknown, genus for the sequenced organism. This identifier must be valid in NCBI Taxonomy. See Taxonomy information for how to find out if the name is valid: <https://github.com/ncbi/pgap/wiki/Input-Files#Taxonomy-information>",
          "default": "",
          "examples": [
            "Escherichia coli"
          ]
        }
      },
      "additionalProperties": false
    },
    "publications": {
      "type": "array",
      "title": "Publication describing the genome assembly",
      "items": {
        "type": "object",
        "properties": {
          "publication": {
            "type": "object",
            "properties": {
              "authors": {
                "title": "Author(s)",
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "author": {
                      "type": "object",
                      "properties": {
                        "first_name": {
                          "type": "string",
                          "title": "First name",
                          "default": "",
                          "examples": [
                            "Arnold"
                          ]
                        },
                        "last_name": {
                          "type": "string",
                          "title": "Last name",
                          "default": "",
                          "examples": [
                            "Schwarzenegger"
                          ]
                        },
                        "middle_initial": {
                          "type": "string",
                          "title": "First letter of middle name",
                          "default": "",
                          "examples": [
                            "T800"
                          ]
                        }
                      },
                      "required": [
                        "first_name",
                        "last_name"
                      ],
                      "additionalProperties": false
                    }
                  },
                  "additionalProperties": false
                }
              },
              "status": {
                "type": "string",
                "title": "Publication status",
                "description": "Can be only one of: published, in-press, unpublished",
                "enum": [
                  "published",
                  "in-press",
                  "unpublished"
                ]
              },
              "pmid": {
                "type": "integer",
                "title": "PubMed ID for the publication",
                "description": "Numeric PubMed identifier (PMID) of the publication, given as an integer rather than a quoted string.",
                "examples": [
                  99999999
                ]
              },
              "title": {
                "type": "string",
                "title": "Title",
                "default": "",
                "examples": [
                  "Discrete CHARMm of Klebsiella foobarensis. Journal of Improbable Results, vol. 34, issue 13, pages: 10001-100005, 2018"
                ]
              }
            },
            "additionalProperties": false
          }
        },
        "required": [
          "publication"
        ],
        "additionalProperties": false
      }
    },
    "topology": {
      "type": "string",
      "title": "Topology of the sequences included in the fasta file",
      "description": "Possible values are linear or circular. Circular means that the first base in the sequence is adjacent to the last base. Please provide the topology in the metadata YAML file only if it is applicable to ALL sequences in the fasta file. If some sequences in the assembled genome are circular and others linear, include the topology in the definition line of each sequence in the fasta file with the tag value pair [topology=circular] or [topology=linear], after the SeqID and a space (e.g. >seq1 [topology=circular]). If the topology is provided in neither the metadata YAML nor the fasta file, the sequences will be presumed to be linear.",
      "default": "",
      "examples": [
        "circular",
        "linear"
      ]
    },
    "location": {
      "type": "string",
      "title": "Location of the sequences included in the fasta file",
      "description": "Possible values are chromosome or plasmid. Please provide the location in the metadata YAML file only if it is applicable to ALL sequences in the fasta file. If some sequences in the assembled genome are chromosomes and others plasmids, include the location in the definition line of each sequence in the fasta file with the tag value pair [location=chromosome] or [location=plasmid], after the SeqID and a space (e.g. >seq1 [location=plasmid]). For plasmids, also add [plasmid-name=<plasmidname>]. If the location is provided in neither the metadata YAML nor the fasta file, the sequences will be presumed to be chromosome. Note: since the 2021 releases of PGAPx, this noticeably affects the annotation of the molecule.",
      "default": "",
      "examples": [
        "chromosome",
        "plasmid"
      ]
    }
  },
  "required": [
    "authors",
    "contact_info"
  ],
  "additionalProperties": false,
  "$defs": {}
}
