{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://catalog.lintel.tools/schemas/schemastore/ms2rescore-configuration/latest.json",
  "title": "MS²Rescore configuration",
  "x-lintel": {
    "source": "https://raw.githubusercontent.com/compomics/ms2rescore/main/ms2rescore/package_data/config_schema.json",
    "sourceSha256": "9bf7d728cd5a640a5a07c35b5873d0a11526471d8a86ab4d707d962e2ece6ab8",
    "fileMatch": [
      "ms2rescore.json",
      "ms2rescore.toml",
      ".*-ms2rescore.json",
      ".*-ms2rescore.toml",
      ".*-ms2rescore-config.json",
      ".*-ms2rescore-config.toml"
    ],
    "parsers": [
      "json",
      "toml"
    ]
  },
  "type": "object",
  "properties": {
    "ms2rescore": {
      "description": "General MS²Rescore settings.",
      "type": "object",
      "required": [
        "psm_file"
      ],
      "properties": {
        "feature_generators": {
          "description": "Feature generators and their configurations.",
          "type": "object",
          "default": {
            "basic": {},
            "ms2pip": {
              "model": "HCD",
              "ms2_tolerance": 0.02
            },
            "deeplc": {},
            "maxquant": {}
          },
          "patternProperties": {
            ".*": {
              "$ref": "#/$defs/feature_generator"
            },
            "basic": {
              "$ref": "#/$defs/basic"
            },
            "ms2pip": {
              "$ref": "#/$defs/ms2pip"
            },
            "deeplc": {
              "$ref": "#/$defs/deeplc"
            },
            "maxquant": {
              "$ref": "#/$defs/maxquant"
            },
            "ionmob": {
              "$ref": "#/$defs/ionmob"
            },
            "im2deep": {
              "$ref": "#/$defs/im2deep"
            }
          }
        },
        "rescoring_engine": {
          "description": "Rescoring engine to use and its configuration. Leave empty to skip rescoring and write features to file.",
          "type": "object",
          "minProperties": 0,
          "maxProperties": 1,
          "default": {
            "mokapot": {}
          },
          "patternProperties": {
            ".*": {
              "$ref": "#/$defs/rescoring_engine"
            },
            "percolator": {
              "$ref": "#/$defs/percolator"
            },
            "mokapot": {
              "$ref": "#/$defs/mokapot"
            }
          }
        },
        "config_file": {
          "description": "Path to configuration file",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ]
        },
        "psm_file": {
          "description": "Path to file with peptide-spectrum matches.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          ]
        },
        "psm_file_type": {
          "description": "PSM file type. By default inferred from file extension.",
          "type": "string",
          "default": "infer"
        },
        "psm_reader_kwargs": {
          "description": "Keyword arguments passed to the PSM reader.",
          "type": "object",
          "default": {}
        },
        "spectrum_path": {
          "description": "Path to spectrum file or directory with spectrum files",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ]
        },
        "output_path": {
          "description": "Path and root name for output files",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ]
        },
        "log_level": {
          "description": "Logging level",
          "type": "string",
          "enum": [
            "debug",
            "info",
            "warning",
            "error",
            "critical"
          ]
        },
        "id_decoy_pattern": {
          "description": "Regex pattern used to identify the decoy PSMs in identification file.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "spectrum_id_pattern": {
          "description": "Regex pattern to extract index or scan number from spectrum file. Requires at least one capturing group.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": "(.*)",
          "format": "regex"
        },
        "psm_id_pattern": {
          "description": "Regex pattern to extract index or scan number from PSM file. Requires at least one capturing group.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": "(.*)",
          "format": "regex"
        },
        "psm_id_rt_pattern": {
          "description": "Regex pattern to extract retention time from PSM identifier. Requires at least one capturing group.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "format": "regex"
        },
        "psm_id_im_pattern": {
          "description": "Regex pattern to extract ion mobility from PSM identifier. Requires at least one capturing group.",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "format": "regex"
        },
        "lower_score_is_better": {
          "description": "Bool indicating if lower score is better",
          "type": "boolean",
          "default": false
        },
        "max_psm_rank_input": {
          "description": "Maximum rank of PSMs to use as input for rescoring",
          "type": "number",
          "default": 10,
          "minimum": 1
        },
        "max_psm_rank_output": {
          "description": "Maximum rank of PSMs to return after rescoring, before final FDR calculation",
          "type": "number",
          "default": 1,
          "minimum": 1
        },
        "modification_mapping": {
          "description": "Mapping of modification labels to each replacement label.",
          "type": "object",
          "default": {}
        },
        "fixed_modifications": {
          "description": "Mapping of amino acids with fixed modifications to the modification name.",
          "type": "object",
          "default": {},
          "additionalProperties": true
        },
        "processes": {
          "description": "Number of parallel processes to use; -1 for all available",
          "type": "number",
          "multipleOf": 1,
          "minimum": -1,
          "default": -1
        },
        "rename_to_usi": {
          "description": "Convert spectrum IDs to their universal spectrum identifier",
          "type": "boolean"
        },
        "fasta_file": {
          "description": "Path to FASTA file with protein sequences to use for protein inference",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ]
        },
        "write_flashlfq": {
          "description": "Write results to a FlashLFQ-compatible file",
          "oneOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false
        },
        "write_report": {
          "description": "Write an HTML report with various QC metrics and charts",
          "oneOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": true
        },
        "disable_update_check": {
          "description": "Disable the automatic update check",
          "oneOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false
        },
        "profile": {
          "description": "Write a txt report using cProfile for profiling",
          "oneOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false
        }
      },
      "additionalProperties": false
    }
  },
  "required": [
    "ms2rescore"
  ],
  "$defs": {
    "feature_generator": {
      "description": "Feature generator configuration",
      "type": "object",
      "additionalProperties": true
    },
    "rescoring_engine": {
      "description": "Rescoring engine configuration",
      "type": "object",
      "additionalProperties": true
    },
    "basic": {
      "$ref": "#/$defs/feature_generator",
      "description": "Basic feature generator configuration",
      "type": "object",
      "additionalProperties": true
    },
    "ms2pip": {
      "$ref": "#/$defs/feature_generator",
      "description": "MS²PIP feature generator configuration",
      "type": "object",
      "properties": {
        "model": {
          "description": "MS²PIP model to use (see MS²PIP documentation)",
          "type": "string",
          "default": "HCD"
        },
        "ms2_tolerance": {
          "description": "MS2 error tolerance in Da",
          "type": "number",
          "minimum": 0,
          "default": 0.02
        }
      },
      "additionalProperties": true
    },
    "deeplc": {
      "$ref": "#/$defs/feature_generator",
      "description": "DeepLC feature generator configuration",
      "type": "object",
      "properties": {
        "calibration_set_size": {
          "description": "Calibration set size",
          "oneOf": [
            {
              "type": "integer"
            },
            {
              "type": "number"
            }
          ],
          "default": 0.15
        }
      },
      "additionalProperties": true
    },
    "maxquant": {
      "$ref": "#/$defs/feature_generator",
      "description": "MaxQuant feature generator configuration",
      "type": "object",
      "additionalProperties": true
    },
    "ionmob": {
      "$ref": "#/$defs/feature_generator",
      "description": "Ion mobility feature generator configuration using Ionmob",
      "type": "object",
      "properties": {
        "ionmob_model": {
          "description": "Path to Ionmob model directory",
          "type": "string",
          "default": "GRUPredictor"
        },
        "reference_dataset": {
          "description": "Path to Ionmob reference dataset file",
          "type": "string",
          "default": "Meier_unimod.parquet"
        },
        "tokenizer": {
          "description": "Path to tokenizer json file",
          "type": "string",
          "default": "tokenizer.json"
        }
      },
      "additionalProperties": true
    },
    "im2deep": {
      "$ref": "#/$defs/feature_generator",
      "description": "Ion mobility feature generator configuration using IM2Deep",
      "type": "object",
      "properties": {
        "reference_dataset": {
          "description": "Path to IM2Deep reference dataset file",
          "type": "string",
          "default": "Meier_unimod.parquet"
        }
      },
      "additionalProperties": true
    },
    "mokapot": {
      "$ref": "#/$defs/rescoring_engine",
      "description": "Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function.",
      "type": "object",
      "properties": {
        "train_fdr": {
          "description": "FDR threshold for training Mokapot",
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "default": 0.01
        },
        "write_weights": {
          "description": "Write Mokapot weights to a text file",
          "type": "boolean",
          "default": false
        },
        "write_txt": {
          "description": "Write Mokapot results to a text file",
          "type": "boolean",
          "default": false
        }
      },
      "additionalProperties": true
    },
    "percolator": {
      "$ref": "#/$defs/rescoring_engine",
      "description": "Percolator rescoring engine configuration",
      "type": "object",
      "properties": {
        "init-weights": {
          "description": "Weights file for scoring function",
          "oneOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": false
        }
      },
      "additionalProperties": true
    }
  }
}
