| 1 | package org.egothor.methodatlas; | |
| 2 | ||
| 3 | import java.io.IOException; | |
| 4 | import java.nio.file.Path; | |
| 5 | import java.util.List; | |
| 6 | import java.util.Map; | |
| 7 | ||
| 8 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; | |
| 9 | import com.fasterxml.jackson.annotation.JsonProperty; | |
| 10 | import tools.jackson.core.JacksonException; | |
| 11 | import tools.jackson.databind.DeserializationFeature; | |
| 12 | import tools.jackson.databind.ObjectMapper; | |
| 13 | import tools.jackson.dataformat.yaml.YAMLMapper; | |
| 14 | ||
| 15 | /** | |
| 16 | * Loads a YAML configuration file that provides default values for | |
| 17 | * command-line options. | |
| 18 | * | |
| 19 | * <p> | |
| 20 | * When a {@code -config <file>} argument is present, {@link CliArgs} calls | |
| 21 | * {@link #load(Path)} before processing the remaining arguments. The returned | |
| 22 | * {@link YamlConfigFile} seeds the initial values; any matching command-line | |
| 23 | * flag then overrides the YAML value. | |
| 24 | * </p> | |
| 25 | * | |
| 26 | * <h2>Supported fields</h2> | |
| 27 | * | |
| 28 | * <pre> | |
| 29 | * outputMode: csv # csv | plain | sarif | json (default: csv) | |
| 30 | * emitMetadata: false # (default: false) | |
| 31 | * contentHash: false # (default: false) | |
| 32 | * securityOnly: false # (default: false) | |
| 33 | * includeNonSecurity: false # opt-in: include non-security methods in SARIF output (default: false) | |
| 34 | * sarifOmitScores: false # opt-out: omit interaction score / confidence from SARIF message text (default: false) | |
| 35 | * minConfidence: 0.0 # drop AI results below this threshold (requires ai.confidence: true; default: 0.0 = off) | |
| 36 | * driftDetect: false # (default: false) | |
| 37 | * promoteAi: false # RISKY, not recommended: -apply-tags-from-csv falls back to ai_tags/ai_display_name | |
| 38 | * # for methods whose curated tags/display_name are blank, writing UNVALIDATED AI | |
| 39 | * # output into source (default: false) | |
| 40 | * overrideFile: .methodatlas-overrides.yaml # optional | |
| 41 | * fileSuffixes: | |
| 42 | * - Test.java | |
| 43 | * testMarkers: # annotation/attribute names; empty = provider defaults | |
| 44 | * - Test | |
| 45 | * - ParameterizedTest | |
| 46 | * properties: # plugin-specific key/multi-value pairs (optional) | |
| 47 | * functionNames: # example: for a Jest/Mocha/Vitest TypeScript plugin | |
| 48 | * - test | |
| 49 | * - it | |
| 50 | * ai: | |
| 51 | * enabled: true | |
| 52 | * provider: ollama # auto | ollama | openai | openrouter | anthropic | azure_openai | groq | xai | github_models | mistral | |
| 53 | * model: qwen2.5-coder:7b | |
| 54 | * baseUrl: http://localhost:11434 | |
| 55 | * apiKey: sk-... | |
| 56 | * apiKeyEnv: MY_KEY_ENV | |
| 57 | * taxonomyFile: /path/to/taxonomy.txt | |
| 58 | * taxonomyMode: default # default | optimized | |
| 59 | * maxClassChars: 100000 | |
| 60 | * timeoutSec: 30 | |
| 61 | * maxRetries: 3 | |
| 62 | * confidence: false | |
| 63 | * apiVersion: 2024-02-01 # Azure OpenAI REST API version (azure_openai only) | |
| 64 | * detectSecrets: false # enable credential detection (default: false) | |
| 65 | * secretsInclude: "**/*.java" # glob override for file mask (default: null = use fileSuffixes) | |
| 66 | * secretsRules: /path/to/rules.yaml # custom rule catalog (default: null = built-in) | |
| 67 | * secretsOut: methodatlas-credentials.csv # output path for secrets CSV (default: methodatlas-credentials.csv) | |
| 68 | * secretsSeparateLlm: false # force standalone triage LLM call (default: false) | |
| 69 | * secretsShowValues: false # print unmasked values (default: false) | |
| 70 | * secretsErrorThreshold: 0.8 # SARIF error score floor (default: 0.8) | |
| 71 | * secretsWarningThreshold: 0.4 # SARIF warning score floor (default: 0.4) | |
| 72 | * secretsMinScore: 0.0 # suppress findings below this score (default: 0.0 = keep all) | |
| 73 | * </pre> | |
| 74 | * | |
| 75 | * <p> | |
| 76 | * Unknown fields in the YAML file are silently ignored. | |
| 77 | * </p> | |
| 78 | * | |
| 79 | * @see CliArgs | |
| 80 | */ | |
| 81 | final class YamlConfig { | |
| 82 | ||
| 83 | /** | |
| 84 | * Prevents instantiation of this utility class. | |
| 85 | */ | |
| 86 | private YamlConfig() { | |
| 87 | } | |
| 88 | ||
| 89 | /** | |
| 90 | * Loads a YAML configuration file. | |
| 91 | * | |
| 92 | * @param configFile path to the YAML file | |
| 93 | * @return parsed configuration; never {@code null} | |
| 94 | * @throws IOException if the file cannot be read | |
| 95 | * @throws IllegalArgumentException if the file cannot be parsed as valid YAML | |
| 96 | */ | |
| 97 | /* default */ static YamlConfigFile load(Path configFile) throws IOException { | |
| 98 | ObjectMapper mapper = YAMLMapper.builder() | |
| 99 | .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) | |
| 100 | .build(); | |
| 101 | try { | |
| 102 |
1
1. load : replaced return value with null for org/egothor/methodatlas/YamlConfig::load → KILLED |
return mapper.readValue(configFile.toFile(), YamlConfigFile.class); |
| 103 | } catch (JacksonException e) { | |
| 104 | throw new IOException("Cannot read or parse configuration file '" + configFile + "'", e); | |
| 105 | } | |
| 106 | } | |
| 107 | ||
| 108 | // ------------------------------------------------------------------------- | |
| 109 | // POJO classes | |
| 110 | // ------------------------------------------------------------------------- | |
| 111 | ||
| 112 | /** | |
| 113 | * Top-level YAML configuration structure. | |
| 114 | */ | |
| 115 | @JsonIgnoreProperties(ignoreUnknown = true) | |
| 116 | /* default */ static final class YamlConfigFile { | |
| 117 | ||
| 118 | /** Output mode: {@code csv}, {@code plain}, or {@code sarif}. */ | |
| 119 | @JsonProperty("outputMode") | |
| 120 | /* default */ String outputMode; | |
| 121 | ||
| 122 | /** Whether to emit {@code # key: value} metadata comment lines. */ | |
| 123 | @JsonProperty("emitMetadata") | |
| 124 | /* default */ boolean emitMetadata; | |
| 125 | ||
| 126 | /** File name suffixes used to select test source files. */ | |
| 127 | @JsonProperty("fileSuffixes") | |
| 128 | /* default */ List<String> fileSuffixes; | |
| 129 | ||
| 130 | /** | |
| 131 | * Language-neutral test-marker identifiers (annotation/attribute simple | |
| 132 | * names for JVM and .NET providers; ignored by TypeScript providers). | |
| 133 | * Empty or absent means "use provider defaults". | |
| 134 | */ | |
| 135 | @JsonProperty("testMarkers") | |
| 136 | /* default */ List<String> testMarkers; | |
| 137 | ||
| 138 | /** | |
| 139 | * Plugin-specific key/multi-value pairs forwarded verbatim to each | |
| 140 | * {@link org.egothor.methodatlas.api.TestDiscovery} provider. | |
| 141 | * Providers ignore keys they do not recognise. | |
| 142 | */ | |
| 143 | @JsonProperty("properties") | |
| 144 | /* default */ Map<String, List<String>> properties; | |
| 145 | ||
| 146 | /** | |
| 147 | * Whether to include a SHA-256 content-hash fingerprint of each class | |
| 148 | * source as a {@code content_hash} column. | |
| 149 | */ | |
| 150 | @JsonProperty("contentHash") | |
| 151 | /* default */ boolean contentHash; | |
| 152 | ||
| 153 | /** | |
| 154 | * Path to a YAML classification override file. When set, human-authored | |
| 155 | * corrections are applied after AI classification on every run. | |
| 156 | */ | |
| 157 | @JsonProperty("overrideFile") | |
| 158 | /* default */ String overrideFile; | |
| 159 | ||
| 160 | /** | |
| 161 | * When {@code true}, only security-relevant methods are emitted; all | |
| 162 | * other methods are silently dropped from the output. | |
| 163 | */ | |
| 164 | @JsonProperty("securityOnly") | |
| 165 | /* default */ boolean securityOnly; | |
| 166 | ||
| 167 | /** | |
| 168 | * When {@code true}, non-security methods are included in SARIF output | |
| 169 | * even though SARIF mode applies the security-only filter by default. | |
| 170 | * Has no effect in CSV or plain-text modes. | |
| 171 | */ | |
| 172 | @JsonProperty("includeNonSecurity") | |
| 173 | /* default */ boolean includeNonSecurity; | |
| 174 | ||
| 175 | /** | |
| 176 | * When {@code true}, a {@code tag_ai_drift} column is added to CSV/plain | |
| 177 | * output comparing the source-level {@code @Tag("security")} annotation | |
| 178 | * against the AI security-relevance classification. | |
| 179 | */ | |
| 180 | @JsonProperty("driftDetect") | |
| 181 | /* default */ boolean driftDetect; | |
| 182 | ||
| 183 | /** | |
| 184 | * <strong>Risky, not recommended.</strong> When {@code true}, the | |
| 185 | * {@code -apply-tags-from-csv} engine falls back to the {@code ai_tags} | |
| 186 | * and {@code ai_display_name} columns for any method whose curated | |
| 187 | * {@code tags} / {@code display_name} column is blank, writing the raw, | |
| 188 | * unvalidated AI suggestion into source. This bypasses the human review | |
| 189 | * step the apply-from-csv workflow exists to enforce. Default: | |
| 190 | * {@code false}. | |
| 191 | */ | |
| 192 | @JsonProperty("promoteAi") | |
| 193 | /* default */ boolean promoteAi; | |
| 194 | ||
| 195 | /** | |
| 196 | * When {@code true}, the interaction score and confidence percentage are | |
| 197 | * omitted from SARIF result message text. Use this when the consuming | |
| 198 | * system already renders the {@code properties} bag and the extra text is | |
| 199 | * unwanted. Default: {@code false} (scores are embedded in messages). | |
| 200 | */ | |
| 201 | @JsonProperty("sarifOmitScores") | |
| 202 | /* default */ boolean sarifOmitScores; | |
| 203 | ||
| 204 | /** | |
| 205 | * Minimum AI confidence score (inclusive) required for a method to be | |
| 206 | * emitted. Methods whose {@code ai_confidence} is below this threshold | |
| 207 | * are silently dropped. Only meaningful when {@code ai.confidence: true} | |
| 208 | * is also set. Default: {@code 0.0} (no filtering). | |
| 209 | */ | |
| 210 | @JsonProperty("minConfidence") | |
| 211 | /* default */ Double minConfidence; | |
| 212 | ||
| 213 | /** | |
| 214 | * When {@code true}, enable credential detection in addition to the normal | |
| 215 | * test-method scan. Default: {@code false}. | |
| 216 | */ | |
| 217 | @JsonProperty("detectSecrets") | |
| 218 | /* default */ boolean detectSecrets; | |
| 219 | ||
| 220 | /** | |
| 221 | * Glob pattern overriding the default test-file mask when scanning for | |
| 222 | * secrets. {@code null} means use the default mask derived from | |
| 223 | * {@code fileSuffixes}. | |
| 224 | */ | |
| 225 | @JsonProperty("secretsInclude") | |
| 226 | /* default */ String secretsInclude; | |
| 227 | ||
| 228 | /** | |
| 229 | * Path to a custom rule catalog YAML file. {@code null} uses the built-in | |
| 230 | * catalog bundled with the detect-secrets module. | |
| 231 | */ | |
| 232 | @JsonProperty("secretsRules") | |
| 233 | /* default */ String secretsRules; | |
| 234 | ||
| 235 | /** | |
| 236 | * Output path for the secrets CSV. {@code null} causes the default | |
| 237 | * {@code methodatlas-credentials.csv} in the current working directory to be | |
| 238 | * used. | |
| 239 | */ | |
| 240 | @JsonProperty("secretsOut") | |
| 241 | /* default */ String secretsOut; | |
| 242 | ||
| 243 | /** | |
| 244 | * When {@code true}, force a standalone triage LLM call instead of | |
| 245 | * appending the secret-triage prompt to the normal test-classification | |
| 246 | * call. Default: {@code false}. | |
| 247 | */ | |
| 248 | @JsonProperty("secretsSeparateLlm") | |
| 249 | /* default */ boolean secretsSeparateLlm; | |
| 250 | ||
| 251 | /** | |
| 252 | * When {@code true}, print unmasked secret values in CSV and SARIF output. | |
| 253 | * Default: {@code false} (values are redacted). | |
| 254 | */ | |
| 255 | @JsonProperty("secretsShowValues") | |
| 256 | /* default */ boolean secretsShowValues; | |
| 257 | ||
| 258 | /** | |
| 259 | * SARIF error score floor. Findings at or above this value are emitted as | |
| 260 | * {@code error}-level SARIF results. Default: {@code 0.8}. | |
| 261 | */ | |
| 262 | @JsonProperty("secretsErrorThreshold") | |
| 263 | /* default */ Double secretsErrorThreshold; | |
| 264 | ||
| 265 | /** | |
| 266 | * SARIF warning score floor. Findings at or above this value (but below | |
| 267 | * {@code secretsErrorThreshold}) are emitted as {@code warning}-level SARIF | |
| 268 | * results. Default: {@code 0.4}. | |
| 269 | */ | |
| 270 | @JsonProperty("secretsWarningThreshold") | |
| 271 | /* default */ Double secretsWarningThreshold; | |
| 272 | ||
| 273 | /** | |
| 274 | * Suppress findings whose triage score is below this value. Default: | |
| 275 | * {@code 0.0} keeps all findings. | |
| 276 | */ | |
| 277 | @JsonProperty("secretsMinScore") | |
| 278 | /* default */ Double secretsMinScore; | |
| 279 | ||
| 280 | /** AI enrichment settings. */ | |
| 281 | @JsonProperty("ai") | |
| 282 | /* default */ YamlAiConfig ai; | |
| 283 | } | |
| 284 | ||
| 285 | /** | |
| 286 | * AI subsystem configuration within the YAML file. | |
| 287 | */ | |
| 288 | @JsonIgnoreProperties(ignoreUnknown = true) | |
| 289 | /* default */ static final class YamlAiConfig { | |
| 290 | ||
| 291 | /** Whether AI enrichment is enabled. */ | |
| 292 | @JsonProperty("enabled") | |
| 293 | /* default */ Boolean enabled; | |
| 294 | ||
| 295 | /** | |
| 296 | * AI provider: {@code auto}, {@code ollama}, {@code openai}, | |
| 297 | * {@code openrouter}, {@code anthropic}, {@code azure_openai}, | |
| 298 | * {@code groq}, {@code xai}, {@code github_models}, or {@code mistral}. | |
| 299 | */ | |
| 300 | @JsonProperty("provider") | |
| 301 | /* default */ String provider; | |
| 302 | ||
| 303 | /** Provider-specific model name. */ | |
| 304 | @JsonProperty("model") | |
| 305 | /* default */ String model; | |
| 306 | ||
| 307 | /** Provider base URL override. */ | |
| 308 | @JsonProperty("baseUrl") | |
| 309 | /* default */ String baseUrl; | |
| 310 | ||
| 311 | /** API key supplied directly. */ | |
| 312 | @JsonProperty("apiKey") | |
| 313 | /* default */ String apiKey; | |
| 314 | ||
| 315 | /** Name of the environment variable that holds the API key. */ | |
| 316 | @JsonProperty("apiKeyEnv") | |
| 317 | /* default */ String apiKeyEnv; | |
| 318 | ||
| 319 | /** Path to an external taxonomy file. */ | |
| 320 | @JsonProperty("taxonomyFile") | |
| 321 | /* default */ String taxonomyFile; | |
| 322 | ||
| 323 | /** Built-in taxonomy variant: {@code default} or {@code optimized}. */ | |
| 324 | @JsonProperty("taxonomyMode") | |
| 325 | /* default */ String taxonomyMode; | |
| 326 | ||
| 327 | /** Maximum number of characters of class source sent to the AI. */ | |
| 328 | @JsonProperty("maxClassChars") | |
| 329 | /* default */ Integer maxClassChars; | |
| 330 | ||
| 331 | /** AI request timeout in seconds. */ | |
| 332 | @JsonProperty("timeoutSec") | |
| 333 | /* default */ Long timeoutSec; | |
| 334 | ||
| 335 | /** Maximum number of retries for AI requests. */ | |
| 336 | @JsonProperty("maxRetries") | |
| 337 | /* default */ Integer maxRetries; | |
| 338 | ||
| 339 | /** Whether to request a confidence score for each classification. */ | |
| 340 | @JsonProperty("confidence") | |
| 341 | /* default */ Boolean confidence; | |
| 342 | ||
| 343 | /** | |
| 344 | * Azure OpenAI REST API version appended as the {@code api-version} query | |
| 345 | * parameter; only used when {@code provider: azure_openai} is set. | |
| 346 | */ | |
| 347 | @JsonProperty("apiVersion") | |
| 348 | /* default */ String apiVersion; | |
| 349 | ||
| 350 | /** Path to a custom method-classification prompt template (default: built-in). */ | |
| 351 | @JsonProperty("classificationPrompt") | |
| 352 | /* default */ String classificationPrompt; | |
| 353 | ||
| 354 | /** Path to a custom folded credential-triage appendix template (default: built-in). */ | |
| 355 | @JsonProperty("triagePrompt") | |
| 356 | /* default */ String triagePrompt; | |
| 357 | ||
| 358 | /** Path to a custom standalone credential-triage template (default: built-in). */ | |
| 359 | @JsonProperty("dedicatedTriagePrompt") | |
| 360 | /* default */ String dedicatedTriagePrompt; | |
| 361 | } | |
| 362 | } | |
Mutations | ||
| 102 |
1.1 |