// COBOLTestVisitor.java
package org.egothor.methodatlas.discovery.cobol.internal;
import java.util.ArrayList;
import java.util.List;
import org.antlr.v4.runtime.BufferedTokenStream;
import org.antlr.v4.runtime.Token;
import org.egothor.methodatlas.discovery.cobol.parser.COBOLTestLexer;
/**
 * Token-stream scanner that walks a lexed COBOL file and collects MFUnit
 * paragraphs and COBOL-Check {@code TestCase} declarations.
 *
 * <p>The scanner deliberately bypasses the structural ANTLR parse tree:
 * COBOL allows an enormous number of statement-level constructs that are
 * hard to model in a focused grammar (CALL, MOVE, IF/EVALUATE, etc.).
 * Token-level scanning is robust to all of them — it only reacts to the
 * tokens that mark a test declaration and ignores everything else.</p>
 *
 * <h2>Detection logic</h2>
 * <ul>
 * <li><strong>MFUnit</strong> — when an {@link COBOLTestLexer#MFU_TC_ID}
 *     token is seen, its upper-cased text is recorded as the test name.</li>
 * <li><strong>COBOL-Check</strong> — when a {@link COBOLTestLexer#TESTCASE}
 *     token is followed by another (non-EOF) token, that token's text with
 *     surrounding quotes stripped is recorded as the test name. An unquoted
 *     token (e.g. an identifier) is accepted as-is; an empty name is
 *     ignored.</li>
 * <li>{@code TestSuite} directives are recognised as section boundaries but
 *     are not emitted as test methods.</li>
 * </ul>
 *
 * <p>For both flavours the end-line is the line just before the next
 * <em>section boundary</em> on a later line — an MFUnit paragraph header,
 * a {@code TestCase}, a {@code TestSuite}, or EOF — or the start line itself
 * when no such boundary exists.</p>
 *
 * <p>Instances are single-use: construct one per file, call
 * {@link #scan(BufferedTokenStream)}, then read results via
 * {@link #getDiscoveredMethods()}. Results are never cleared, so scanning
 * twice with the same instance appends to the earlier results.</p>
 */
public final class COBOLTestVisitor {

    private final List<MethodInfo> discoveredMethods = new ArrayList<>();

    /**
     * Scans the supplied lexer token stream and populates the discovered
     * methods list.
     *
     * @param tokens COBOL token stream; never {@code null}. The stream is
     *               filled (lexed to EOF) by this method before scanning.
     */
    public void scan(BufferedTokenStream tokens) {
        tokens.fill();
        final int tokenCount = tokens.size();

        // First pass: collect the line of every section-boundary token
        // (MFU_TC_ID, TESTCASE, TESTSUITE, EOF) so that end-lines can be
        // derived from this short list instead of rescanning the whole
        // token stream for each record.
        final List<Integer> boundaryLines = new ArrayList<>();
        for (int i = 0; i < tokenCount; i++) {
            final Token token = tokens.get(i);
            if (isSectionBoundary(token.getType())) {
                boundaryLines.add(token.getLine());
            }
        }

        // Second pass: emit MethodInfo records.
        for (int i = 0; i < tokenCount; i++) {
            final Token token = tokens.get(i);
            final int type = token.getType();

            if (type == COBOLTestLexer.MFU_TC_ID) {
                final String name = token.getText().toUpperCase(java.util.Locale.ROOT);
                record(name, token, boundaryLines);
            } else if (type == COBOLTestLexer.TESTCASE && i + 1 < tokenCount) {
                final Token nameToken = tokens.get(i + 1);
                // A dangling TestCase keyword at the very end of the file is
                // followed only by EOF, whose text is "<EOF>"; that is not a
                // test name and must not be reported as one.
                if (nameToken.getType() == Token.EOF) {
                    continue;
                }
                final String name = unquote(nameToken.getText());
                if (!name.isEmpty()) {
                    record(name, token, boundaryLines);
                }
            }
            // TESTSUITE is recognised as a boundary but is never emitted.
        }
    }

    /**
     * All test methods found in the file after scanning.
     *
     * @return immutable snapshot of the discovered test methods, in the
     *         order they were encountered
     */
    public List<MethodInfo> getDiscoveredMethods() {
        return List.copyOf(discoveredMethods);
    }

    // ── Private helpers ───────────────────────────────────────────────

    /**
     * Tells whether a token type delimits a test section.
     *
     * @param tokenType ANTLR token type
     * @return {@code true} for MFUnit paragraph headers, {@code TestCase},
     *         {@code TestSuite} and EOF
     */
    private static boolean isSectionBoundary(int tokenType) {
        return tokenType == COBOLTestLexer.MFU_TC_ID
                || tokenType == COBOLTestLexer.TESTCASE
                || tokenType == COBOLTestLexer.TESTSUITE
                || tokenType == Token.EOF;
    }

    /**
     * Appends one discovered test that starts at {@code startToken}.
     *
     * @param name          test name to report
     * @param startToken    token that opens the test (its line is the start line)
     * @param boundaryLines section-boundary lines of the whole file
     */
    private void record(String name, Token startToken, List<Integer> boundaryLines) {
        final int startLine = startToken.getLine();
        final int endLine = computeEndLine(boundaryLines, startLine);
        discoveredMethods.add(new MethodInfo(name, startLine, endLine));
    }

    /**
     * Finds the end line for a test record that starts at {@code startLine}.
     *
     * <p>The end line is the line just before the first section-boundary
     * line that is strictly greater than {@code startLine}. If no such
     * boundary exists, {@code startLine} is returned.</p>
     *
     * @param boundaryLines section-boundary line numbers in token order
     * @param startLine     line of the current test's start token
     * @return end line for the current test (always {@code >= startLine})
     */
    private static int computeEndLine(List<Integer> boundaryLines, int startLine) {
        for (int line : boundaryLines) {
            if (line > startLine) {
                // line > startLine already guarantees line - 1 >= startLine.
                return line - 1;
            }
        }
        return startLine;
    }

    /**
     * Strips a matching pair of surrounding single or double quotes from a
     * raw lexer token text and collapses doubled quote escapes
     * ({@code ''} or {@code ""}) inside it.
     *
     * @param raw token text; may be {@code null}
     * @return the unquoted value; the empty string when {@code raw} is
     *         {@code null}; {@code raw} unchanged when it is shorter than two
     *         characters or is not enclosed in a matching quote pair
     */
    /* default */ static String unquote(String raw) {
        if (raw == null) {
            return "";
        }
        if (raw.length() < 2) {
            return raw;
        }
        final char first = raw.charAt(0);
        final char last = raw.charAt(raw.length() - 1);
        final String inner = raw.substring(1, raw.length() - 1);
        if (first == '\'' && last == '\'') {
            return inner.replace("''", "'");
        }
        if (first == '"' && last == '"') {
            return inner.replace("\"\"", "\"");
        }
        return raw;
    }
}