cqlsh_rs/
parser.rs

1//! Statement parser for cqlsh-rs.
2//!
3//! Handles multi-line input buffering, semicolon-terminated statement detection,
4//! comment stripping, string literal handling, and routing between CQL statements
5//! and built-in shell commands.
6//!
7//! Key design decisions (from SP4 and SP16 upstream fixes):
8//! - Context-aware tokenization: NO regex preprocessing for comments (PR #150)
9//! - Truly incremental parsing: O(n) total work via scan_offset tracking (PR #151)
10
/// Lexical context the scanner is in at a given position of the CQL input.
///
/// The scanner starts in [`LexState::Normal`]; every other variant marks a
/// region in which a `;` must not be treated as a statement terminator.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum LexState {
    /// Plain CQL text, outside of any string literal or comment.
    #[default]
    Normal,
    /// Between the quotes of a single-quoted string literal (`'...'`).
    SingleQuote,
    /// Between the quotes of a double-quoted identifier (`"..."`).
    DoubleQuote,
    /// Between the delimiters of a dollar-quoted string literal (`$$...$$`).
    DollarQuote,
    /// Within a block comment (`/* ... */`).
    BlockComment,
    /// Within a line comment (`-- ...`), which runs to the end of the line.
    LineComment,
}
28
29/// Incremental statement parser.
30///
31/// Tracks lexer state across `feed_line` calls so that each call only scans
32/// the newly appended bytes. Total work is O(n) over the lifetime of the parser,
33/// not O(n²). See PR #151 for why this matters.
34#[derive(Debug, Default)]
35pub struct StatementParser {
36    /// Accumulated input buffer.
37    buffer: String,
38    /// Byte offset in `buffer` where the next scan should resume.
39    scan_offset: usize,
40    /// Byte offset of the start of the current (in-progress) statement.
41    stmt_start: usize,
42    /// Current lexer state at `scan_offset`.
43    state: LexState,
44    /// Depth of nested block comments.
45    block_comment_depth: usize,
46    /// True when we are inside a `BEGIN BATCH … APPLY BATCH` block.
47    /// Semicolons inside a batch do not terminate the batch statement.
48    in_batch: bool,
49}
50
/// Outcome of feeding one line of input to the parser.
#[must_use]
#[derive(Debug, Eq, PartialEq)]
pub enum ParseResult {
    /// Nothing could be emitted yet; more input is needed.
    Incomplete,
    /// At least one complete statement was extracted, in input order.
    Complete(Vec<String>),
}
60
/// What kind of input a piece of text represents.
#[must_use]
#[derive(Debug, Eq, PartialEq)]
pub enum InputKind {
    /// One of the shell's built-in commands (HELP, QUIT, DESCRIBE, etc.).
    ShellCommand(String),
    /// A CQL statement that should be forwarded to the driver.
    CqlStatement(String),
    /// Input that is empty or consists solely of whitespace.
    Empty,
}
72
/// Built-in shell commands that don't require a semicolon terminator.
///
/// Entries are spelled in upper case because `is_shell_command` upper-cases
/// the first word of the input before looking it up in this list.
const SHELL_COMMANDS: &[&str] = &[
    "HELP",
    "?",
    "QUIT",
    "EXIT",
    "DESCRIBE",
    "DESC",
    "CONSISTENCY",
    "SERIAL",
    "TRACING",
    "EXPAND",
    "PAGING",
    "LOGIN",
    "SOURCE",
    "CAPTURE",
    "SHOW",
    "CLEAR",
    "CLS",
    "UNICODE",
    "DEBUG",
    "COPY",
    "USE",
];
97
98impl StatementParser {
99    /// Create a new empty parser.
100    #[must_use]
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Reset the parser, discarding any accumulated input.
106    pub fn reset(&mut self) {
107        self.buffer.clear();
108        self.scan_offset = 0;
109        self.stmt_start = 0;
110        self.state = LexState::Normal;
111        self.block_comment_depth = 0;
112        self.in_batch = false;
113    }
114
115    /// Returns true if the parser has no accumulated input.
116    #[must_use]
117    pub fn is_empty(&self) -> bool {
118        self.buffer.is_empty()
119    }
120
121    /// Returns the remaining unparsed content in the buffer.
122    #[must_use]
123    pub fn remaining(&self) -> &str {
124        &self.buffer[self.stmt_start..]
125    }
126
127    /// Feed a line of input and return any complete statements.
128    ///
129    /// This is the incremental entry point. Each call scans only the newly
130    /// appended bytes, preserving lexer state from the previous call.
131    /// Total work across all `feed_line` calls is O(n).
132    pub fn feed_line(&mut self, line: &str) -> ParseResult {
133        if !self.buffer.is_empty() {
134            self.buffer.push('\n');
135        }
136        self.buffer.push_str(line);
137
138        self.scan_for_statements()
139    }
140
141    /// Scan from `scan_offset` forward for statement terminators.
142    ///
143    /// Only scans newly appended bytes — does NOT re-scan from the start.
144    /// State (`self.state`, `self.block_comment_depth`) is preserved across calls.
145    fn scan_for_statements(&mut self) -> ParseResult {
146        let mut statements = Vec::new();
147
148        // We work on byte offsets using char_indices over the unscanned portion.
149        // But we need to handle multi-byte chars correctly, so iterate chars.
150        let buf = self.buffer.as_bytes();
151        let len = buf.len();
152        let mut i = self.scan_offset;
153
154        while i < len {
155            let (ch, char_len) = decode_char_at(&self.buffer, i);
156
157            match self.state {
158                LexState::Normal => {
159                    if ch == '\'' {
160                        self.state = LexState::SingleQuote;
161                        i += char_len;
162                    } else if ch == '"' {
163                        self.state = LexState::DoubleQuote;
164                        i += char_len;
165                    } else if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
166                        self.state = LexState::DollarQuote;
167                        i += 2;
168                    } else if ch == '-' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'-' {
169                        self.state = LexState::LineComment;
170                        i += 2;
171                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
172                        self.state = LexState::BlockComment;
173                        self.block_comment_depth = 1;
174                        i += 2;
175                    } else if ch == ';' {
176                        // Statement terminator found in Normal state.
177                        let raw = &self.buffer[self.stmt_start..i];
178                        let stripped = strip_comments(raw);
179                        let trimmed = stripped.trim();
180
181                        if self.in_batch {
182                            // Inside BEGIN BATCH … APPLY BATCH: semicolons
183                            // between DML statements are part of the batch
184                            // syntax, not statement terminators.  Only emit
185                            // when APPLY BATCH has been reached.
186                            if ends_with_apply_batch(trimmed) {
187                                self.in_batch = false;
188                                if !trimmed.is_empty() {
189                                    statements.push(trimmed.to_string());
190                                }
191                                self.stmt_start = i + 1;
192                            }
193                            // Otherwise keep accumulating; do NOT advance stmt_start.
194                            i += 1;
195                        } else if starts_with_begin_batch(trimmed) {
196                            if ends_with_apply_batch(trimmed) {
197                                // Single-semicolon batch: all inner DML
198                                // statements lacked semicolons, so the only
199                                // ';' is at APPLY BATCH.  Emit immediately.
200                                if !trimmed.is_empty() {
201                                    statements.push(trimmed.to_string());
202                                }
203                                self.stmt_start = i + 1;
204                            } else {
205                                // Opening of a BATCH block: treat the ';' as
206                                // internal to the batch, not as a terminator.
207                                self.in_batch = true;
208                                // Do NOT advance stmt_start — keep accumulating
209                                // from the start of BEGIN BATCH.
210                            }
211                            i += 1;
212                        } else {
213                            if !trimmed.is_empty() {
214                                statements.push(trimmed.to_string());
215                            }
216                            self.stmt_start = i + 1; // skip the ';'
217                            i += 1;
218                        }
219                    } else {
220                        i += char_len;
221                    }
222                }
223                LexState::SingleQuote => {
224                    if ch == '\'' {
225                        // Check for escaped quote ('')
226                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'\'' {
227                            i += 2; // skip escaped quote
228                        } else {
229                            self.state = LexState::Normal;
230                            i += 1;
231                        }
232                    } else {
233                        i += char_len;
234                    }
235                }
236                LexState::DoubleQuote => {
237                    if ch == '"' {
238                        // Check for escaped quote ("")
239                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'"' {
240                            i += 2;
241                        } else {
242                            self.state = LexState::Normal;
243                            i += 1;
244                        }
245                    } else {
246                        i += char_len;
247                    }
248                }
249                LexState::DollarQuote => {
250                    if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
251                        self.state = LexState::Normal;
252                        i += 2;
253                    } else {
254                        i += char_len;
255                    }
256                }
257                LexState::LineComment => {
258                    if ch == '\n' {
259                        self.state = LexState::Normal;
260                    }
261                    i += char_len;
262                }
263                LexState::BlockComment => {
264                    if ch == '*' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'/' {
265                        self.block_comment_depth -= 1;
266                        if self.block_comment_depth == 0 {
267                            self.state = LexState::Normal;
268                        }
269                        i += 2;
270                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
271                        self.block_comment_depth += 1;
272                        i += 2;
273                    } else {
274                        i += char_len;
275                    }
276                }
277            }
278        }
279
280        self.scan_offset = i;
281
282        // Always compact the buffer when stmt_start has advanced past consumed
283        // content (e.g., empty statements like `;;` that were skipped).
284        if self.stmt_start > 0 {
285            self.buffer = self.buffer[self.stmt_start..].to_string();
286            self.scan_offset -= self.stmt_start;
287            self.stmt_start = 0;
288        }
289
290        // If the remaining buffer is only whitespace and/or comments (no
291        // meaningful CQL tokens), clear it so the REPL returns to the primary
292        // prompt. This handles trailing line comments after semicolons
293        // (e.g., `SELECT 1; -- comment`) and bare `;;`.
294        if !self.buffer.is_empty() {
295            let stripped = strip_comments(&self.buffer);
296            if stripped.trim().is_empty() {
297                self.buffer.clear();
298                self.scan_offset = 0;
299                self.state = LexState::Normal;
300                self.block_comment_depth = 0;
301            }
302        }
303
304        if statements.is_empty() {
305            ParseResult::Incomplete
306        } else {
307            ParseResult::Complete(statements)
308        }
309    }
310}
311
/// Read the character that starts at byte offset `i` of `s`.
///
/// Returns the character together with the number of bytes its UTF-8
/// encoding occupies. When `i == s.len()` there is nothing to read and the
/// result is `('\0', 1)`.
///
/// `i` must lie on a char boundary (slicing panics otherwise); the scanner
/// upholds this by always advancing by the returned length.
fn decode_char_at(s: &str, i: usize) -> (char, usize) {
    match s[i..].chars().next() {
        Some(c) => (c, c.len_utf8()),
        // NUL is a one-byte character, so the fallback width is 1.
        None => ('\0', 1),
    }
}
319
/// Report whether `text` opens a CQL BATCH block.
///
/// Recognised openings are `BEGIN BATCH`, `BEGIN UNLOGGED BATCH` and
/// `BEGIN COUNTER BATCH`. Keywords are compared ASCII case-insensitively and
/// may be separated by any amount of whitespace; anything after the opening
/// keywords is ignored.
fn starts_with_begin_batch(text: &str) -> bool {
    let mut tokens = text.split_whitespace();

    let begins = matches!(tokens.next(), Some(first) if first.eq_ignore_ascii_case("BEGIN"));
    if !begins {
        return false;
    }

    match tokens.next() {
        // `BEGIN BATCH ...`
        Some(second) if second.eq_ignore_ascii_case("BATCH") => true,
        // `BEGIN UNLOGGED BATCH ...` / `BEGIN COUNTER BATCH ...`
        Some(second)
            if second.eq_ignore_ascii_case("UNLOGGED")
                || second.eq_ignore_ascii_case("COUNTER") =>
        {
            matches!(tokens.next(), Some(third) if third.eq_ignore_ascii_case("BATCH"))
        }
        _ => false,
    }
}
343
/// Report whether the last two whitespace-separated tokens of `text` are
/// `APPLY` followed by `BATCH` (compared ASCII case-insensitively).
fn ends_with_apply_batch(text: &str) -> bool {
    // Walk the tokens from the back: the final token first, then the one
    // preceding it.
    let mut from_end = text.split_whitespace().rev();
    match (from_end.next(), from_end.next()) {
        (Some(last), Some(before_last)) => {
            before_last.eq_ignore_ascii_case("APPLY") && last.eq_ignore_ascii_case("BATCH")
        }
        _ => false,
    }
}
353
354/// Strip comments from a CQL fragment (used on extracted statements).
355///
356/// Delegates to the unified CQL lexer for context-aware comment stripping.
357/// Handles nested block comments and preserves content inside string literals.
358fn strip_comments(input: &str) -> String {
359    crate::cql_lexer::strip_comments(input)
360}
361
362/// Classify a complete input as a shell command, CQL statement, or empty.
363pub fn classify_input(input: &str) -> InputKind {
364    let trimmed = input.trim();
365    if trimmed.is_empty() {
366        return InputKind::Empty;
367    }
368
369    if is_shell_command(trimmed) {
370        InputKind::ShellCommand(trimmed.to_string())
371    } else {
372        InputKind::CqlStatement(trimmed.to_string())
373    }
374}
375
376/// Check if the first line of input looks like a shell command.
377///
378/// Used by the REPL to decide whether to wait for a semicolon
379/// or dispatch immediately.
380#[must_use]
381pub fn is_shell_command(line: &str) -> bool {
382    let trimmed = line.trim();
383    // Strip trailing semicolon for command detection
384    let without_semi = trimmed.strip_suffix(';').unwrap_or(trimmed).trim();
385    let first_word = without_semi
386        .split_whitespace()
387        .next()
388        .unwrap_or("")
389        .to_uppercase();
390
391    SHELL_COMMANDS.contains(&first_word.as_str())
392}
393
394/// Parse a complete input string (e.g., from `-e` or `-f` batch mode)
395/// into individual statements.
396///
397/// Returns a vector of complete, comment-stripped statements.
398/// This is O(n) in the input size (not O(n²) per PR #151).
399#[must_use]
400pub fn parse_batch(input: &str) -> Vec<String> {
401    let mut parser = StatementParser::new();
402    let mut all_statements = Vec::new();
403
404    for line in input.lines() {
405        if let ParseResult::Complete(stmts) = parser.feed_line(line) {
406            all_statements.extend(stmts);
407        }
408    }
409
410    // Handle any remaining content without a trailing semicolon.
411    // Shell commands don't need semicolons; CQL statements do.
412    let remaining = parser.remaining().trim();
413    if !remaining.is_empty() {
414        let stripped = strip_comments(remaining);
415        let trimmed = stripped.trim();
416        if !trimmed.is_empty() && is_shell_command(trimmed) {
417            all_statements.push(trimmed.to_string());
418        }
419        // Non-shell-command without semicolon is incomplete — drop it
420        // (matches Python cqlsh batch mode behavior)
421    }
422
423    all_statements
424}
425
426#[cfg(test)]
427mod tests {
428    use super::*;
429
430    // --- Basic semicolon detection ---
431
432    #[test]
433    fn simple_statement() {
434        let mut p = StatementParser::new();
435        let result = p.feed_line("SELECT * FROM users;");
436        assert_eq!(
437            result,
438            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
439        );
440    }
441
442    #[test]
443    fn statement_with_trailing_whitespace() {
444        let mut p = StatementParser::new();
445        let result = p.feed_line("SELECT * FROM users;  ");
446        assert_eq!(
447            result,
448            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
449        );
450    }
451
452    #[test]
453    fn incomplete_no_semicolon() {
454        let mut p = StatementParser::new();
455        assert_eq!(p.feed_line("SELECT * FROM users"), ParseResult::Incomplete);
456    }
457
458    #[test]
459    fn empty_input() {
460        let mut p = StatementParser::new();
461        assert_eq!(p.feed_line(""), ParseResult::Incomplete);
462        assert_eq!(p.feed_line("   "), ParseResult::Incomplete);
463    }
464
465    // --- Single-quoted string handling ---
466
467    #[test]
468    fn semicolon_in_single_quoted_string() {
469        let mut p = StatementParser::new();
470        let result = p.feed_line("INSERT INTO t (v) VALUES ('hello;world');");
471        assert_eq!(
472            result,
473            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello;world')".to_string()])
474        );
475    }
476
477    #[test]
478    fn escaped_quote_in_string() {
479        let mut p = StatementParser::new();
480        let result = p.feed_line("INSERT INTO t (v) VALUES ('it''s;here');");
481        assert_eq!(
482            result,
483            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('it''s;here')".to_string()])
484        );
485    }
486
487    // --- Double-quoted identifier handling ---
488
489    #[test]
490    fn semicolon_in_double_quoted_identifier() {
491        let mut p = StatementParser::new();
492        let result = p.feed_line("SELECT \"col;name\" FROM t;");
493        assert_eq!(
494            result,
495            ParseResult::Complete(vec!["SELECT \"col;name\" FROM t".to_string()])
496        );
497    }
498
499    #[test]
500    fn escaped_double_quote() {
501        let mut p = StatementParser::new();
502        let result = p.feed_line("SELECT \"col\"\"name\" FROM t;");
503        assert_eq!(
504            result,
505            ParseResult::Complete(vec!["SELECT \"col\"\"name\" FROM t".to_string()])
506        );
507    }
508
509    // --- Dollar-quoted string handling ---
510
511    #[test]
512    fn semicolon_in_dollar_quoted_string() {
513        let mut p = StatementParser::new();
514        let result = p.feed_line("CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$;");
515        assert_eq!(result, ParseResult::Complete(vec![
516            "CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$".to_string()
517        ]));
518    }
519
520    #[test]
521    fn dollar_quote_multiline() {
522        let mut p = StatementParser::new();
523        assert_eq!(
524            p.feed_line("CREATE FUNCTION f() RETURNS text LANGUAGE java AS $$"),
525            ParseResult::Incomplete
526        );
527        assert_eq!(p.feed_line("  return a;"), ParseResult::Incomplete);
528        let result = p.feed_line("$$;");
529        assert!(matches!(result, ParseResult::Complete(_)));
530    }
531
532    #[test]
533    fn empty_dollar_quote() {
534        let mut p = StatementParser::new();
535        let result = p.feed_line("SELECT $$$$;");
536        assert_eq!(
537            result,
538            ParseResult::Complete(vec!["SELECT $$$$".to_string()])
539        );
540    }
541
542    // --- Line comment stripping ---
543
544    #[test]
545    fn line_comment_stripped() {
546        let mut p = StatementParser::new();
547        let result = p.feed_line("SELECT * FROM t; -- this is a comment");
548        assert_eq!(
549            result,
550            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
551        );
552    }
553
554    #[test]
555    fn line_comment_does_not_terminate() {
556        let mut p = StatementParser::new();
557        // Semicolon inside line comment should not terminate
558        assert_eq!(
559            p.feed_line("SELECT * FROM t -- comment with ;"),
560            ParseResult::Incomplete
561        );
562    }
563
564    #[test]
565    fn line_comment_then_statement_across_lines() {
566        let mut p = StatementParser::new();
567        assert_eq!(p.feed_line("-- header comment"), ParseResult::Incomplete);
568        let result = p.feed_line("SELECT 1;");
569        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
570    }
571
572    // --- Block comment stripping (PR #150) ---
573
574    #[test]
575    fn block_comment_stripped() {
576        let mut p = StatementParser::new();
577        let result = p.feed_line("SELECT /* comment */ * FROM t;");
578        assert_eq!(
579            result,
580            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
581        );
582    }
583
584    #[test]
585    fn block_comment_with_semicolon() {
586        let mut p = StatementParser::new();
587        // Semicolon inside block comment should not terminate
588        let result = p.feed_line("SELECT /* ; */ * FROM t;");
589        assert_eq!(
590            result,
591            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
592        );
593    }
594
595    #[test]
596    fn block_comment_chars_in_single_quoted_string() {
597        // PR #150: /* inside strings must NOT be treated as comment start
598        let mut p = StatementParser::new();
599        let result = p.feed_line("INSERT INTO t (v) VALUES ('/* not a comment */');");
600        assert_eq!(
601            result,
602            ParseResult::Complete(vec![
603                "INSERT INTO t (v) VALUES ('/* not a comment */')".to_string()
604            ])
605        );
606    }
607
608    #[test]
609    fn block_comment_chars_in_double_quoted_string() {
610        let mut p = StatementParser::new();
611        let result = p.feed_line("SELECT \"/* not a comment */\" FROM t;");
612        assert_eq!(
613            result,
614            ParseResult::Complete(vec!["SELECT \"/* not a comment */\" FROM t".to_string()])
615        );
616    }
617
618    #[test]
619    fn block_comment_chars_in_dollar_quoted_string() {
620        let mut p = StatementParser::new();
621        let result = p.feed_line("SELECT $$/* not a comment */$$;");
622        assert_eq!(
623            result,
624            ParseResult::Complete(vec!["SELECT $$/* not a comment */$$".to_string()])
625        );
626    }
627
628    #[test]
629    fn block_comment_across_feed_lines() {
630        let mut p = StatementParser::new();
631        assert_eq!(p.feed_line("SELECT /* start"), ParseResult::Incomplete);
632        assert_eq!(p.feed_line("still comment"), ParseResult::Incomplete);
633        let result = p.feed_line("end */ 1;");
634        assert_eq!(
635            result,
636            ParseResult::Complete(vec!["SELECT   1".to_string()])
637        );
638    }
639
640    #[test]
641    fn nested_block_comments() {
642        let mut p = StatementParser::new();
643        let result = p.feed_line("SELECT /* outer /* inner */ still comment */ 1;");
644        assert_eq!(
645            result,
646            ParseResult::Complete(vec!["SELECT   1".to_string()])
647        );
648    }
649
650    #[test]
651    fn nested_block_comments_stripped() {
652        let input = "SELECT /* outer /* inner */ still */ 1";
653        let result = strip_comments(input);
654        assert_eq!(result, "SELECT   1");
655    }
656
657    // --- Multi-line statement buffering ---
658
659    #[test]
660    fn multiline_statement() {
661        let mut p = StatementParser::new();
662        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
663        assert_eq!(p.feed_line("FROM users"), ParseResult::Incomplete);
664        let result = p.feed_line("WHERE id = 1;");
665        assert_eq!(
666            result,
667            ParseResult::Complete(vec!["SELECT *\nFROM users\nWHERE id = 1".to_string()])
668        );
669    }
670
671    #[test]
672    fn multiline_with_string_across_lines() {
673        let mut p = StatementParser::new();
674        assert_eq!(
675            p.feed_line("INSERT INTO t (v) VALUES ('hello"),
676            ParseResult::Incomplete
677        );
678        let result = p.feed_line("world');");
679        assert_eq!(
680            result,
681            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello\nworld')".to_string()])
682        );
683    }
684
685    // --- Empty statement handling ---
686
687    #[test]
688    fn empty_statement_skipped() {
689        let mut p = StatementParser::new();
690        let result = p.feed_line(";;");
691        // Both semicolons produce empty statements which are skipped
692        assert_eq!(result, ParseResult::Incomplete);
693    }
694
695    #[test]
696    fn empty_between_statements() {
697        let mut p = StatementParser::new();
698        let result = p.feed_line("SELECT 1; ; SELECT 2;");
699        assert_eq!(
700            result,
701            ParseResult::Complete(vec!["SELECT 1".to_string(), "SELECT 2".to_string(),])
702        );
703    }
704
705    // --- Built-in command detection ---
706
707    #[test]
708    fn shell_commands_detected() {
709        assert!(is_shell_command("HELP"));
710        assert!(is_shell_command("?"));
711        assert!(is_shell_command("QUIT"));
712        assert!(is_shell_command("EXIT"));
713        assert!(is_shell_command("DESCRIBE KEYSPACES"));
714        assert!(is_shell_command("DESC TABLE users"));
715        assert!(is_shell_command("CONSISTENCY ONE"));
716        assert!(is_shell_command("TRACING ON"));
717        assert!(is_shell_command("EXPAND ON"));
718        assert!(is_shell_command("PAGING 100"));
719        assert!(is_shell_command("SHOW VERSION"));
720        assert!(is_shell_command("CLEAR"));
721        assert!(is_shell_command("CLS"));
722        assert!(is_shell_command("COPY users TO '/tmp/data.csv'"));
723        assert!(is_shell_command("USE my_keyspace"));
724    }
725
726    #[test]
727    fn shell_command_case_insensitive() {
728        assert!(is_shell_command("help"));
729        assert!(is_shell_command("quit"));
730        assert!(is_shell_command("Help"));
731        assert!(is_shell_command("describe keyspaces"));
732        assert!(is_shell_command("use my_ks"));
733    }
734
735    #[test]
736    fn shell_command_with_semicolon() {
737        assert!(is_shell_command("USE my_ks;"));
738        assert!(is_shell_command("HELP;"));
739    }
740
741    #[test]
742    fn cql_not_shell_command() {
743        assert!(!is_shell_command("SELECT * FROM users"));
744        assert!(!is_shell_command("INSERT INTO t (id) VALUES (1)"));
745        assert!(!is_shell_command("CREATE TABLE test (id int PRIMARY KEY)"));
746    }
747
748    // --- Command classification ---
749
750    #[test]
751    fn classify_shell_command() {
752        assert_eq!(
753            classify_input("HELP"),
754            InputKind::ShellCommand("HELP".to_string())
755        );
756        assert_eq!(
757            classify_input("USE my_ks"),
758            InputKind::ShellCommand("USE my_ks".to_string())
759        );
760    }
761
762    #[test]
763    fn classify_shell_command_with_semicolon() {
764        assert_eq!(
765            classify_input("USE my_ks;"),
766            InputKind::ShellCommand("USE my_ks;".to_string())
767        );
768    }
769
770    #[test]
771    fn classify_cql_statement() {
772        assert_eq!(
773            classify_input("SELECT * FROM users"),
774            InputKind::CqlStatement("SELECT * FROM users".to_string())
775        );
776    }
777
778    #[test]
779    fn classify_empty() {
780        assert_eq!(classify_input(""), InputKind::Empty);
781        assert_eq!(classify_input("   "), InputKind::Empty);
782    }
783
784    // --- Multiple statements on one line ---
785
786    #[test]
787    fn multiple_statements_one_line() {
788        let mut p = StatementParser::new();
789        let result = p.feed_line("SELECT 1; SELECT 2; SELECT 3;");
790        assert_eq!(
791            result,
792            ParseResult::Complete(vec![
793                "SELECT 1".to_string(),
794                "SELECT 2".to_string(),
795                "SELECT 3".to_string(),
796            ])
797        );
798    }
799
800    // --- Whitespace normalization ---
801
802    #[test]
803    fn leading_trailing_whitespace_trimmed() {
804        let mut p = StatementParser::new();
805        let result = p.feed_line("  SELECT * FROM t  ;  ");
806        assert_eq!(
807            result,
808            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
809        );
810    }
811
812    // --- Batch mode parsing ---
813
814    #[test]
815    fn parse_batch_basic() {
816        let input = "SELECT 1;\nSELECT 2;\n";
817        let stmts = parse_batch(input);
818        assert_eq!(stmts, vec!["SELECT 1", "SELECT 2"]);
819    }
820
821    #[test]
822    fn parse_batch_with_comments() {
823        let input = "-- header comment\nSELECT 1; -- inline\nSELECT /* x */ 2;\n";
824        let stmts = parse_batch(input);
825        assert_eq!(stmts, vec!["SELECT 1", "SELECT   2"]);
826    }
827
828    #[test]
829    fn parse_batch_multiline_statement() {
830        let input = "SELECT *\nFROM users\nWHERE id = 1;\n";
831        let stmts = parse_batch(input);
832        assert_eq!(stmts, vec!["SELECT *\nFROM users\nWHERE id = 1"]);
833    }
834
835    #[test]
836    fn parse_batch_with_shell_command() {
837        let input = "SELECT 1;\nUSE my_ks\n";
838        let stmts = parse_batch(input);
839        assert_eq!(stmts, vec!["SELECT 1", "USE my_ks"]);
840    }
841
842    #[test]
843    fn parse_batch_drops_incomplete_cql() {
844        // CQL without semicolon at end of file is dropped (Python cqlsh behavior)
845        let input = "SELECT 1;\nSELECT 2";
846        let stmts = parse_batch(input);
847        assert_eq!(stmts, vec!["SELECT 1"]);
848    }
849
850    #[test]
851    fn parse_batch_only_comments() {
852        let input = "-- just a comment\n/* block */\n";
853        let stmts = parse_batch(input);
854        assert!(stmts.is_empty());
855    }
856
857    // --- Comment stripping edge cases ---
858
859    #[test]
860    fn strip_comments_preserves_strings() {
861        let input = "SELECT '-- not a comment' FROM t";
862        let result = strip_comments(input);
863        assert_eq!(result, "SELECT '-- not a comment' FROM t");
864    }
865
866    #[test]
867    fn strip_comments_preserves_dollar_strings() {
868        let input = "SELECT $$-- not a comment$$ FROM t";
869        let result = strip_comments(input);
870        assert_eq!(result, "SELECT $$-- not a comment$$ FROM t");
871    }
872
873    #[test]
874    fn strip_comments_multiline_block() {
875        let input = "SELECT /* multi\nline\ncomment */ 1";
876        let result = strip_comments(input);
877        // Block comment is replaced with a single space, plus the existing space = "  "
878        assert_eq!(result, "SELECT   1");
879    }
880
881    // --- Parser reset ---
882
883    #[test]
884    fn reset_clears_state() {
885        let mut p = StatementParser::new();
886        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
887        assert!(!p.is_empty());
888
889        p.reset();
890        assert!(p.is_empty());
891
892        // After reset, should start fresh
893        let result = p.feed_line("SELECT 1;");
894        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
895    }
896
897    // --- Parser reuse after Complete ---
898
899    #[test]
900    fn reuse_after_complete() {
901        let mut p = StatementParser::new();
902        let r1 = p.feed_line("SELECT 1;");
903        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
904
905        // Parser should work for subsequent statements
906        let r2 = p.feed_line("SELECT 2;");
907        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
908    }
909
910    #[test]
911    fn reuse_after_complete_multiline() {
912        let mut p = StatementParser::new();
913        assert_eq!(
914            p.feed_line("SELECT 1;"),
915            ParseResult::Complete(vec!["SELECT 1".to_string()])
916        );
917
918        // Now a multi-line statement
919        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
920        let result = p.feed_line("FROM t;");
921        assert_eq!(
922            result,
923            ParseResult::Complete(vec!["SELECT *\nFROM t".to_string()])
924        );
925    }
926
927    // --- Unterminated constructs ---
928
929    #[test]
930    fn unterminated_string_blocks_semicolon() {
931        let stmts = parse_batch("SELECT 'unterminated;");
932        assert!(stmts.is_empty());
933    }
934
935    #[test]
936    fn unterminated_block_comment_blocks_semicolon() {
937        let stmts = parse_batch("SELECT /* never closed;");
938        assert!(stmts.is_empty());
939    }
940
941    // --- Backslash in strings ---
942
943    #[test]
944    fn backslash_in_string_is_literal() {
945        // CQL does NOT use backslash escaping (uses '' instead)
946        let mut p = StatementParser::new();
947        let result = p.feed_line("SELECT '\\';");
948        assert_eq!(
949            result,
950            ParseResult::Complete(vec!["SELECT '\\'".to_string()])
951        );
952    }
953
954    // --- Unicode handling ---
955
956    #[test]
957    fn unicode_in_strings() {
958        let mut p = StatementParser::new();
959        let result = p.feed_line("INSERT INTO t (v) VALUES ('héllo wörld; café');");
960        assert_eq!(
961            result,
962            ParseResult::Complete(vec![
963                "INSERT INTO t (v) VALUES ('héllo wörld; café')".to_string()
964            ])
965        );
966    }
967
968    #[test]
969    fn unicode_identifier() {
970        let mut p = StatementParser::new();
971        let result = p.feed_line("SELECT \"naïve;col\" FROM t;");
972        assert_eq!(
973            result,
974            ParseResult::Complete(vec!["SELECT \"naïve;col\" FROM t".to_string()])
975        );
976    }
977
978    // --- Incremental scan correctness ---
979
980    #[test]
981    fn incremental_scan_preserves_state_across_lines() {
982        // Verify that the parser doesn't re-scan from the start each time.
983        // This is a correctness test: if state weren't preserved,
984        // the second line's `'` would start a new string context.
985        let mut p = StatementParser::new();
986        assert_eq!(
987            p.feed_line("INSERT INTO t VALUES ('multi"),
988            ParseResult::Incomplete
989        );
990        assert_eq!(
991            p.feed_line("line string with ; inside"),
992            ParseResult::Incomplete
993        );
994        let result = p.feed_line("end of string');");
995        assert_eq!(
996            result,
997            ParseResult::Complete(vec![
998                "INSERT INTO t VALUES ('multi\nline string with ; inside\nend of string')"
999                    .to_string()
1000            ])
1001        );
1002    }
1003
1004    // --- BUG-7: Inline comment after semicolon ---
1005
1006    #[test]
1007    fn inline_comment_after_semicolon_clears_buffer() {
1008        let mut p = StatementParser::new();
1009        let result = p.feed_line("SELECT 1; -- inline comment");
1010        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1011        // Parser should be empty — no continuation prompt
1012        assert!(p.is_empty());
1013    }
1014
1015    #[test]
1016    fn inline_comment_after_semicolon_next_statement_works() {
1017        let mut p = StatementParser::new();
1018        let r1 = p.feed_line("SELECT 1; -- comment");
1019        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1020        assert!(p.is_empty());
1021
1022        // Next statement should work normally
1023        let r2 = p.feed_line("SELECT 2;");
1024        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
1025    }
1026
1027    // --- BUG-8: Bare ;; enters continuation ---
1028
1029    #[test]
1030    fn bare_semicolons_clear_buffer() {
1031        let mut p = StatementParser::new();
1032        let result = p.feed_line(";;");
1033        assert_eq!(result, ParseResult::Incomplete);
1034        // Parser should be empty — no continuation prompt
1035        assert!(p.is_empty());
1036    }
1037
1038    #[test]
1039    fn bare_semicolons_then_statement() {
1040        let mut p = StatementParser::new();
1041        assert_eq!(p.feed_line(";;"), ParseResult::Incomplete);
1042        assert!(p.is_empty());
1043
1044        let result = p.feed_line("SELECT 1;");
1045        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1046    }
1047
1048    #[test]
1049    fn only_whitespace_and_comments_clears_buffer() {
1050        let mut p = StatementParser::new();
1051        assert_eq!(p.feed_line("-- just a comment"), ParseResult::Incomplete);
1052        assert!(p.is_empty());
1053    }
1054
1055    #[test]
1056    fn block_comment_only_clears_buffer() {
1057        let mut p = StatementParser::new();
1058        assert_eq!(p.feed_line("/* block comment */"), ParseResult::Incomplete);
1059        assert!(p.is_empty());
1060    }
1061
1062    // --- BATCH statement handling ---
1063
1064    #[test]
1065    fn batch_with_inner_semicolons() {
1066        let mut p = StatementParser::new();
1067        assert_eq!(p.feed_line("BEGIN BATCH"), ParseResult::Incomplete);
1068        assert_eq!(
1069            p.feed_line("INSERT INTO t (id) VALUES (1);"),
1070            ParseResult::Incomplete
1071        );
1072        assert_eq!(
1073            p.feed_line("INSERT INTO t (id) VALUES (2);"),
1074            ParseResult::Incomplete
1075        );
1076        let result = p.feed_line("APPLY BATCH;");
1077        assert_eq!(
1078            result,
1079            ParseResult::Complete(vec![
1080                "BEGIN BATCH\nINSERT INTO t (id) VALUES (1);\nINSERT INTO t (id) VALUES (2);\nAPPLY BATCH".to_string()
1081            ])
1082        );
1083    }
1084
1085    #[test]
1086    fn batch_without_inner_semicolons() {
1087        let mut p = StatementParser::new();
1088        assert_eq!(p.feed_line("BEGIN BATCH"), ParseResult::Incomplete);
1089        assert_eq!(
1090            p.feed_line("INSERT INTO t (id) VALUES (1)"),
1091            ParseResult::Incomplete
1092        );
1093        assert_eq!(
1094            p.feed_line("INSERT INTO t (id) VALUES (2)"),
1095            ParseResult::Incomplete
1096        );
1097        let result = p.feed_line("APPLY BATCH;");
1098        assert_eq!(
1099            result,
1100            ParseResult::Complete(vec![
1101                "BEGIN BATCH\nINSERT INTO t (id) VALUES (1)\nINSERT INTO t (id) VALUES (2)\nAPPLY BATCH".to_string()
1102            ])
1103        );
1104    }
1105
1106    #[test]
1107    fn batch_single_line_no_inner_semicolons() {
1108        let mut p = StatementParser::new();
1109        let result = p.feed_line(
1110            "BEGIN BATCH INSERT INTO t (id) VALUES (1) INSERT INTO t (id) VALUES (2) APPLY BATCH;",
1111        );
1112        assert_eq!(
1113            result,
1114            ParseResult::Complete(vec![
1115                "BEGIN BATCH INSERT INTO t (id) VALUES (1) INSERT INTO t (id) VALUES (2) APPLY BATCH".to_string()
1116            ])
1117        );
1118    }
1119
1120    #[test]
1121    fn batch_unlogged() {
1122        let mut p = StatementParser::new();
1123        assert_eq!(p.feed_line("BEGIN UNLOGGED BATCH"), ParseResult::Incomplete);
1124        assert_eq!(
1125            p.feed_line("INSERT INTO t (id) VALUES (1)"),
1126            ParseResult::Incomplete
1127        );
1128        let result = p.feed_line("APPLY BATCH;");
1129        assert_eq!(
1130            result,
1131            ParseResult::Complete(vec![
1132                "BEGIN UNLOGGED BATCH\nINSERT INTO t (id) VALUES (1)\nAPPLY BATCH".to_string()
1133            ])
1134        );
1135    }
1136
1137    #[test]
1138    fn batch_via_parse_batch_no_inner_semicolons() {
1139        let stmts = parse_batch(
1140            "BEGIN BATCH\nINSERT INTO t (id) VALUES (1)\nINSERT INTO t (id) VALUES (2)\nAPPLY BATCH;",
1141        );
1142        assert_eq!(stmts.len(), 1);
1143        assert!(stmts[0].starts_with("BEGIN BATCH"));
1144        assert!(stmts[0].ends_with("APPLY BATCH"));
1145    }
1146}