Skip to main content

cqlsh_rs/
parser.rs

1//! Statement parser for cqlsh-rs.
2//!
3//! Handles multi-line input buffering, semicolon-terminated statement detection,
4//! comment stripping, string literal handling, and routing between CQL statements
5//! and built-in shell commands.
6//!
7//! Key design decisions (from SP4 and SP16 upstream fixes):
8//! - Context-aware tokenization: NO regex preprocessing for comments (PR #150)
9//! - Truly incremental parsing: O(n) total work via scan_offset tracking (PR #151)
10
/// Lexical context the scanner is in at a given byte of the input buffer.
///
/// The statement scanner only treats `;` as a terminator while in
/// [`LexState::Normal`]; every other state swallows it.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum LexState {
    /// Plain CQL text, outside any literal or comment. This is the initial state.
    #[default]
    Normal,
    /// Between the quotes of a `'...'` string literal (`''` is an escaped quote).
    SingleQuote,
    /// Between the quotes of a `"..."` identifier (`""` is an escaped quote).
    DoubleQuote,
    /// Between the delimiters of a `$$...$$` string literal.
    DollarQuote,
    /// Inside a `/* ... */` comment; the scanner tracks nesting depth separately.
    BlockComment,
    /// Inside a `--` or `//` comment, which runs to the next newline.
    LineComment,
}
28
29/// Incremental statement parser.
30///
31/// Tracks lexer state across `feed_line` calls so that each call only scans
32/// the newly appended bytes. Total work is O(n) over the lifetime of the parser,
33/// not O(n²). See PR #151 for why this matters.
34#[derive(Debug, Default)]
35pub struct StatementParser {
36    /// Accumulated input buffer.
37    buffer: String,
38    /// Byte offset in `buffer` where the next scan should resume.
39    scan_offset: usize,
40    /// Byte offset of the start of the current (in-progress) statement.
41    stmt_start: usize,
42    /// Current lexer state at `scan_offset`.
43    state: LexState,
44    /// Depth of nested block comments.
45    block_comment_depth: usize,
46    /// True when we are inside a `BEGIN BATCH … APPLY BATCH` block.
47    /// Semicolons inside a batch do not terminate the batch statement.
48    in_batch: bool,
49}
50
/// Outcome of handing one more line to the parser.
#[must_use]
#[derive(Debug, Eq, PartialEq)]
pub enum ParseResult {
    /// Nothing was terminated by this line; keep feeding input.
    Incomplete,
    /// The statements that this line completed, in input order.
    Complete(Vec<String>),
}
60
/// What kind of thing a piece of user input turned out to be.
#[must_use]
#[derive(Debug, Eq, PartialEq)]
pub enum InputKind {
    /// Input whose first word is one of the built-in shell commands; carries
    /// the trimmed input text.
    ShellCommand(String),
    /// Any other non-empty input; carries the trimmed input text.
    CqlStatement(String),
    /// Input that is empty or consists only of whitespace.
    Empty,
}
72
/// Leading keywords that mark an input as a built-in shell command.
///
/// Entries are upper-case because `is_shell_command` upper-cases the first
/// word of the input before looking it up here.
const SHELL_COMMANDS: &[&str] = &[
    "HELP",
    "?",
    "QUIT",
    "EXIT",
    "DESCRIBE",
    "DESC",
    "CONSISTENCY",
    "SERIAL",
    "TRACING",
    "EXPAND",
    "PAGING",
    "LOGIN",
    "SOURCE",
    "CAPTURE",
    "SHOW",
    "CLEAR",
    "CLS",
    "UNICODE",
    "DEBUG",
    "COPY",
    "USE",
];
97
98impl StatementParser {
99    /// Create a new empty parser.
100    #[must_use]
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Reset the parser, discarding any accumulated input.
106    pub fn reset(&mut self) {
107        self.buffer.clear();
108        self.scan_offset = 0;
109        self.stmt_start = 0;
110        self.state = LexState::Normal;
111        self.block_comment_depth = 0;
112        self.in_batch = false;
113    }
114
115    /// Returns true if the parser has no accumulated input.
116    #[must_use]
117    pub fn is_empty(&self) -> bool {
118        self.buffer.is_empty()
119    }
120
121    /// Returns the remaining unparsed content in the buffer.
122    #[must_use]
123    pub fn remaining(&self) -> &str {
124        &self.buffer[self.stmt_start..]
125    }
126
127    /// Feed a line of input and return any complete statements.
128    ///
129    /// This is the incremental entry point. Each call scans only the newly
130    /// appended bytes, preserving lexer state from the previous call.
131    /// Total work across all `feed_line` calls is O(n).
132    pub fn feed_line(&mut self, line: &str) -> ParseResult {
133        if !self.buffer.is_empty() {
134            self.buffer.push('\n');
135        }
136        self.buffer.push_str(line);
137
138        self.scan_for_statements()
139    }
140
141    /// Scan from `scan_offset` forward for statement terminators.
142    ///
143    /// Only scans newly appended bytes — does NOT re-scan from the start.
144    /// State (`self.state`, `self.block_comment_depth`) is preserved across calls.
145    fn scan_for_statements(&mut self) -> ParseResult {
146        let mut statements = Vec::new();
147
148        // We work on byte offsets using char_indices over the unscanned portion.
149        // But we need to handle multi-byte chars correctly, so iterate chars.
150        let buf = self.buffer.as_bytes();
151        let len = buf.len();
152        let mut i = self.scan_offset;
153
154        while i < len {
155            let (ch, char_len) = decode_char_at(&self.buffer, i);
156
157            match self.state {
158                LexState::Normal => {
159                    if ch == '\'' {
160                        self.state = LexState::SingleQuote;
161                        i += char_len;
162                    } else if ch == '"' {
163                        self.state = LexState::DoubleQuote;
164                        i += char_len;
165                    } else if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
166                        self.state = LexState::DollarQuote;
167                        i += 2;
168                    } else if (ch == '-' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'-')
169                        || (ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'/')
170                    {
171                        self.state = LexState::LineComment;
172                        i += 2;
173                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
174                        self.state = LexState::BlockComment;
175                        self.block_comment_depth = 1;
176                        i += 2;
177                    } else if ch == ';' {
178                        // Statement terminator found in Normal state.
179                        let raw = &self.buffer[self.stmt_start..i];
180                        let stripped = strip_comments(raw);
181                        let trimmed = stripped.trim();
182
183                        if self.in_batch {
184                            // Inside BEGIN BATCH … APPLY BATCH: semicolons
185                            // between DML statements are part of the batch
186                            // syntax, not statement terminators.  Only emit
187                            // when APPLY BATCH has been reached.
188                            if ends_with_apply_batch(trimmed) {
189                                self.in_batch = false;
190                                if !trimmed.is_empty() {
191                                    statements.push(trimmed.to_string());
192                                }
193                                self.stmt_start = i + 1;
194                            }
195                            // Otherwise keep accumulating; do NOT advance stmt_start.
196                            i += 1;
197                        } else if starts_with_begin_batch(trimmed) {
198                            if ends_with_apply_batch(trimmed) {
199                                // Single-semicolon batch: all inner DML
200                                // statements lacked semicolons, so the only
201                                // ';' is at APPLY BATCH.  Emit immediately.
202                                if !trimmed.is_empty() {
203                                    statements.push(trimmed.to_string());
204                                }
205                                self.stmt_start = i + 1;
206                            } else {
207                                // Opening of a BATCH block: treat the ';' as
208                                // internal to the batch, not as a terminator.
209                                self.in_batch = true;
210                                // Do NOT advance stmt_start — keep accumulating
211                                // from the start of BEGIN BATCH.
212                            }
213                            i += 1;
214                        } else {
215                            if !trimmed.is_empty() {
216                                statements.push(trimmed.to_string());
217                            }
218                            self.stmt_start = i + 1; // skip the ';'
219                            i += 1;
220                        }
221                    } else {
222                        i += char_len;
223                    }
224                }
225                LexState::SingleQuote => {
226                    if ch == '\'' {
227                        // Check for escaped quote ('')
228                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'\'' {
229                            i += 2; // skip escaped quote
230                        } else {
231                            self.state = LexState::Normal;
232                            i += 1;
233                        }
234                    } else {
235                        i += char_len;
236                    }
237                }
238                LexState::DoubleQuote => {
239                    if ch == '"' {
240                        // Check for escaped quote ("")
241                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'"' {
242                            i += 2;
243                        } else {
244                            self.state = LexState::Normal;
245                            i += 1;
246                        }
247                    } else {
248                        i += char_len;
249                    }
250                }
251                LexState::DollarQuote => {
252                    if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
253                        self.state = LexState::Normal;
254                        i += 2;
255                    } else {
256                        i += char_len;
257                    }
258                }
259                LexState::LineComment => {
260                    if ch == '\n' {
261                        self.state = LexState::Normal;
262                    }
263                    i += char_len;
264                }
265                LexState::BlockComment => {
266                    if ch == '*' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'/' {
267                        self.block_comment_depth -= 1;
268                        if self.block_comment_depth == 0 {
269                            self.state = LexState::Normal;
270                        }
271                        i += 2;
272                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
273                        self.block_comment_depth += 1;
274                        i += 2;
275                    } else {
276                        i += char_len;
277                    }
278                }
279            }
280        }
281
282        self.scan_offset = i;
283
284        // Always compact the buffer when stmt_start has advanced past consumed
285        // content (e.g., empty statements like `;;` that were skipped).
286        if self.stmt_start > 0 {
287            self.buffer = self.buffer[self.stmt_start..].to_string();
288            self.scan_offset -= self.stmt_start;
289            self.stmt_start = 0;
290        }
291
292        // If the remaining buffer is only whitespace and/or comments (no
293        // meaningful CQL tokens), clear it so the REPL returns to the primary
294        // prompt. This handles trailing line comments after semicolons
295        // (e.g., `SELECT 1; -- comment`) and bare `;;`.
296        if !self.buffer.is_empty() {
297            let stripped = strip_comments(&self.buffer);
298            if stripped.trim().is_empty() {
299                self.buffer.clear();
300                self.scan_offset = 0;
301                self.state = LexState::Normal;
302                self.block_comment_depth = 0;
303            }
304        }
305
306        if statements.is_empty() {
307            ParseResult::Incomplete
308        } else {
309            ParseResult::Complete(statements)
310        }
311    }
312}
313
/// Read the character that starts at byte offset `i` of `s`.
///
/// Returns the character together with the number of bytes it occupies in
/// UTF-8. When `i` equals `s.len()` there is no character to read and the
/// result is `('\0', 1)`.
///
/// # Panics
///
/// Panics if `i` is past the end of `s` or not on a character boundary; the
/// scanner only ever advances by whole characters, so it stays on boundaries.
fn decode_char_at(s: &str, i: usize) -> (char, usize) {
    match s[i..].chars().next() {
        Some(found) => (found, found.len_utf8()),
        None => ('\0', 1),
    }
}
321
/// Return true if `text` is the opening of a CQL BATCH block.
///
/// Matches `BEGIN BATCH`, `BEGIN UNLOGGED BATCH` and `BEGIN COUNTER BATCH` as
/// the leading whitespace-separated words of `text`, ASCII case-insensitively
/// and with any amount of whitespace between the words.
///
/// Only the first two or three words are examined; the rest of `text` is
/// never tokenized, so the cost does not grow with the statement length.
fn starts_with_begin_batch(text: &str) -> bool {
    let mut words = text.split_whitespace();
    match (words.next(), words.next()) {
        (Some(begin), Some(second)) if begin.eq_ignore_ascii_case("BEGIN") => {
            if second.eq_ignore_ascii_case("BATCH") {
                true
            } else if second.eq_ignore_ascii_case("UNLOGGED")
                || second.eq_ignore_ascii_case("COUNTER")
            {
                // A modifier must be followed directly by BATCH.
                words
                    .next()
                    .map_or(false, |third| third.eq_ignore_ascii_case("BATCH"))
            } else {
                false
            }
        }
        _ => false,
    }
}
345
/// Return true if the last two whitespace-separated words of `text` are
/// `APPLY BATCH` (ASCII case-insensitive).
///
/// The words are read from the back of the iterator, so only the tail of
/// `text` is tokenized and nothing is allocated.
fn ends_with_apply_batch(text: &str) -> bool {
    let mut from_end = text.split_whitespace().rev();
    // Tuple fields are evaluated left to right: the first `next()` yields the
    // final word, the second the one before it.
    match (from_end.next(), from_end.next()) {
        (Some(last), Some(before_last)) => {
            last.eq_ignore_ascii_case("BATCH") && before_last.eq_ignore_ascii_case("APPLY")
        }
        _ => false,
    }
}
355
356/// Strip comments from a CQL fragment (used on extracted statements).
357///
358/// Delegates to the unified CQL lexer for context-aware comment stripping.
359/// Handles nested block comments and preserves content inside string literals.
360fn strip_comments(input: &str) -> String {
361    crate::cql_lexer::strip_comments(input)
362}
363
364/// Classify a complete input as a shell command, CQL statement, or empty.
365pub fn classify_input(input: &str) -> InputKind {
366    let trimmed = input.trim();
367    if trimmed.is_empty() {
368        return InputKind::Empty;
369    }
370
371    if is_shell_command(trimmed) {
372        InputKind::ShellCommand(trimmed.to_string())
373    } else {
374        InputKind::CqlStatement(trimmed.to_string())
375    }
376}
377
378/// Check if the first line of input looks like a shell command.
379///
380/// Used by the REPL to decide whether to wait for a semicolon
381/// or dispatch immediately.
382#[must_use]
383pub fn is_shell_command(line: &str) -> bool {
384    let trimmed = line.trim();
385    // Strip trailing semicolon for command detection
386    let without_semi = trimmed.strip_suffix(';').unwrap_or(trimmed).trim();
387    let first_word = without_semi
388        .split_whitespace()
389        .next()
390        .unwrap_or("")
391        .to_uppercase();
392
393    SHELL_COMMANDS.contains(&first_word.as_str())
394}
395
396/// Parse a complete input string (e.g., from `-e` or `-f` batch mode)
397/// into individual statements.
398///
399/// Returns a vector of complete, comment-stripped statements.
400/// This is O(n) in the input size (not O(n²) per PR #151).
401#[must_use]
402pub fn parse_batch(input: &str) -> Vec<String> {
403    let mut parser = StatementParser::new();
404    let mut all_statements = Vec::new();
405
406    for line in input.lines() {
407        if let ParseResult::Complete(stmts) = parser.feed_line(line) {
408            all_statements.extend(stmts);
409        }
410    }
411
412    // Handle any remaining content without a trailing semicolon.
413    // Shell commands don't need semicolons; CQL statements do.
414    let remaining = parser.remaining().trim();
415    if !remaining.is_empty() {
416        let stripped = strip_comments(remaining);
417        let trimmed = stripped.trim();
418        if !trimmed.is_empty() && is_shell_command(trimmed) {
419            all_statements.push(trimmed.to_string());
420        }
421        // Non-shell-command without semicolon is incomplete — drop it
422        // (matches Python cqlsh batch mode behavior)
423    }
424
425    all_statements
426}
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431
432    // --- Basic semicolon detection ---
433
434    #[test]
435    fn simple_statement() {
436        let mut p = StatementParser::new();
437        let result = p.feed_line("SELECT * FROM users;");
438        assert_eq!(
439            result,
440            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
441        );
442    }
443
444    #[test]
445    fn statement_with_trailing_whitespace() {
446        let mut p = StatementParser::new();
447        let result = p.feed_line("SELECT * FROM users;  ");
448        assert_eq!(
449            result,
450            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
451        );
452    }
453
454    #[test]
455    fn incomplete_no_semicolon() {
456        let mut p = StatementParser::new();
457        assert_eq!(p.feed_line("SELECT * FROM users"), ParseResult::Incomplete);
458    }
459
460    #[test]
461    fn empty_input() {
462        let mut p = StatementParser::new();
463        assert_eq!(p.feed_line(""), ParseResult::Incomplete);
464        assert_eq!(p.feed_line("   "), ParseResult::Incomplete);
465    }
466
467    // --- Single-quoted string handling ---
468
469    #[test]
470    fn semicolon_in_single_quoted_string() {
471        let mut p = StatementParser::new();
472        let result = p.feed_line("INSERT INTO t (v) VALUES ('hello;world');");
473        assert_eq!(
474            result,
475            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello;world')".to_string()])
476        );
477    }
478
479    #[test]
480    fn escaped_quote_in_string() {
481        let mut p = StatementParser::new();
482        let result = p.feed_line("INSERT INTO t (v) VALUES ('it''s;here');");
483        assert_eq!(
484            result,
485            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('it''s;here')".to_string()])
486        );
487    }
488
489    // --- Double-quoted identifier handling ---
490
491    #[test]
492    fn semicolon_in_double_quoted_identifier() {
493        let mut p = StatementParser::new();
494        let result = p.feed_line("SELECT \"col;name\" FROM t;");
495        assert_eq!(
496            result,
497            ParseResult::Complete(vec!["SELECT \"col;name\" FROM t".to_string()])
498        );
499    }
500
501    #[test]
502    fn escaped_double_quote() {
503        let mut p = StatementParser::new();
504        let result = p.feed_line("SELECT \"col\"\"name\" FROM t;");
505        assert_eq!(
506            result,
507            ParseResult::Complete(vec!["SELECT \"col\"\"name\" FROM t".to_string()])
508        );
509    }
510
511    // --- Dollar-quoted string handling ---
512
513    #[test]
514    fn semicolon_in_dollar_quoted_string() {
515        let mut p = StatementParser::new();
516        let result = p.feed_line("CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$;");
517        assert_eq!(result, ParseResult::Complete(vec![
518            "CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$".to_string()
519        ]));
520    }
521
522    #[test]
523    fn dollar_quote_multiline() {
524        let mut p = StatementParser::new();
525        assert_eq!(
526            p.feed_line("CREATE FUNCTION f() RETURNS text LANGUAGE java AS $$"),
527            ParseResult::Incomplete
528        );
529        assert_eq!(p.feed_line("  return a;"), ParseResult::Incomplete);
530        let result = p.feed_line("$$;");
531        assert!(matches!(result, ParseResult::Complete(_)));
532    }
533
534    #[test]
535    fn empty_dollar_quote() {
536        let mut p = StatementParser::new();
537        let result = p.feed_line("SELECT $$$$;");
538        assert_eq!(
539            result,
540            ParseResult::Complete(vec!["SELECT $$$$".to_string()])
541        );
542    }
543
544    // --- Line comment stripping ---
545
546    #[test]
547    fn line_comment_stripped() {
548        let mut p = StatementParser::new();
549        let result = p.feed_line("SELECT * FROM t; -- this is a comment");
550        assert_eq!(
551            result,
552            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
553        );
554    }
555
556    #[test]
557    fn line_comment_does_not_terminate() {
558        let mut p = StatementParser::new();
559        // Semicolon inside line comment should not terminate
560        assert_eq!(
561            p.feed_line("SELECT * FROM t -- comment with ;"),
562            ParseResult::Incomplete
563        );
564    }
565
566    #[test]
567    fn line_comment_then_statement_across_lines() {
568        let mut p = StatementParser::new();
569        assert_eq!(p.feed_line("-- header comment"), ParseResult::Incomplete);
570        let result = p.feed_line("SELECT 1;");
571        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
572    }
573
574    // --- Block comment stripping (PR #150) ---
575
576    #[test]
577    fn block_comment_stripped() {
578        let mut p = StatementParser::new();
579        let result = p.feed_line("SELECT /* comment */ * FROM t;");
580        assert_eq!(
581            result,
582            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
583        );
584    }
585
586    #[test]
587    fn block_comment_with_semicolon() {
588        let mut p = StatementParser::new();
589        // Semicolon inside block comment should not terminate
590        let result = p.feed_line("SELECT /* ; */ * FROM t;");
591        assert_eq!(
592            result,
593            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
594        );
595    }
596
597    #[test]
598    fn block_comment_chars_in_single_quoted_string() {
599        // PR #150: /* inside strings must NOT be treated as comment start
600        let mut p = StatementParser::new();
601        let result = p.feed_line("INSERT INTO t (v) VALUES ('/* not a comment */');");
602        assert_eq!(
603            result,
604            ParseResult::Complete(vec![
605                "INSERT INTO t (v) VALUES ('/* not a comment */')".to_string()
606            ])
607        );
608    }
609
610    #[test]
611    fn block_comment_chars_in_double_quoted_string() {
612        let mut p = StatementParser::new();
613        let result = p.feed_line("SELECT \"/* not a comment */\" FROM t;");
614        assert_eq!(
615            result,
616            ParseResult::Complete(vec!["SELECT \"/* not a comment */\" FROM t".to_string()])
617        );
618    }
619
620    #[test]
621    fn block_comment_chars_in_dollar_quoted_string() {
622        let mut p = StatementParser::new();
623        let result = p.feed_line("SELECT $$/* not a comment */$$;");
624        assert_eq!(
625            result,
626            ParseResult::Complete(vec!["SELECT $$/* not a comment */$$".to_string()])
627        );
628    }
629
630    #[test]
631    fn block_comment_across_feed_lines() {
632        let mut p = StatementParser::new();
633        assert_eq!(p.feed_line("SELECT /* start"), ParseResult::Incomplete);
634        assert_eq!(p.feed_line("still comment"), ParseResult::Incomplete);
635        let result = p.feed_line("end */ 1;");
636        assert_eq!(
637            result,
638            ParseResult::Complete(vec!["SELECT   1".to_string()])
639        );
640    }
641
642    #[test]
643    fn nested_block_comments() {
644        let mut p = StatementParser::new();
645        let result = p.feed_line("SELECT /* outer /* inner */ still comment */ 1;");
646        assert_eq!(
647            result,
648            ParseResult::Complete(vec!["SELECT   1".to_string()])
649        );
650    }
651
652    #[test]
653    fn nested_block_comments_stripped() {
654        let input = "SELECT /* outer /* inner */ still */ 1";
655        let result = strip_comments(input);
656        assert_eq!(result, "SELECT   1");
657    }
658
659    // --- Multi-line statement buffering ---
660
661    #[test]
662    fn multiline_statement() {
663        let mut p = StatementParser::new();
664        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
665        assert_eq!(p.feed_line("FROM users"), ParseResult::Incomplete);
666        let result = p.feed_line("WHERE id = 1;");
667        assert_eq!(
668            result,
669            ParseResult::Complete(vec!["SELECT *\nFROM users\nWHERE id = 1".to_string()])
670        );
671    }
672
673    #[test]
674    fn multiline_with_string_across_lines() {
675        let mut p = StatementParser::new();
676        assert_eq!(
677            p.feed_line("INSERT INTO t (v) VALUES ('hello"),
678            ParseResult::Incomplete
679        );
680        let result = p.feed_line("world');");
681        assert_eq!(
682            result,
683            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello\nworld')".to_string()])
684        );
685    }
686
687    // --- Empty statement handling ---
688
689    #[test]
690    fn empty_statement_skipped() {
691        let mut p = StatementParser::new();
692        let result = p.feed_line(";;");
693        // Both semicolons produce empty statements which are skipped
694        assert_eq!(result, ParseResult::Incomplete);
695    }
696
697    #[test]
698    fn empty_between_statements() {
699        let mut p = StatementParser::new();
700        let result = p.feed_line("SELECT 1; ; SELECT 2;");
701        assert_eq!(
702            result,
703            ParseResult::Complete(vec!["SELECT 1".to_string(), "SELECT 2".to_string(),])
704        );
705    }
706
707    // --- Built-in command detection ---
708
709    #[test]
710    fn shell_commands_detected() {
711        assert!(is_shell_command("HELP"));
712        assert!(is_shell_command("?"));
713        assert!(is_shell_command("QUIT"));
714        assert!(is_shell_command("EXIT"));
715        assert!(is_shell_command("DESCRIBE KEYSPACES"));
716        assert!(is_shell_command("DESC TABLE users"));
717        assert!(is_shell_command("CONSISTENCY ONE"));
718        assert!(is_shell_command("TRACING ON"));
719        assert!(is_shell_command("EXPAND ON"));
720        assert!(is_shell_command("PAGING 100"));
721        assert!(is_shell_command("SHOW VERSION"));
722        assert!(is_shell_command("CLEAR"));
723        assert!(is_shell_command("CLS"));
724        assert!(is_shell_command("COPY users TO '/tmp/data.csv'"));
725        assert!(is_shell_command("USE my_keyspace"));
726    }
727
728    #[test]
729    fn shell_command_case_insensitive() {
730        assert!(is_shell_command("help"));
731        assert!(is_shell_command("quit"));
732        assert!(is_shell_command("Help"));
733        assert!(is_shell_command("describe keyspaces"));
734        assert!(is_shell_command("use my_ks"));
735    }
736
737    #[test]
738    fn shell_command_with_semicolon() {
739        assert!(is_shell_command("USE my_ks;"));
740        assert!(is_shell_command("HELP;"));
741    }
742
743    #[test]
744    fn cql_not_shell_command() {
745        assert!(!is_shell_command("SELECT * FROM users"));
746        assert!(!is_shell_command("INSERT INTO t (id) VALUES (1)"));
747        assert!(!is_shell_command("CREATE TABLE test (id int PRIMARY KEY)"));
748    }
749
750    // --- Command classification ---
751
752    #[test]
753    fn classify_shell_command() {
754        assert_eq!(
755            classify_input("HELP"),
756            InputKind::ShellCommand("HELP".to_string())
757        );
758        assert_eq!(
759            classify_input("USE my_ks"),
760            InputKind::ShellCommand("USE my_ks".to_string())
761        );
762    }
763
764    #[test]
765    fn classify_shell_command_with_semicolon() {
766        assert_eq!(
767            classify_input("USE my_ks;"),
768            InputKind::ShellCommand("USE my_ks;".to_string())
769        );
770    }
771
772    #[test]
773    fn classify_cql_statement() {
774        assert_eq!(
775            classify_input("SELECT * FROM users"),
776            InputKind::CqlStatement("SELECT * FROM users".to_string())
777        );
778    }
779
780    #[test]
781    fn classify_empty() {
782        assert_eq!(classify_input(""), InputKind::Empty);
783        assert_eq!(classify_input("   "), InputKind::Empty);
784    }
785
786    // --- Multiple statements on one line ---
787
788    #[test]
789    fn multiple_statements_one_line() {
790        let mut p = StatementParser::new();
791        let result = p.feed_line("SELECT 1; SELECT 2; SELECT 3;");
792        assert_eq!(
793            result,
794            ParseResult::Complete(vec![
795                "SELECT 1".to_string(),
796                "SELECT 2".to_string(),
797                "SELECT 3".to_string(),
798            ])
799        );
800    }
801
802    // --- Whitespace normalization ---
803
804    #[test]
805    fn leading_trailing_whitespace_trimmed() {
806        let mut p = StatementParser::new();
807        let result = p.feed_line("  SELECT * FROM t  ;  ");
808        assert_eq!(
809            result,
810            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
811        );
812    }
813
814    // --- Batch mode parsing ---
815
816    #[test]
817    fn parse_batch_basic() {
818        let input = "SELECT 1;\nSELECT 2;\n";
819        let stmts = parse_batch(input);
820        assert_eq!(stmts, vec!["SELECT 1", "SELECT 2"]);
821    }
822
823    #[test]
824    fn parse_batch_with_comments() {
825        let input = "-- header comment\nSELECT 1; -- inline\nSELECT /* x */ 2;\n";
826        let stmts = parse_batch(input);
827        assert_eq!(stmts, vec!["SELECT 1", "SELECT   2"]);
828    }
829
830    #[test]
831    fn parse_batch_multiline_statement() {
832        let input = "SELECT *\nFROM users\nWHERE id = 1;\n";
833        let stmts = parse_batch(input);
834        assert_eq!(stmts, vec!["SELECT *\nFROM users\nWHERE id = 1"]);
835    }
836
837    #[test]
838    fn parse_batch_with_shell_command() {
839        let input = "SELECT 1;\nUSE my_ks\n";
840        let stmts = parse_batch(input);
841        assert_eq!(stmts, vec!["SELECT 1", "USE my_ks"]);
842    }
843
844    #[test]
845    fn parse_batch_drops_incomplete_cql() {
846        // CQL without semicolon at end of file is dropped (Python cqlsh behavior)
847        let input = "SELECT 1;\nSELECT 2";
848        let stmts = parse_batch(input);
849        assert_eq!(stmts, vec!["SELECT 1"]);
850    }
851
852    #[test]
853    fn parse_batch_only_comments() {
854        let input = "-- just a comment\n/* block */\n";
855        let stmts = parse_batch(input);
856        assert!(stmts.is_empty());
857    }
858
859    // --- Comment stripping edge cases ---
860
861    #[test]
862    fn strip_comments_preserves_strings() {
863        let input = "SELECT '-- not a comment' FROM t";
864        let result = strip_comments(input);
865        assert_eq!(result, "SELECT '-- not a comment' FROM t");
866    }
867
868    #[test]
869    fn strip_comments_preserves_dollar_strings() {
870        let input = "SELECT $$-- not a comment$$ FROM t";
871        let result = strip_comments(input);
872        assert_eq!(result, "SELECT $$-- not a comment$$ FROM t");
873    }
874
875    #[test]
876    fn strip_comments_multiline_block() {
877        let input = "SELECT /* multi\nline\ncomment */ 1";
878        let result = strip_comments(input);
879        // Block comment is replaced with a single space, plus the existing space = "  "
880        assert_eq!(result, "SELECT   1");
881    }
882
883    // --- Parser reset ---
884
885    #[test]
886    fn reset_clears_state() {
887        let mut p = StatementParser::new();
888        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
889        assert!(!p.is_empty());
890
891        p.reset();
892        assert!(p.is_empty());
893
894        // After reset, should start fresh
895        let result = p.feed_line("SELECT 1;");
896        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
897    }
898
899    // --- Parser reuse after Complete ---
900
901    #[test]
902    fn reuse_after_complete() {
903        let mut p = StatementParser::new();
904        let r1 = p.feed_line("SELECT 1;");
905        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
906
907        // Parser should work for subsequent statements
908        let r2 = p.feed_line("SELECT 2;");
909        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
910    }
911
912    #[test]
913    fn reuse_after_complete_multiline() {
914        let mut p = StatementParser::new();
915        assert_eq!(
916            p.feed_line("SELECT 1;"),
917            ParseResult::Complete(vec!["SELECT 1".to_string()])
918        );
919
920        // Now a multi-line statement
921        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
922        let result = p.feed_line("FROM t;");
923        assert_eq!(
924            result,
925            ParseResult::Complete(vec!["SELECT *\nFROM t".to_string()])
926        );
927    }
928
929    // --- Unterminated constructs ---
930
931    #[test]
932    fn unterminated_string_blocks_semicolon() {
933        let stmts = parse_batch("SELECT 'unterminated;");
934        assert!(stmts.is_empty());
935    }
936
937    #[test]
938    fn unterminated_block_comment_blocks_semicolon() {
939        let stmts = parse_batch("SELECT /* never closed;");
940        assert!(stmts.is_empty());
941    }
942
943    // --- Backslash in strings ---
944
945    #[test]
946    fn backslash_in_string_is_literal() {
947        // CQL does NOT use backslash escaping (uses '' instead)
948        let mut p = StatementParser::new();
949        let result = p.feed_line("SELECT '\\';");
950        assert_eq!(
951            result,
952            ParseResult::Complete(vec!["SELECT '\\'".to_string()])
953        );
954    }
955
956    // --- Unicode handling ---
957
958    #[test]
959    fn unicode_in_strings() {
960        let mut p = StatementParser::new();
961        let result = p.feed_line("INSERT INTO t (v) VALUES ('héllo wörld; café');");
962        assert_eq!(
963            result,
964            ParseResult::Complete(vec![
965                "INSERT INTO t (v) VALUES ('héllo wörld; café')".to_string()
966            ])
967        );
968    }
969
970    #[test]
971    fn unicode_identifier() {
972        let mut p = StatementParser::new();
973        let result = p.feed_line("SELECT \"naïve;col\" FROM t;");
974        assert_eq!(
975            result,
976            ParseResult::Complete(vec!["SELECT \"naïve;col\" FROM t".to_string()])
977        );
978    }
979
980    // --- Incremental scan correctness ---
981
982    #[test]
983    fn incremental_scan_preserves_state_across_lines() {
984        // Verify that the parser doesn't re-scan from the start each time.
985        // This is a correctness test: if state weren't preserved,
986        // the second line's `'` would start a new string context.
987        let mut p = StatementParser::new();
988        assert_eq!(
989            p.feed_line("INSERT INTO t VALUES ('multi"),
990            ParseResult::Incomplete
991        );
992        assert_eq!(
993            p.feed_line("line string with ; inside"),
994            ParseResult::Incomplete
995        );
996        let result = p.feed_line("end of string');");
997        assert_eq!(
998            result,
999            ParseResult::Complete(vec![
1000                "INSERT INTO t VALUES ('multi\nline string with ; inside\nend of string')"
1001                    .to_string()
1002            ])
1003        );
1004    }
1005
1006    // --- BUG-7: Inline comment after semicolon ---
1007
1008    #[test]
1009    fn inline_comment_after_semicolon_clears_buffer() {
1010        let mut p = StatementParser::new();
1011        let result = p.feed_line("SELECT 1; -- inline comment");
1012        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1013        // Parser should be empty — no continuation prompt
1014        assert!(p.is_empty());
1015    }
1016
1017    #[test]
1018    fn inline_comment_after_semicolon_next_statement_works() {
1019        let mut p = StatementParser::new();
1020        let r1 = p.feed_line("SELECT 1; -- comment");
1021        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1022        assert!(p.is_empty());
1023
1024        // Next statement should work normally
1025        let r2 = p.feed_line("SELECT 2;");
1026        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
1027    }
1028
1029    // --- BUG-8: Bare ;; enters continuation ---
1030
1031    #[test]
1032    fn bare_semicolons_clear_buffer() {
1033        let mut p = StatementParser::new();
1034        let result = p.feed_line(";;");
1035        assert_eq!(result, ParseResult::Incomplete);
1036        // Parser should be empty — no continuation prompt
1037        assert!(p.is_empty());
1038    }
1039
1040    #[test]
1041    fn bare_semicolons_then_statement() {
1042        let mut p = StatementParser::new();
1043        assert_eq!(p.feed_line(";;"), ParseResult::Incomplete);
1044        assert!(p.is_empty());
1045
1046        let result = p.feed_line("SELECT 1;");
1047        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1048    }
1049
1050    #[test]
1051    fn only_whitespace_and_comments_clears_buffer() {
1052        let mut p = StatementParser::new();
1053        assert_eq!(p.feed_line("-- just a comment"), ParseResult::Incomplete);
1054        assert!(p.is_empty());
1055    }
1056
1057    #[test]
1058    fn block_comment_only_clears_buffer() {
1059        let mut p = StatementParser::new();
1060        assert_eq!(p.feed_line("/* block comment */"), ParseResult::Incomplete);
1061        assert!(p.is_empty());
1062    }
1063
1064    // --- BATCH statement handling ---
1065
1066    #[test]
1067    fn batch_with_inner_semicolons() {
1068        let mut p = StatementParser::new();
1069        assert_eq!(p.feed_line("BEGIN BATCH"), ParseResult::Incomplete);
1070        assert_eq!(
1071            p.feed_line("INSERT INTO t (id) VALUES (1);"),
1072            ParseResult::Incomplete
1073        );
1074        assert_eq!(
1075            p.feed_line("INSERT INTO t (id) VALUES (2);"),
1076            ParseResult::Incomplete
1077        );
1078        let result = p.feed_line("APPLY BATCH;");
1079        assert_eq!(
1080            result,
1081            ParseResult::Complete(vec![
1082                "BEGIN BATCH\nINSERT INTO t (id) VALUES (1);\nINSERT INTO t (id) VALUES (2);\nAPPLY BATCH".to_string()
1083            ])
1084        );
1085    }
1086
1087    #[test]
1088    fn batch_without_inner_semicolons() {
1089        let mut p = StatementParser::new();
1090        assert_eq!(p.feed_line("BEGIN BATCH"), ParseResult::Incomplete);
1091        assert_eq!(
1092            p.feed_line("INSERT INTO t (id) VALUES (1)"),
1093            ParseResult::Incomplete
1094        );
1095        assert_eq!(
1096            p.feed_line("INSERT INTO t (id) VALUES (2)"),
1097            ParseResult::Incomplete
1098        );
1099        let result = p.feed_line("APPLY BATCH;");
1100        assert_eq!(
1101            result,
1102            ParseResult::Complete(vec![
1103                "BEGIN BATCH\nINSERT INTO t (id) VALUES (1)\nINSERT INTO t (id) VALUES (2)\nAPPLY BATCH".to_string()
1104            ])
1105        );
1106    }
1107
1108    #[test]
1109    fn batch_single_line_no_inner_semicolons() {
1110        let mut p = StatementParser::new();
1111        let result = p.feed_line(
1112            "BEGIN BATCH INSERT INTO t (id) VALUES (1) INSERT INTO t (id) VALUES (2) APPLY BATCH;",
1113        );
1114        assert_eq!(
1115            result,
1116            ParseResult::Complete(vec![
1117                "BEGIN BATCH INSERT INTO t (id) VALUES (1) INSERT INTO t (id) VALUES (2) APPLY BATCH".to_string()
1118            ])
1119        );
1120    }
1121
1122    #[test]
1123    fn batch_unlogged() {
1124        let mut p = StatementParser::new();
1125        assert_eq!(p.feed_line("BEGIN UNLOGGED BATCH"), ParseResult::Incomplete);
1126        assert_eq!(
1127            p.feed_line("INSERT INTO t (id) VALUES (1)"),
1128            ParseResult::Incomplete
1129        );
1130        let result = p.feed_line("APPLY BATCH;");
1131        assert_eq!(
1132            result,
1133            ParseResult::Complete(vec![
1134                "BEGIN UNLOGGED BATCH\nINSERT INTO t (id) VALUES (1)\nAPPLY BATCH".to_string()
1135            ])
1136        );
1137    }
1138
1139    #[test]
1140    fn batch_via_parse_batch_no_inner_semicolons() {
1141        let stmts = parse_batch(
1142            "BEGIN BATCH\nINSERT INTO t (id) VALUES (1)\nINSERT INTO t (id) VALUES (2)\nAPPLY BATCH;",
1143        );
1144        assert_eq!(stmts.len(), 1);
1145        assert!(stmts[0].starts_with("BEGIN BATCH"));
1146        assert!(stmts[0].ends_with("APPLY BATCH"));
1147    }
1148
1149    #[test]
1150    fn slash_slash_line_comment_stripped() {
1151        let mut p = StatementParser::new();
1152        let result = p.feed_line("SELECT 1; // post-line comment");
1153        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1154    }
1155
1156    #[test]
1157    fn slash_slash_comment_mid_file() {
1158        let mut p = StatementParser::new();
1159        assert_eq!(p.feed_line("// this is a comment"), ParseResult::Incomplete);
1160        let result = p.feed_line("SELECT 1;");
1161        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1162    }
1163
1164    #[test]
1165    fn slash_slash_inside_string_not_treated_as_comment() {
1166        let mut p = StatementParser::new();
1167        let result = p.feed_line("SELECT '// not a comment';");
1168        assert_eq!(
1169            result,
1170            ParseResult::Complete(vec!["SELECT '// not a comment'".to_string()])
1171        );
1172    }
1173
1174    #[test]
1175    fn block_comment_chars_inside_string_literal() {
1176        let mut p = StatementParser::new();
1177        let result = p.feed_line("INSERT INTO t (v) VALUES ('test_role./*');");
1178        assert_eq!(
1179            result,
1180            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('test_role./*')".to_string()])
1181        );
1182    }
1183
1184    #[test]
1185    fn block_comment_close_inside_string_literal() {
1186        let mut p = StatementParser::new();
1187        let result = p.feed_line("INSERT INTO t (v) VALUES ('v1*/');");
1188        assert_eq!(
1189            result,
1190            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('v1*/')".to_string()])
1191        );
1192    }
1193
1194    #[test]
1195    fn mixed_comment_chars_in_strings() {
1196        let mut p = StatementParser::new();
1197        let result = p.feed_line("INSERT INTO t (a,b,c) VALUES ('aKey','v1*/','/v2/*/v3');");
1198        assert_eq!(
1199            result,
1200            ParseResult::Complete(vec![
1201                "INSERT INTO t (a,b,c) VALUES ('aKey','v1*/','/v2/*/v3')".to_string()
1202            ])
1203        );
1204    }
1205
1206    #[test]
1207    fn double_dash_inside_string_not_comment() {
1208        let mut p = StatementParser::new();
1209        let result = p.feed_line("SELECT '-- not a comment';");
1210        assert_eq!(
1211            result,
1212            ParseResult::Complete(vec!["SELECT '-- not a comment'".to_string()])
1213        );
1214    }
1215
1216    #[test]
1217    fn inline_block_comment_stripped() {
1218        let mut p = StatementParser::new();
1219        let result = p.feed_line("SELECT /* inline */ * FROM t;");
1220        assert_eq!(
1221            result,
1222            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
1223        );
1224    }
1225
1226    #[test]
1227    fn multiline_block_comment_across_feeds() {
1228        let mut p = StatementParser::new();
1229        assert_eq!(
1230            p.feed_line("SELECT * FROM t WHERE"),
1231            ParseResult::Incomplete
1232        );
1233        assert_eq!(p.feed_line("/* multi-line"), ParseResult::Incomplete);
1234        assert_eq!(p.feed_line("   comment */"), ParseResult::Incomplete);
1235        let result = p.feed_line("id = 1;");
1236        assert_eq!(
1237            result,
1238            ParseResult::Complete(vec!["SELECT * FROM t WHERE\n \nid = 1".to_string()])
1239        );
1240    }
1241
1242    #[test]
1243    fn comment_before_statement() {
1244        let mut p = StatementParser::new();
1245        assert_eq!(p.feed_line("/* comment */"), ParseResult::Incomplete);
1246        let result = p.feed_line("SELECT 1;");
1247        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1248    }
1249
1250    #[test]
1251    fn multiple_statements_with_comments() {
1252        let mut p = StatementParser::new();
1253        let result = p.feed_line("SELECT 1; -- comment\nSELECT 2;");
1254        assert_eq!(
1255            result,
1256            ParseResult::Complete(vec!["SELECT 1".to_string(), "SELECT 2".to_string()])
1257        );
1258    }
1259}