/// A single lexical token produced by [`tokenize`].
///
/// Tokens cover every byte of the input (whitespace and comments included),
/// so consecutive tokens' spans are contiguous.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    // Lexical category of this token.
    pub kind: TokenKind,
    // Exact source text covered by the token.
    pub text: String,
    // Byte offset (inclusive) of the token's first byte in the input.
    pub start: usize,
    // Byte offset (exclusive) just past the token's last byte.
    pub end: usize,
}
22
/// Lexical category of a [`Token`], as assigned by `tokenize`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    /// Reserved CQL word (member of `CQL_KEYWORDS`) in a position where
    /// keyword classification applies.
    Keyword,
    /// Unquoted name — table, keyspace, column, or any word not classified
    /// as a keyword in the current grammar context.
    Identifier,
    /// `"..."` double-quoted (case-sensitive) identifier; `""` escapes a quote.
    QuotedIdentifier,
    /// `'...'` single-quoted string; `''` escapes a quote.
    StringLiteral,
    /// `$$...$$` dollar-quoted string.
    DollarStringLiteral,
    /// Integer, decimal, or exponent-form number, optionally negative.
    NumberLiteral,
    /// `0x...` hexadecimal blob literal.
    BlobLiteral,
    /// UUID literal in 8-4-4-4-12 hexadecimal form.
    UuidLiteral,
    /// `TRUE` / `FALSE` in any letter case.
    BooleanLiteral,
    /// Comparison/arithmetic operator, e.g. `=`, `<=`, `>=`, `!=`, `+`, `%`.
    Operator,
    /// Single punctuation character: `;`, `,`, parentheses, `.`, `*`, `?`,
    /// braces, brackets, `:`.
    Punctuation,
    /// A run of ASCII whitespace.
    Whitespace,
    /// `--` comment running to end of line (newline excluded).
    LineComment,
    /// `/* ... */` comment; nesting is supported.
    BlockComment,
    /// Any character no other rule claims.
    Unknown,
}
58
/// Lightweight statement-position state tracked while lexing and reported by
/// [`context_from_tokens`] for end-of-input hints.  Transitions live in the
/// `advance_context_after_*` helpers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GrammarContext {
    /// Nothing significant has been consumed yet.
    Start,
    /// After `SELECT` (and through the projection list): column list expected.
    ExpectColumnList,
    /// After `FROM` / `INTO` / `UPDATE` / `TABLE` / `TRUNCATE` / `ON`: a table name.
    ExpectTable,
    /// After `USE` / `KEYSPACE`: a keyspace name.
    ExpectKeyspace,
    /// After `WHERE` / `IF` (or a continuing `AND`): a column name.
    ExpectColumn,
    /// After `SET` in an UPDATE: assignment targets.
    ExpectSetClause,
    /// After a `.`: the trailing part of a qualified name.
    ExpectQualifiedPart,
    /// A CQL type is expected.  NOTE(review): no visible code path produces
    /// this variant — presumably used by code outside this view; confirm.
    ExpectType,
    /// After `ORDER`: the word `BY` expected next.
    ExpectOrderBy,
    /// After `ORDER BY`: the ordering column.
    ExpectOrderByColumn,
    /// After `VALUES` in an INSERT.
    ExpectValues,
    /// After `WITH`: an option name.
    ExpectWithOption,
    /// After `CONSISTENCY`: a consistency-level name.
    ExpectConsistencyLevel,
    /// After `DESCRIBE` / `DESC`: the object to describe.
    ExpectDescribeTarget,
    /// After `SOURCE` / `CAPTURE`: a file path.
    ExpectFilePath,
    /// No specific expectation.
    General,
}
96
97const CQL_KEYWORDS: &[&str] = &[
99 "ADD",
100 "AGGREGATE",
101 "ALL",
102 "ALLOW",
103 "ALTER",
104 "AND",
105 "APPLY",
106 "AS",
107 "ASC",
108 "AUTHORIZE",
109 "BATCH",
110 "BEGIN",
111 "BY",
112 "CALLED",
113 "CAPTURE",
114 "CLEAR",
115 "CLS",
116 "CLUSTERING",
117 "COLUMN",
118 "COMPACT",
119 "CONSISTENCY",
120 "CONTAINS",
121 "COPY",
122 "COUNT",
123 "COUNTER",
124 "CREATE",
125 "CUSTOM",
126 "DELETE",
127 "DESC",
128 "DESCRIBE",
129 "DISTINCT",
130 "DROP",
131 "EACH_QUORUM",
132 "ENTRIES",
133 "EXECUTE",
134 "EXISTS",
135 "EXIT",
136 "EXPAND",
137 "FILTERING",
138 "FINALFUNC",
139 "FROM",
140 "FROZEN",
141 "FULL",
142 "FUNCTION",
143 "FUNCTIONS",
144 "GRANT",
145 "HELP",
146 "IF",
147 "IN",
148 "INDEX",
149 "INITCOND",
150 "INPUT",
151 "INSERT",
152 "INTO",
153 "IS",
154 "JSON",
155 "KEY",
156 "KEYSPACE",
157 "KEYSPACES",
158 "LANGUAGE",
159 "LIKE",
160 "LIMIT",
161 "LIST",
162 "LOCAL_ONE",
163 "LOCAL_QUORUM",
164 "LOGIN",
165 "MAP",
166 "MATERIALIZED",
167 "MODIFY",
168 "NAMESPACE",
169 "NORECURSIVE",
170 "NOT",
171 "NULL",
172 "OF",
173 "ON",
174 "ONE",
175 "OR",
176 "ORDER",
177 "PAGING",
178 "PARTITION",
179 "PASSWORD",
180 "PER",
181 "PERMISSION",
182 "PERMISSIONS",
183 "PRIMARY",
184 "QUIT",
185 "QUORUM",
186 "RENAME",
187 "REPLACE",
188 "RETURNS",
189 "REVOKE",
190 "SCHEMA",
191 "SELECT",
192 "SERIAL",
193 "SET",
194 "SFUNC",
195 "SHOW",
196 "SOURCE",
197 "STATIC",
198 "STORAGE",
199 "STYPE",
200 "SUPERUSER",
201 "TABLE",
202 "TABLES",
203 "TEXT",
204 "THREE",
205 "TIMESTAMP",
206 "TO",
207 "TOKEN",
208 "TRACING",
209 "TRIGGER",
210 "TRUNCATE",
211 "TTL",
212 "TUPLE",
213 "TWO",
214 "TYPE",
215 "UNICODE",
216 "UNLOGGED",
217 "UPDATE",
218 "USE",
219 "USER",
220 "USERS",
221 "USING",
222 "VALUES",
223 "VIEW",
224 "WHERE",
225 "WITH",
226 "WRITETIME",
227];
228
229pub fn is_cql_keyword(word: &str) -> bool {
231 let upper = word.to_uppercase();
232 CQL_KEYWORDS.binary_search(&upper.as_str()).is_ok()
233}
234
/// Lexes `input` into a flat list of [`Token`]s.
///
/// Every byte of the input is covered by exactly one token (whitespace and
/// comments included), so token spans tile the input with no gaps.  A small
/// [`GrammarContext`] state machine tracks statement position so words are
/// classified context-sensitively — e.g. in `SELECT * FROM SET` the trailing
/// `SET` lexes as an identifier because it sits in table position.
pub fn tokenize(input: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
    let mut ctx = GrammarContext::Start;
    // Scanning is byte-based; multi-byte UTF-8 sequences are stepped over
    // via `char_len_at` (inside quoted literals and for Unknown tokens).
    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut i = 0;

    while i < len {
        let ch = bytes[i];

        // A run of ASCII whitespace collapses into one Whitespace token.
        if ch.is_ascii_whitespace() {
            let start = i;
            while i < len && bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            tokens.push(Token {
                kind: TokenKind::Whitespace,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            continue;
        }

        // `--` line comment: runs up to (not including) the newline.
        if ch == b'-' && i + 1 < len && bytes[i + 1] == b'-' {
            let start = i;
            i += 2;
            while i < len && bytes[i] != b'\n' {
                i += 1;
            }
            tokens.push(Token {
                kind: TokenKind::LineComment,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            continue;
        }

        // `/* ... */` block comment; nesting honoured via `depth`.  An
        // unterminated comment consumes the rest of the input.
        if ch == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
            let start = i;
            let mut depth: usize = 1;
            i += 2;
            while i < len && depth > 0 {
                if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
                    depth += 1;
                    i += 2;
                } else if bytes[i] == b'*' && i + 1 < len && bytes[i + 1] == b'/' {
                    depth -= 1;
                    i += 2;
                } else {
                    i += 1;
                }
            }
            tokens.push(Token {
                kind: TokenKind::BlockComment,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            continue;
        }

        // Single-quoted string literal; `''` escapes a quote.  An
        // unterminated literal runs to end of input.
        if ch == b'\'' {
            let start = i;
            i += 1;
            loop {
                if i >= len {
                    break;
                }
                if bytes[i] == b'\'' {
                    i += 1;
                    // Doubled quote inside the literal: keep scanning.
                    if i < len && bytes[i] == b'\'' {
                        i += 1;
                        continue;
                    }
                    break;
                }
                i += char_len_at(bytes, i);
            }
            tokens.push(Token {
                kind: TokenKind::StringLiteral,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            ctx = advance_context_after_value(ctx);
            continue;
        }

        // Double-quoted (case-sensitive) identifier; `""` escapes a quote.
        if ch == b'"' {
            let start = i;
            i += 1;
            loop {
                if i >= len {
                    break;
                }
                if bytes[i] == b'"' {
                    i += 1;
                    // Doubled quote inside the identifier: keep scanning.
                    if i < len && bytes[i] == b'"' {
                        i += 1;
                        continue;
                    }
                    break;
                }
                i += char_len_at(bytes, i);
            }
            tokens.push(Token {
                kind: TokenKind::QuotedIdentifier,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            ctx = advance_context_after_name(ctx);
            continue;
        }

        // `$$ ... $$` dollar-quoted string.  Without a closing `$$` the
        // literal consumes the rest of the input.
        if ch == b'$' && i + 1 < len && bytes[i + 1] == b'$' {
            let start = i;
            i += 2;
            loop {
                if i + 1 >= len {
                    i = len;
                    break;
                }
                if bytes[i] == b'$' && bytes[i + 1] == b'$' {
                    i += 2;
                    break;
                }
                i += 1;
            }
            tokens.push(Token {
                kind: TokenKind::DollarStringLiteral,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            ctx = advance_context_after_value(ctx);
            continue;
        }

        // `0x...` blob literal.
        if ch == b'0' && i + 1 < len && (bytes[i + 1] == b'x' || bytes[i + 1] == b'X') {
            let start = i;
            i += 2;
            while i < len && bytes[i].is_ascii_hexdigit() {
                i += 1;
            }
            if i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
                // Not a clean blob (e.g. `0x12g`): rewind and let the
                // number/word paths below re-scan from `start`.
                i = start;
            } else {
                tokens.push(Token {
                    kind: TokenKind::BlobLiteral,
                    text: input[start..i].to_string(),
                    start,
                    end: i,
                });
                ctx = advance_context_after_value(ctx);
                continue;
            }
        }

        // Number literal: integer, decimal and exponent forms, with an
        // optional leading `-` when the previous token allows a sign.
        if ch.is_ascii_digit()
            || (ch == b'-'
                && i + 1 < len
                && bytes[i + 1].is_ascii_digit()
                && is_number_sign_position(&tokens))
        {
            let start = i;
            if ch == b'-' {
                i += 1;
            }
            while i < len && bytes[i].is_ascii_digit() {
                i += 1;
            }
            // Fractional part only when a digit follows the dot, so `1.`
            // leaves the dot for the punctuation path.
            if i < len && bytes[i] == b'.' && i + 1 < len && bytes[i + 1].is_ascii_digit() {
                i += 1;
                while i < len && bytes[i].is_ascii_digit() {
                    i += 1;
                }
            }
            // Exponent accepted only if digits follow `e`/`E` (after an
            // optional sign); otherwise roll back so `e` starts a new token.
            if i < len && (bytes[i] == b'e' || bytes[i] == b'E') {
                let save = i;
                i += 1;
                if i < len && (bytes[i] == b'+' || bytes[i] == b'-') {
                    i += 1;
                }
                if i < len && bytes[i].is_ascii_digit() {
                    while i < len && bytes[i].is_ascii_digit() {
                        i += 1;
                    }
                } else {
                    i = save;
                }
            }
            // An 8-hex-digit run followed by `-` may actually be the first
            // segment of a UUID literal.
            if i < len && bytes[i] == b'-' && looks_like_uuid(input, start, i) {
                let uuid_end = scan_uuid(input, start);
                if uuid_end > i {
                    i = uuid_end;
                    tokens.push(Token {
                        kind: TokenKind::UuidLiteral,
                        text: input[start..i].to_string(),
                        start,
                        end: i,
                    });
                    ctx = advance_context_after_value(ctx);
                    continue;
                }
            }
            // Digits running straight into letters (`123abc`) form a single
            // word token instead of a number.
            if i < len && (bytes[i].is_ascii_alphabetic() || bytes[i] == b'_') {
                while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
                    i += 1;
                }
                let word = &input[start..i];
                tokens.push(Token {
                    kind: classify_word(word, ctx),
                    text: word.to_string(),
                    start,
                    end: i,
                });
                ctx = advance_context_after_word(word, ctx);
                continue;
            }
            tokens.push(Token {
                kind: TokenKind::NumberLiteral,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            ctx = advance_context_after_value(ctx);
            continue;
        }

        // Bare word: keyword, identifier or boolean — decided by context.
        if ch.is_ascii_alphabetic() || ch == b'_' {
            let start = i;
            while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
                i += 1;
            }
            let word = &input[start..i];

            // An all-hex 8-letter word followed by `-` (e.g. `deadbeef-...`)
            // may open a UUID literal.
            if i < len
                && bytes[i] == b'-'
                && word.len() == 8
                && word.chars().all(|c| c.is_ascii_hexdigit())
            {
                let uuid_end = scan_uuid(input, start);
                if uuid_end > i {
                    i = uuid_end;
                    tokens.push(Token {
                        kind: TokenKind::UuidLiteral,
                        text: input[start..i].to_string(),
                        start,
                        end: i,
                    });
                    ctx = advance_context_after_value(ctx);
                    continue;
                }
            }

            let kind = classify_word(word, ctx);
            tokens.push(Token {
                kind,
                text: word.to_string(),
                start,
                end: i,
            });
            ctx = advance_context_after_word(word, ctx);
            continue;
        }

        // Operators; two-character forms (`<=`, `>=`, `!=`) win over single.
        if is_operator_char(ch) {
            let start = i;
            if i + 1 < len && is_two_char_operator(ch, bytes[i + 1]) {
                i += 2;
            } else {
                i += 1;
            }
            tokens.push(Token {
                kind: TokenKind::Operator,
                text: input[start..i].to_string(),
                start,
                end: i,
            });
            continue;
        }

        // Single-character punctuation.
        if is_punctuation(ch) {
            let start = i;
            i += 1;
            let text = input[start..i].to_string();

            // A dot links qualified names: the next word is the second part.
            if ch == b'.' {
                ctx = GrammarContext::ExpectQualifiedPart;
            }

            tokens.push(Token {
                kind: TokenKind::Punctuation,
                text,
                start,
                end: i,
            });
            continue;
        }

        // Anything else: emit one full UTF-8 character as Unknown.
        let start = i;
        let clen = char_len_at(bytes, i);
        i += clen;
        tokens.push(Token {
            kind: TokenKind::Unknown,
            text: input[start..i].to_string(),
            start,
            end: i,
        });
    }

    tokens
}
583
584pub fn grammar_context_at_end(input: &str) -> GrammarContext {
587 let tokens = tokenize(input);
588 context_from_tokens(&tokens)
589}
590
/// Derives the [`GrammarContext`] that applies *after* the last significant
/// token of `tokens` — i.e. what kind of thing is expected next.
pub fn context_from_tokens(tokens: &[Token]) -> GrammarContext {
    // Ignore trivia: whitespace and both comment forms.
    let significant: Vec<&Token> = tokens
        .iter()
        .filter(|t| {
            !matches!(
                t.kind,
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
            )
        })
        .collect();

    if significant.is_empty() {
        return GrammarContext::Start;
    }

    let last = significant.last().unwrap();

    // Words (keyword or identifier) drive most transitions, compared
    // case-insensitively.
    if last.kind == TokenKind::Keyword || last.kind == TokenKind::Identifier {
        let upper = last.text.to_uppercase();
        match upper.as_str() {
            "SELECT" | "DISTINCT" => return GrammarContext::ExpectColumnList,
            "FROM" | "INTO" | "UPDATE" | "TABLE" | "TRUNCATE" => {
                return GrammarContext::ExpectTable
            }
            "USE" | "KEYSPACE" | "KEYSPACES" => return GrammarContext::ExpectKeyspace,
            "WHERE" | "IF" => return GrammarContext::ExpectColumn,
            // `AND` continues a condition list only inside a WHERE/IF clause.
            "AND" => {
                if has_keyword_before(&significant, &["WHERE", "IF"]) {
                    return GrammarContext::ExpectColumn;
                }
                return GrammarContext::General;
            }
            // `SET` introduces assignments only in an UPDATE statement.
            "SET" => {
                if has_keyword_before(&significant, &["UPDATE"]) {
                    return GrammarContext::ExpectSetClause;
                }
                return GrammarContext::General;
            }
            "ORDER" => return GrammarContext::ExpectOrderBy,
            // `BY` only counts immediately after `ORDER`.
            "BY" => {
                if significant.len() >= 2
                    && significant[significant.len() - 2].text.to_uppercase() == "ORDER"
                {
                    return GrammarContext::ExpectOrderByColumn;
                }
                return GrammarContext::General;
            }
            "VALUES" => return GrammarContext::ExpectValues,
            "WITH" => return GrammarContext::ExpectWithOption,
            "CONSISTENCY" => return GrammarContext::ExpectConsistencyLevel,
            "DESCRIBE" | "DESC" => return GrammarContext::ExpectDescribeTarget,
            "SOURCE" | "CAPTURE" => return GrammarContext::ExpectFilePath,
            // `ON` expects a table name in `CREATE INDEX ... ON <table>`.
            "ON" => {
                if has_keyword_before(&significant, &["INDEX"]) {
                    return GrammarContext::ExpectTable;
                }
                return GrammarContext::General;
            }
            "INDEX" => {
                // The index name itself is free-form.
                return GrammarContext::General;
            }
            _ => {}
        }
    }

    // A trailing dot means the next word completes a qualified name.
    if last.kind == TokenKind::Punctuation && last.text == "." {
        return GrammarContext::ExpectQualifiedPart;
    }

    if significant.len() >= 2 {
        let second_last = significant[significant.len() - 2];
        let sl_upper = second_last.text.to_uppercase();
        // A name has already followed FROM/INTO/...: the clause is satisfied.
        if matches!(
            sl_upper.as_str(),
            "FROM" | "INTO" | "UPDATE" | "TABLE" | "TRUNCATE"
        ) {
            return GrammarContext::General;
        }
        // `SERIAL CONSISTENCY <level>`.
        if sl_upper == "SERIAL" && last.text.to_uppercase() == "CONSISTENCY" {
            return GrammarContext::ExpectConsistencyLevel;
        }
    }

    GrammarContext::General
}
686
687fn is_strict_identifier_context(ctx: GrammarContext) -> bool {
690 matches!(
691 ctx,
692 GrammarContext::ExpectTable
693 | GrammarContext::ExpectKeyspace
694 | GrammarContext::ExpectColumn
695 | GrammarContext::ExpectQualifiedPart
696 | GrammarContext::ExpectOrderByColumn
697 | GrammarContext::ExpectSetClause
698 | GrammarContext::ExpectConsistencyLevel
699 )
700}
701
/// The only words still treated as keywords inside a SELECT projection list.
const COLUMN_LIST_KEYWORDS: &[&str] = &["AS", "DISTINCT", "FROM", "JSON"];

/// Case-insensitive membership test against [`COLUMN_LIST_KEYWORDS`].
fn is_column_list_keyword(word: &str) -> bool {
    let folded = word.to_uppercase();
    COLUMN_LIST_KEYWORDS.iter().any(|&kw| kw == folded)
}
710
711fn classify_word(word: &str, ctx: GrammarContext) -> TokenKind {
713 let upper = word.to_uppercase();
714
715 if upper == "TRUE" || upper == "FALSE" {
717 return TokenKind::BooleanLiteral;
718 }
719
720 if is_strict_identifier_context(ctx) {
722 return TokenKind::Identifier;
723 }
724
725 if ctx == GrammarContext::ExpectColumnList {
727 if is_column_list_keyword(word) {
728 return TokenKind::Keyword;
729 }
730 return TokenKind::Identifier;
731 }
732
733 if upper == "NULL" {
735 return TokenKind::Keyword;
736 }
737
738 if is_cql_keyword(&upper) {
739 TokenKind::Keyword
740 } else {
741 TokenKind::Identifier
742 }
743}
744
/// State transition applied by `tokenize` after a word token has been
/// consumed; returns the [`GrammarContext`] for the *next* token.
fn advance_context_after_word(word: &str, ctx: GrammarContext) -> GrammarContext {
    let upper = word.to_uppercase();

    match upper.as_str() {
        "SELECT" => GrammarContext::ExpectColumnList,
        // `SELECT DISTINCT` stays in the column-list context.
        "DISTINCT" if ctx == GrammarContext::ExpectColumnList => GrammarContext::ExpectColumnList,
        "FROM" => GrammarContext::ExpectTable,
        "INTO" => GrammarContext::ExpectTable,
        "UPDATE" => GrammarContext::ExpectTable,
        "TABLE" => GrammarContext::ExpectTable,
        "TRUNCATE" => GrammarContext::ExpectTable,
        "USE" => GrammarContext::ExpectKeyspace,
        "KEYSPACE" => GrammarContext::ExpectKeyspace,
        "WHERE" => GrammarContext::ExpectColumn,
        "AND" => {
            // NOTE(review): unlike context_from_tokens, this treats `AND` in
            // General context as a condition continuation without checking
            // that a WHERE/IF appeared earlier.
            match ctx {
                GrammarContext::ExpectColumn | GrammarContext::General => {
                    GrammarContext::ExpectColumn
                }
                _ => GrammarContext::General,
            }
        }
        "SET" => {
            // After `UPDATE tbl` the context has fallen back to General, so
            // SET starts the assignment list only from there.
            match ctx {
                GrammarContext::General => GrammarContext::ExpectSetClause,
                _ => GrammarContext::General,
            }
        }
        "ORDER" => GrammarContext::ExpectOrderBy,
        "BY" => {
            // `BY` only counts right after `ORDER`.
            if ctx == GrammarContext::ExpectOrderBy {
                GrammarContext::ExpectOrderByColumn
            } else {
                GrammarContext::General
            }
        }
        "VALUES" => GrammarContext::ExpectValues,
        "WITH" => GrammarContext::ExpectWithOption,
        "ON" => {
            // `CREATE INDEX ... ON <table>`; applied unconditionally here.
            GrammarContext::ExpectTable
        }
        "CONSISTENCY" => GrammarContext::ExpectConsistencyLevel,
        "DESCRIBE" | "DESC" => GrammarContext::ExpectDescribeTarget,
        "SOURCE" | "CAPTURE" => GrammarContext::ExpectFilePath,
        // Statement starters reset to General until a later clause keyword.
        "INSERT" => GrammarContext::General,
        "DELETE" => GrammarContext::General,
        "CREATE" | "ALTER" | "DROP" => GrammarContext::General,
        "IF" => GrammarContext::ExpectColumn,
        "LIMIT" => GrammarContext::General,
        _ => {
            // Any other word: a name just satisfied a single-name context;
            // list-style contexts persist across multiple words.
            match ctx {
                GrammarContext::ExpectTable
                | GrammarContext::ExpectKeyspace
                | GrammarContext::ExpectColumn
                | GrammarContext::ExpectOrderByColumn
                | GrammarContext::ExpectQualifiedPart
                | GrammarContext::ExpectDescribeTarget => GrammarContext::General,
                GrammarContext::ExpectColumnList => GrammarContext::ExpectColumnList,
                GrammarContext::ExpectSetClause => GrammarContext::ExpectSetClause,
                other => other,
            }
        }
    }
}
815
816fn advance_context_after_value(ctx: GrammarContext) -> GrammarContext {
818 match ctx {
819 GrammarContext::ExpectColumnList => GrammarContext::ExpectColumnList,
820 _ => GrammarContext::General,
821 }
822}
823
824fn advance_context_after_name(ctx: GrammarContext) -> GrammarContext {
826 match ctx {
827 GrammarContext::ExpectTable
828 | GrammarContext::ExpectKeyspace
829 | GrammarContext::ExpectColumn
830 | GrammarContext::ExpectQualifiedPart
831 | GrammarContext::ExpectOrderByColumn
832 | GrammarContext::ExpectDescribeTarget => GrammarContext::General,
833 GrammarContext::ExpectColumnList => GrammarContext::ExpectColumnList,
834 other => other,
835 }
836}
837
838fn has_keyword_before(significant: &[&Token], keywords: &[&str]) -> bool {
840 significant.iter().rev().skip(1).any(|t| {
841 let upper = t.text.to_uppercase();
842 keywords.contains(&upper.as_str())
843 })
844}
845
846fn is_number_sign_position(tokens: &[Token]) -> bool {
849 match tokens.last() {
850 None => true,
851 Some(t) => matches!(
852 t.kind,
853 TokenKind::Operator
854 | TokenKind::Punctuation
855 | TokenKind::Keyword
856 | TokenKind::Whitespace
857 ),
858 }
859}
860
/// Cheap pre-check used by `tokenize`: `input[start..num_end]` can be the
/// opening UUID segment iff it is exactly 8 hexadecimal digits.
fn looks_like_uuid(input: &str, start: usize, num_end: usize) -> bool {
    let head = &input[start..num_end];
    head.len() == 8 && head.bytes().all(|b| b.is_ascii_hexdigit())
}
867
/// Tries to scan a full 8-4-4-4-12 UUID literal beginning at byte `start`.
///
/// Returns the end offset (exclusive) on success, or `start` unchanged when
/// the text is not a well-formed UUID or is immediately followed by an
/// alphanumeric/underscore byte (which would make it part of a longer word).
fn scan_uuid(input: &str, start: usize) -> usize {
    const SEGMENT_LENGTHS: [usize; 5] = [8, 4, 4, 4, 12];
    let bytes = input.as_bytes();
    let mut pos = start;

    for (idx, &want) in SEGMENT_LENGTHS.iter().enumerate() {
        // Segments after the first must be preceded by a dash.
        if idx > 0 {
            match bytes.get(pos) {
                Some(&b'-') => pos += 1,
                _ => return start,
            }
        }
        // Consume the hex-digit run; it must be exactly `want` long.
        let run_start = pos;
        while bytes.get(pos).map_or(false, |b| b.is_ascii_hexdigit()) {
            pos += 1;
        }
        if pos - run_start != want {
            return start;
        }
    }

    // Reject when the UUID runs straight into more word characters.
    let fused = bytes
        .get(pos)
        .map_or(false, |&b| b.is_ascii_alphanumeric() || b == b'_');
    if fused {
        start
    } else {
        pos
    }
}
899
/// Returns `true` for bytes that begin an operator token.
///
/// Covers the CQL comparison operators and the arithmetic operators `+`,
/// `-`, `/`, `%`.  Previously `-` and `/` were missing (while `+` and `%`
/// were present), so expressions like `c - 1` or `a / b` lexed the operator
/// as `TokenKind::Unknown`.  Adding them is safe because `tokenize` checks
/// `--` line comments, negative number signs and `/* ... */` block comments
/// *before* the operator path; `*` stays punctuation for `SELECT *`.
fn is_operator_char(ch: u8) -> bool {
    matches!(
        ch,
        b'=' | b'<' | b'>' | b'!' | b'+' | b'-' | b'/' | b'%'
    )
}
903
/// True when the byte pair forms one of the two-character operators.
/// They all end in `=`: `<=`, `>=` and `!=`.
fn is_two_char_operator(first: u8, second: u8) -> bool {
    second == b'=' && (first == b'<' || first == b'>' || first == b'!')
}
907
/// True for the single-character punctuation bytes the tokenizer emits.
fn is_punctuation(ch: u8) -> bool {
    const PUNCTUATION: &[u8] = b";,().*?{}[]:";
    PUNCTUATION.contains(&ch)
}
914
/// Byte length of the UTF-8 sequence whose lead byte sits at `i`.
///
/// Out-of-range indices yield 1 so callers always make forward progress.
/// Only called on character boundaries of valid UTF-8 (`input` is a `&str`).
fn char_len_at(bytes: &[u8], i: usize) -> usize {
    match bytes.get(i) {
        None => 1,
        Some(&b) if b < 0x80 => 1,
        Some(&b) if b < 0xE0 => 2,
        Some(&b) if b < 0xF0 => 3,
        Some(_) => 4,
    }
}
931
932pub fn significant_tokens(tokens: &[Token]) -> Vec<&Token> {
934 tokens
935 .iter()
936 .filter(|t| {
937 !matches!(
938 t.kind,
939 TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
940 )
941 })
942 .collect()
943}
944
/// Returns a copy of `input` with comments removed.
///
/// Mirrors the scanning rules of `tokenize` so comment markers inside
/// `'...'`, `"..."` and `$$...$$` literals are left untouched.  A block
/// comment (nesting supported) is replaced by a single space so tokens on
/// either side cannot fuse; a `--` line comment is dropped entirely (its
/// terminating newline, if present, is kept as ordinary text).
pub fn strip_comments(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut i = 0;

    while i < len {
        let ch = bytes[i];

        // `--` line comment: skip up to (not including) the newline.
        if ch == b'-' && i + 1 < len && bytes[i + 1] == b'-' {
            i += 2;
            while i < len && bytes[i] != b'\n' {
                i += 1;
            }
            continue;
        }

        // `/* ... */` block comment with nesting; one space in its place.
        if ch == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
            let mut depth: usize = 1;
            i += 2;
            while i < len && depth > 0 {
                if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
                    depth += 1;
                    i += 2;
                } else if bytes[i] == b'*' && i + 1 < len && bytes[i + 1] == b'/' {
                    depth -= 1;
                    i += 2;
                } else {
                    i += 1;
                }
            }
            result.push(' ');
            continue;
        }

        // Single-quoted string literal; `''` escapes a quote.  Copied
        // verbatim so `--`/`/*` inside it survive.
        if ch == b'\'' {
            let start = i;
            i += 1;
            loop {
                if i >= len {
                    break;
                }
                if bytes[i] == b'\'' {
                    i += 1;
                    if i < len && bytes[i] == b'\'' {
                        i += 1;
                        continue;
                    }
                    break;
                }
                i += char_len_at(bytes, i);
            }
            result.push_str(&input[start..i]);
            continue;
        }

        // Double-quoted identifier; `""` escapes a quote.  Copied verbatim.
        if ch == b'"' {
            let start = i;
            i += 1;
            loop {
                if i >= len {
                    break;
                }
                if bytes[i] == b'"' {
                    i += 1;
                    if i < len && bytes[i] == b'"' {
                        i += 1;
                        continue;
                    }
                    break;
                }
                i += char_len_at(bytes, i);
            }
            result.push_str(&input[start..i]);
            continue;
        }

        // `$$ ... $$` literal; an unterminated one runs to end of input.
        if ch == b'$' && i + 1 < len && bytes[i + 1] == b'$' {
            let start = i;
            i += 2;
            loop {
                if i + 1 >= len {
                    i = len;
                    break;
                }
                if bytes[i] == b'$' && bytes[i + 1] == b'$' {
                    i += 2;
                    break;
                }
                i += 1;
            }
            result.push_str(&input[start..i]);
            continue;
        }

        // Ordinary character: copy the full UTF-8 sequence.
        let clen = char_len_at(bytes, i);
        result.push_str(&input[i..i + clen]);
        i += clen;
    }

    result
}
1059
1060#[cfg(test)]
1061mod tests {
1062 use super::*;
1063
    // --- Helpers shared by the tests below ---

    // Kinds of every token, trivia included; kept for ad-hoc debugging.
    #[allow(dead_code)]
    fn token_kinds(input: &str) -> Vec<TokenKind> {
        tokenize(input).into_iter().map(|t| t.kind).collect()
    }

    // Kinds of all non-whitespace tokens (comments are kept — the tests
    // below only ever feed whitespace as trivia).
    fn significant_kinds(input: &str) -> Vec<TokenKind> {
        tokenize(input)
            .into_iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .map(|t| t.kind)
            .collect()
    }

    // Texts of all non-whitespace tokens.
    fn significant_texts(input: &str) -> Vec<String> {
        tokenize(input)
            .into_iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .map(|t| t.text)
            .collect()
    }
1086
    // --- Keywords vs identifiers, quoted names, and string-ish literals ---

    #[test]
    fn keyword_select() {
        let tokens = tokenize("SELECT");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        assert_eq!(tokens[0].text, "SELECT");
    }

    #[test]
    fn keyword_case_insensitive() {
        let tokens = tokenize("select");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        // Original casing is preserved in the token text.
        assert_eq!(tokens[0].text, "select");
    }

    #[test]
    fn keyword_mixed_case() {
        let tokens = tokenize("Select");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn identifier_plain() {
        let tokens = tokenize("FROM users");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[0].kind, TokenKind::Keyword);
        assert_eq!(sig[1].kind, TokenKind::Identifier);
        assert_eq!(sig[1].text, "users");
    }

    // A keyword-shaped word in table position must read as an identifier.
    #[test]
    fn identifier_after_from_keyword_name() {
        let tokens = tokenize("SELECT * FROM USERS");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[3].text, "USERS");
        assert_eq!(sig[3].kind, TokenKind::Identifier);
    }

    #[test]
    fn identifier_key_after_from() {
        let tokens = tokenize("SELECT * FROM KEY");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[3].text, "KEY");
        assert_eq!(sig[3].kind, TokenKind::Identifier);
    }

    #[test]
    fn identifier_set_after_from() {
        let tokens = tokenize("SELECT * FROM SET");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[3].text, "SET");
        assert_eq!(sig[3].kind, TokenKind::Identifier);
    }

    #[test]
    fn identifier_after_into() {
        let tokens = tokenize("INSERT INTO my_table");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[2].kind, TokenKind::Identifier);
    }

    #[test]
    fn identifier_after_update() {
        let tokens = tokenize("UPDATE my_table SET");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[1].kind, TokenKind::Identifier);
        assert_eq!(sig[1].text, "my_table");
    }

    #[test]
    fn identifier_after_dot() {
        let tokens = tokenize("ks.my_table");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[0].kind, TokenKind::Identifier);
        assert_eq!(sig[1].kind, TokenKind::Punctuation);
        assert_eq!(sig[2].kind, TokenKind::Identifier);
    }

    // The part after a dot is always a name, even if it spells a keyword.
    #[test]
    fn keyword_after_dot_is_identifier() {
        let tokens = tokenize("FROM ks.SELECT");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[2].kind, TokenKind::Punctuation);
        assert_eq!(sig[3].kind, TokenKind::Identifier);
    }

    #[test]
    fn quoted_identifier() {
        let tokens = tokenize("\"MyTable\"");
        assert_eq!(tokens[0].kind, TokenKind::QuotedIdentifier);
        assert_eq!(tokens[0].text, "\"MyTable\"");
    }

    #[test]
    fn quoted_identifier_with_escape() {
        let tokens = tokenize("\"My\"\"Table\"");
        assert_eq!(tokens[0].kind, TokenKind::QuotedIdentifier);
        assert_eq!(tokens[0].text, "\"My\"\"Table\"");
    }

    #[test]
    fn string_literal_simple() {
        let tokens = tokenize("'hello'");
        assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
        assert_eq!(tokens[0].text, "'hello'");
    }

    #[test]
    fn string_literal_escaped_quote() {
        let tokens = tokenize("'it''s'");
        assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
        assert_eq!(tokens[0].text, "'it''s'");
    }

    // A semicolon inside a literal must not terminate anything.
    #[test]
    fn string_literal_with_semicolon() {
        let tokens = tokenize("'hello;world'");
        assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
        assert_eq!(tokens[0].text, "'hello;world'");
    }

    #[test]
    fn string_literal_empty() {
        let tokens = tokenize("''");
        assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
        assert_eq!(tokens[0].text, "''");
    }

    #[test]
    fn dollar_string_literal() {
        let tokens = tokenize("$$hello world$$");
        assert_eq!(tokens[0].kind, TokenKind::DollarStringLiteral);
        assert_eq!(tokens[0].text, "$$hello world$$");
    }

    #[test]
    fn dollar_string_with_semicolon() {
        let tokens = tokenize("$$a;b$$");
        assert_eq!(tokens[0].kind, TokenKind::DollarStringLiteral);
    }

    #[test]
    fn dollar_string_empty() {
        let tokens = tokenize("$$$$");
        assert_eq!(tokens[0].kind, TokenKind::DollarStringLiteral);
        assert_eq!(tokens[0].text, "$$$$");
    }
1264
    // --- Number, blob, UUID and boolean literals ---

    #[test]
    fn number_integer() {
        let tokens = tokenize("42");
        assert_eq!(tokens[0].kind, TokenKind::NumberLiteral);
        assert_eq!(tokens[0].text, "42");
    }

    #[test]
    fn number_decimal() {
        let tokens = tokenize("3.14");
        assert_eq!(tokens[0].kind, TokenKind::NumberLiteral);
        assert_eq!(tokens[0].text, "3.14");
    }

    // A `-` after an operator is a sign, so `-1` is one number token.
    #[test]
    fn number_negative() {
        let tokens = tokenize("= -1");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[1].kind, TokenKind::NumberLiteral);
        assert_eq!(sig[1].text, "-1");
    }

    #[test]
    fn number_exponent() {
        let tokens = tokenize("1.5E10");
        assert_eq!(tokens[0].kind, TokenKind::NumberLiteral);
        assert_eq!(tokens[0].text, "1.5E10");
    }

    #[test]
    fn number_not_part_of_identifier() {
        let tokens = tokenize("LIMIT 100");
        let sig: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind != TokenKind::Whitespace)
            .collect();
        assert_eq!(sig[1].kind, TokenKind::NumberLiteral);
    }

    #[test]
    fn blob_literal() {
        let tokens = tokenize("0xDEADBEEF");
        assert_eq!(tokens[0].kind, TokenKind::BlobLiteral);
        assert_eq!(tokens[0].text, "0xDEADBEEF");
    }

    #[test]
    fn blob_literal_lowercase() {
        let tokens = tokenize("0xdeadbeef");
        assert_eq!(tokens[0].kind, TokenKind::BlobLiteral);
    }

    #[test]
    fn uuid_literal() {
        let tokens = tokenize("550e8400-e29b-41d4-a716-446655440000");
        assert_eq!(tokens[0].kind, TokenKind::UuidLiteral);
    }

    #[test]
    fn boolean_true() {
        let tokens = tokenize("true");
        assert_eq!(tokens[0].kind, TokenKind::BooleanLiteral);
    }

    #[test]
    fn boolean_false() {
        let tokens = tokenize("FALSE");
        assert_eq!(tokens[0].kind, TokenKind::BooleanLiteral);
    }
1337
    // --- Operators, punctuation, whitespace ---

    #[test]
    fn operator_equals() {
        let tokens = tokenize("=");
        assert_eq!(tokens[0].kind, TokenKind::Operator);
    }

    #[test]
    fn operator_less_equal() {
        let tokens = tokenize("<=");
        assert_eq!(tokens[0].kind, TokenKind::Operator);
        // The two characters lex as a single operator token.
        assert_eq!(tokens[0].text, "<=");
    }

    #[test]
    fn operator_greater_equal() {
        let tokens = tokenize(">=");
        assert_eq!(tokens[0].kind, TokenKind::Operator);
    }

    #[test]
    fn operator_not_equal() {
        let tokens = tokenize("!=");
        assert_eq!(tokens[0].kind, TokenKind::Operator);
    }

    #[test]
    fn punctuation_semicolon() {
        let tokens = tokenize(";");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
    }

    #[test]
    fn punctuation_comma() {
        let tokens = tokenize(",");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
    }

    #[test]
    fn punctuation_parens() {
        let kinds = significant_kinds("(x)");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Punctuation,
                TokenKind::Identifier,
                TokenKind::Punctuation
            ]
        );
    }

    // `*` is punctuation (SELECT *), not an operator.
    #[test]
    fn punctuation_star() {
        let tokens = tokenize("*");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
    }

    #[test]
    fn punctuation_question_mark() {
        let tokens = tokenize("?");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
    }

    #[test]
    fn whitespace_space() {
        let tokens = tokenize(" ");
        assert_eq!(tokens[0].kind, TokenKind::Whitespace);
    }

    #[test]
    fn whitespace_tab() {
        let tokens = tokenize("\t");
        assert_eq!(tokens[0].kind, TokenKind::Whitespace);
    }

    #[test]
    fn whitespace_newline() {
        let tokens = tokenize("\n");
        assert_eq!(tokens[0].kind, TokenKind::Whitespace);
    }
1417
    // --- Comments, unknown characters, and span bookkeeping ---

    #[test]
    fn line_comment() {
        let tokens = tokenize("-- this is a comment");
        assert_eq!(tokens[0].kind, TokenKind::LineComment);
        assert_eq!(tokens[0].text, "-- this is a comment");
    }

    // The newline is NOT part of the comment token.
    #[test]
    fn line_comment_stops_at_newline() {
        let tokens = tokenize("-- comment\nSELECT");
        assert_eq!(tokens[0].kind, TokenKind::LineComment);
        assert_eq!(tokens[0].text, "-- comment");
    }

    #[test]
    fn block_comment() {
        let tokens = tokenize("/* block */");
        assert_eq!(tokens[0].kind, TokenKind::BlockComment);
        assert_eq!(tokens[0].text, "/* block */");
    }

    #[test]
    fn block_comment_nested() {
        let tokens = tokenize("/* outer /* inner */ still */");
        assert_eq!(tokens[0].kind, TokenKind::BlockComment);
        assert_eq!(tokens[0].text, "/* outer /* inner */ still */");
    }

    #[test]
    fn block_comment_with_semicolon() {
        let tokens = tokenize("/* ; */");
        assert_eq!(tokens[0].kind, TokenKind::BlockComment);
    }

    #[test]
    fn unknown_char() {
        let tokens = tokenize("@");
        assert_eq!(tokens[0].kind, TokenKind::Unknown);
    }

    #[test]
    fn spans_are_correct() {
        let tokens = tokenize("SELECT *");
        assert_eq!(tokens[0].start, 0);
        assert_eq!(tokens[0].end, 6);
        assert_eq!(tokens[1].start, 6);
        assert_eq!(tokens[1].end, 7);
        assert_eq!(tokens[2].start, 7);
        assert_eq!(tokens[2].end, 8);
    }

    // Token spans must tile the input: contiguous, no gaps, full coverage.
    #[test]
    fn spans_cover_full_input() {
        let input = "SELECT * FROM users WHERE id = 1;";
        let tokens = tokenize(input);
        let last = tokens.last().unwrap();
        assert_eq!(last.end, input.len());
        for window in tokens.windows(2) {
            assert_eq!(
                window[0].end, window[1].start,
                "gap between {:?} and {:?}",
                window[0], window[1]
            );
        }
    }
1487
    // --- Whole-statement classification ---

    #[test]
    fn select_star_from_users() {
        let kinds = significant_kinds("SELECT * FROM users");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Keyword,
                TokenKind::Punctuation,
                TokenKind::Keyword,
                TokenKind::Identifier
            ]
        );
    }

    #[test]
    fn select_with_where() {
        let kinds = significant_kinds("SELECT name FROM users WHERE id = 1");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Operator,
                TokenKind::NumberLiteral,
            ]
        );
    }

    #[test]
    fn insert_statement() {
        let kinds = significant_kinds("INSERT INTO my_table (id, name) VALUES (1, 'hello')");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Keyword,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Punctuation,
                TokenKind::Identifier,
                TokenKind::Punctuation,
                TokenKind::Identifier,
                TokenKind::Punctuation,
                TokenKind::Keyword,
                TokenKind::Punctuation,
                TokenKind::NumberLiteral,
                TokenKind::Punctuation,
                TokenKind::StringLiteral,
                TokenKind::Punctuation,
            ]
        );
    }

    #[test]
    fn update_statement() {
        let kinds = significant_kinds("UPDATE users SET name = 'Alice' WHERE id = 1");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Operator,
                TokenKind::StringLiteral,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Operator,
                TokenKind::NumberLiteral,
            ]
        );
    }

    // Note: the type `int` lexes as Identifier, not Keyword.
    #[test]
    fn create_table() {
        let kinds = significant_kinds("CREATE TABLE ks.my_table (id int PRIMARY KEY)");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Keyword,
                TokenKind::Keyword,
                TokenKind::Identifier,
                TokenKind::Punctuation,
                TokenKind::Identifier,
                TokenKind::Punctuation,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Keyword,
                TokenKind::Keyword,
                TokenKind::Punctuation,
            ]
        );
    }

    #[test]
    fn use_keyspace() {
        let kinds = significant_kinds("USE my_keyspace");
        assert_eq!(kinds, vec![TokenKind::Keyword, TokenKind::Identifier]);
    }
1592
1593 #[test]
1594 fn qualified_table_name() {
1595 let texts = significant_texts("SELECT * FROM ks.users");
1596 assert_eq!(texts, vec!["SELECT", "*", "FROM", "ks", ".", "users"]);
1597 let kinds = significant_kinds("SELECT * FROM ks.users");
1598 assert_eq!(kinds[3], TokenKind::Identifier); assert_eq!(kinds[4], TokenKind::Punctuation); assert_eq!(kinds[5], TokenKind::Identifier); }
1602
1603 #[test]
1604 fn statement_with_string_containing_keyword() {
1605 let kinds = significant_kinds("INSERT INTO t (v) VALUES ('SELECT FROM')");
1606 assert!(kinds.contains(&TokenKind::StringLiteral));
1608 assert_eq!(
1610 kinds.iter().filter(|k| **k == TokenKind::Keyword).count(),
1611 3
1612 );
1613 }
1614
1615 #[test]
1616 fn statement_with_comment() {
1617 let tokens = tokenize("SELECT 1 -- comment");
1618 let sig: Vec<_> = tokens
1619 .iter()
1620 .filter(|t| t.kind != TokenKind::Whitespace)
1621 .collect();
1622 assert_eq!(sig.len(), 3); assert_eq!(sig[2].kind, TokenKind::LineComment);
1624 }
1625
1626 #[test]
1627 fn statement_with_block_comment() {
1628 let tokens = tokenize("SELECT /* mid */ 1");
1629 let sig: Vec<_> = tokens
1630 .iter()
1631 .filter(|t| t.kind != TokenKind::Whitespace)
1632 .collect();
1633 assert_eq!(sig[0].kind, TokenKind::Keyword); assert_eq!(sig[1].kind, TokenKind::BlockComment); assert_eq!(sig[2].kind, TokenKind::NumberLiteral); }
1637
1638 #[test]
1641 fn context_at_start() {
1642 assert_eq!(grammar_context_at_end(""), GrammarContext::Start);
1643 }
1644
1645 #[test]
1646 fn context_after_select() {
1647 assert_eq!(
1648 grammar_context_at_end("SELECT "),
1649 GrammarContext::ExpectColumnList
1650 );
1651 }
1652
1653 #[test]
1654 fn context_after_from() {
1655 assert_eq!(
1656 grammar_context_at_end("SELECT * FROM "),
1657 GrammarContext::ExpectTable
1658 );
1659 }
1660
1661 #[test]
1662 fn context_after_into() {
1663 assert_eq!(
1664 grammar_context_at_end("INSERT INTO "),
1665 GrammarContext::ExpectTable
1666 );
1667 }
1668
1669 #[test]
1670 fn context_after_update() {
1671 assert_eq!(
1672 grammar_context_at_end("UPDATE "),
1673 GrammarContext::ExpectTable
1674 );
1675 }
1676
1677 #[test]
1678 fn context_after_use() {
1679 assert_eq!(
1680 grammar_context_at_end("USE "),
1681 GrammarContext::ExpectKeyspace
1682 );
1683 }
1684
1685 #[test]
1686 fn context_after_where() {
1687 assert_eq!(
1688 grammar_context_at_end("SELECT * FROM t WHERE "),
1689 GrammarContext::ExpectColumn
1690 );
1691 }
1692
1693 #[test]
1694 fn context_after_dot() {
1695 assert_eq!(
1696 grammar_context_at_end("ks."),
1697 GrammarContext::ExpectQualifiedPart
1698 );
1699 }
1700
1701 #[test]
1702 fn context_after_table_name() {
1703 assert_eq!(
1704 grammar_context_at_end("SELECT * FROM users "),
1705 GrammarContext::General
1706 );
1707 }
1708
1709 #[test]
1710 fn context_after_consistency() {
1711 assert_eq!(
1712 grammar_context_at_end("CONSISTENCY "),
1713 GrammarContext::ExpectConsistencyLevel
1714 );
1715 }
1716
1717 #[test]
1718 fn context_after_describe() {
1719 assert_eq!(
1720 grammar_context_at_end("DESCRIBE "),
1721 GrammarContext::ExpectDescribeTarget
1722 );
1723 }
1724
1725 #[test]
1726 fn context_after_source() {
1727 assert_eq!(
1728 grammar_context_at_end("SOURCE "),
1729 GrammarContext::ExpectFilePath
1730 );
1731 }
1732
1733 #[test]
1734 fn context_after_order_by() {
1735 assert_eq!(
1736 grammar_context_at_end("SELECT * FROM t ORDER BY "),
1737 GrammarContext::ExpectOrderByColumn
1738 );
1739 }
1740
1741 #[test]
1742 fn context_after_values() {
1743 assert_eq!(
1744 grammar_context_at_end("INSERT INTO t (id) VALUES "),
1745 GrammarContext::ExpectValues
1746 );
1747 }
1748
1749 #[test]
1750 fn context_after_with() {
1751 assert_eq!(
1752 grammar_context_at_end("CREATE TABLE t (id int) WITH "),
1753 GrammarContext::ExpectWithOption
1754 );
1755 }
1756
1757 #[test]
1760 fn empty_input() {
1761 assert!(tokenize("").is_empty());
1762 }
1763
1764 #[test]
1765 fn only_whitespace() {
1766 let tokens = tokenize(" \t\n ");
1767 assert_eq!(tokens.len(), 1);
1768 assert_eq!(tokens[0].kind, TokenKind::Whitespace);
1769 }
1770
1771 #[test]
1772 fn only_comment() {
1773 let tokens = tokenize("-- just a comment");
1774 assert_eq!(tokens.len(), 1);
1775 assert_eq!(tokens[0].kind, TokenKind::LineComment);
1776 }
1777
1778 #[test]
1779 fn unterminated_string() {
1780 let tokens = tokenize("'unterminated");
1781 assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
1782 assert_eq!(tokens[0].text, "'unterminated");
1783 }
1784
1785 #[test]
1786 fn unterminated_quoted_identifier() {
1787 let tokens = tokenize("\"unterminated");
1788 assert_eq!(tokens[0].kind, TokenKind::QuotedIdentifier);
1789 }
1790
1791 #[test]
1792 fn unterminated_dollar_string() {
1793 let tokens = tokenize("$$unterminated");
1794 assert_eq!(tokens[0].kind, TokenKind::DollarStringLiteral);
1795 }
1796
1797 #[test]
1798 fn unterminated_block_comment() {
1799 let tokens = tokenize("/* unterminated");
1800 assert_eq!(tokens[0].kind, TokenKind::BlockComment);
1801 }
1802
1803 #[test]
1804 fn unicode_in_string() {
1805 let tokens = tokenize("'héllo wörld'");
1806 assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
1807 assert_eq!(tokens[0].text, "'héllo wörld'");
1808 }
1809
1810 #[test]
1811 fn unicode_in_quoted_identifier() {
1812 let tokens = tokenize("\"naïve\"");
1813 assert_eq!(tokens[0].kind, TokenKind::QuotedIdentifier);
1814 assert_eq!(tokens[0].text, "\"naïve\"");
1815 }
1816
1817 #[test]
1818 fn multiple_statements() {
1819 let tokens = tokenize("SELECT 1; SELECT 2;");
1820 let semis: Vec<_> = tokens.iter().filter(|t| t.text == ";").collect();
1821 assert_eq!(semis.len(), 2);
1822 }
1823
1824 #[test]
1825 fn comment_like_in_string() {
1826 let tokens = tokenize("'-- not a comment'");
1827 assert_eq!(tokens.len(), 1);
1828 assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
1829 }
1830
1831 #[test]
1832 fn block_comment_like_in_string() {
1833 let tokens = tokenize("'/* not a comment */'");
1834 assert_eq!(tokens.len(), 1);
1835 assert_eq!(tokens[0].kind, TokenKind::StringLiteral);
1836 }
1837
1838 #[test]
1839 fn negative_number_after_operator() {
1840 let tokens = tokenize("id = -42");
1841 let sig: Vec<_> = tokens
1842 .iter()
1843 .filter(|t| t.kind != TokenKind::Whitespace)
1844 .collect();
1845 assert_eq!(sig[2].kind, TokenKind::NumberLiteral);
1846 assert_eq!(sig[2].text, "-42");
1847 }
1848
1849 #[test]
1850 fn minus_as_operator_after_number() {
1851 let tokens = tokenize("5 - 3");
1852 let sig: Vec<_> = tokens
1853 .iter()
1854 .filter(|t| t.kind != TokenKind::Whitespace)
1855 .collect();
1856 assert_eq!(sig[0].kind, TokenKind::NumberLiteral);
1857 assert!(sig.len() >= 3);
1862 }
1863
1864 #[test]
1865 fn blob_after_value_context() {
1866 let tokens = tokenize("INSERT INTO t (b) VALUES (0xDEAD)");
1867 let sig: Vec<_> = tokens
1868 .iter()
1869 .filter(|t| t.kind != TokenKind::Whitespace)
1870 .collect();
1871 let blob = sig.iter().find(|t| t.text == "0xDEAD").unwrap();
1872 assert_eq!(blob.kind, TokenKind::BlobLiteral);
1873 }
1874
1875 #[test]
1876 fn keyword_list_is_sorted() {
1877 for window in CQL_KEYWORDS.windows(2) {
1878 assert!(
1879 window[0] < window[1],
1880 "CQL_KEYWORDS not sorted: {:?} >= {:?}",
1881 window[0],
1882 window[1]
1883 );
1884 }
1885 }
1886
1887 #[test]
1890 fn strip_line_comment() {
1891 let result = strip_comments("SELECT 1 -- comment");
1892 assert_eq!(result, "SELECT 1 ");
1893 }
1894
1895 #[test]
1896 fn strip_block_comment() {
1897 let result = strip_comments("SELECT /* x */ 1");
1898 assert_eq!(result, "SELECT 1");
1899 }
1900
1901 #[test]
1902 fn strip_preserves_strings() {
1903 let result = strip_comments("SELECT '-- not a comment'");
1904 assert_eq!(result, "SELECT '-- not a comment'");
1905 }
1906
1907 #[test]
1908 fn strip_preserves_dollar_strings() {
1909 let result = strip_comments("SELECT $$-- not a comment$$");
1910 assert_eq!(result, "SELECT $$-- not a comment$$");
1911 }
1912
1913 #[test]
1914 fn strip_nested_block_comments() {
1915 let result = strip_comments("SELECT /* outer /* inner */ still */ 1");
1916 assert_eq!(result, "SELECT 1");
1917 }
1918
1919 #[test]
1922 fn keyword_lookup_positive() {
1923 assert!(is_cql_keyword("SELECT"));
1924 assert!(is_cql_keyword("select"));
1925 assert!(is_cql_keyword("From"));
1926 assert!(is_cql_keyword("WHERE"));
1927 }
1928
1929 #[test]
1930 fn keyword_lookup_negative() {
1931 assert!(!is_cql_keyword("my_table"));
1932 assert!(!is_cql_keyword("hello"));
1933 assert!(!is_cql_keyword("xyz"));
1934 }
1935
1936 #[test]
1939 fn significant_tokens_filters_whitespace_and_comments() {
1940 let tokens = tokenize("SELECT /* comment */ * -- line\nFROM t");
1941 let sig = significant_tokens(&tokens);
1942 let kinds: Vec<_> = sig.iter().map(|t| &t.kind).collect();
1943 assert!(!kinds.contains(&&TokenKind::Whitespace));
1944 assert!(!kinds.contains(&&TokenKind::LineComment));
1945 assert!(!kinds.contains(&&TokenKind::BlockComment));
1946 }
1947
1948 #[test]
1951 fn users_not_keyword_after_from() {
1952 let tokens = tokenize("SELECT * FROM users");
1953 let sig: Vec<_> = tokens
1954 .iter()
1955 .filter(|t| t.kind != TokenKind::Whitespace)
1956 .collect();
1957 assert_eq!(sig[3].text, "users");
1958 assert_eq!(sig[3].kind, TokenKind::Identifier);
1959 }
1960
1961 #[test]
1962 fn key_not_keyword_after_from() {
1963 let tokens = tokenize("SELECT key FROM my_table WHERE key = 1");
1964 let sig: Vec<_> = tokens
1965 .iter()
1966 .filter(|t| t.kind != TokenKind::Whitespace)
1967 .collect();
1968 assert_eq!(sig[1].kind, TokenKind::Identifier);
1970 assert_eq!(sig[3].kind, TokenKind::Identifier);
1972 assert_eq!(sig[5].kind, TokenKind::Identifier);
1974 }
1975
1976 #[test]
1977 fn set_not_keyword_in_column_list() {
1978 let tokens = tokenize("SELECT set FROM my_table");
1980 let sig: Vec<_> = tokens
1981 .iter()
1982 .filter(|t| t.kind != TokenKind::Whitespace)
1983 .collect();
1984 assert_eq!(sig[1].text, "set");
1985 assert_eq!(sig[1].kind, TokenKind::Identifier);
1986 }
1987
1988 #[test]
1989 fn column_names_after_where_are_identifiers() {
1990 let tokens = tokenize("SELECT * FROM t WHERE user = 'test' AND key = 1");
1991 let sig: Vec<_> = tokens
1992 .iter()
1993 .filter(|t| t.kind != TokenKind::Whitespace)
1994 .collect();
1995 assert_eq!(sig[5].text, "user");
1997 assert_eq!(sig[5].kind, TokenKind::Identifier);
1998 assert_eq!(sig[9].text, "key");
2000 assert_eq!(sig[9].kind, TokenKind::Identifier);
2001 }
2002
2003 #[test]
2006 fn select_with_function() {
2007 let tokens = tokenize("SELECT count(*) FROM users");
2008 let sig: Vec<_> = tokens
2009 .iter()
2010 .filter(|t| t.kind != TokenKind::Whitespace)
2011 .collect();
2012 assert_eq!(sig[0].kind, TokenKind::Keyword); assert_eq!(sig[1].kind, TokenKind::Identifier); }
2015
2016 #[test]
2017 fn batch_statement() {
2018 let input =
2019 "BEGIN BATCH INSERT INTO t (id) VALUES (1); INSERT INTO t (id) VALUES (2); APPLY BATCH";
2020 let tokens = tokenize(input);
2021 let keywords: Vec<_> = tokens
2022 .iter()
2023 .filter(|t| t.kind == TokenKind::Keyword)
2024 .collect();
2025 assert!(keywords.iter().any(|t| t.text.to_uppercase() == "BEGIN"));
2026 assert!(keywords.iter().any(|t| t.text.to_uppercase() == "BATCH"));
2027 assert!(keywords.iter().any(|t| t.text.to_uppercase() == "APPLY"));
2028 }
2029
2030 #[test]
2031 fn delete_from() {
2032 let kinds = significant_kinds("DELETE FROM users WHERE id = 1");
2033 assert_eq!(kinds[0], TokenKind::Keyword); assert_eq!(kinds[1], TokenKind::Keyword); assert_eq!(kinds[2], TokenKind::Identifier); }
2037
2038 #[test]
2039 fn describe_table() {
2040 let kinds = significant_kinds("DESCRIBE TABLE users");
2041 assert_eq!(kinds[0], TokenKind::Keyword); assert_eq!(kinds[1], TokenKind::Keyword); assert_eq!(kinds[2], TokenKind::Identifier); }
2045
2046 #[test]
2047 fn truncate_table() {
2048 let kinds = significant_kinds("TRUNCATE users");
2049 assert_eq!(kinds[0], TokenKind::Keyword); assert_eq!(kinds[1], TokenKind::Identifier); }
2052
2053 #[test]
2054 fn select_distinct() {
2055 let kinds = significant_kinds("SELECT DISTINCT partition_key FROM t");
2056 assert_eq!(kinds[0], TokenKind::Keyword); assert_eq!(kinds[1], TokenKind::Keyword); assert_eq!(kinds[2], TokenKind::Identifier); }
2060
2061 #[test]
2062 fn consistency_level() {
2063 let kinds = significant_kinds("CONSISTENCY QUORUM");
2064 assert_eq!(kinds[0], TokenKind::Keyword); assert_eq!(kinds[1], TokenKind::Identifier); }
2067
2068 #[test]
2069 fn serial_consistency() {
2070 let ctx = grammar_context_at_end("SERIAL CONSISTENCY ");
2071 assert_eq!(ctx, GrammarContext::ExpectConsistencyLevel);
2072 }
2073
2074 #[test]
2075 fn order_by_column() {
2076 let sig: Vec<_> = tokenize("SELECT * FROM t ORDER BY created_at")
2077 .into_iter()
2078 .filter(|t| t.kind != TokenKind::Whitespace)
2079 .collect();
2080 assert_eq!(sig.last().unwrap().kind, TokenKind::Identifier); }
2082}