sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151class _Parser(type): 152 def __new__(cls, clsname, bases, attrs): 153 klass = super().__new__(cls, clsname, bases, attrs) 154 155 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 156 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 157 158 return klass 159 160 161class Parser(metaclass=_Parser): 162 """ 163 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 164 165 Args: 166 error_level: The desired error level. 
167 Default: ErrorLevel.IMMEDIATE 168 error_message_context: The amount of context to capture from a query string when displaying 169 the error message (in number of characters). 170 Default: 100 171 max_errors: Maximum number of error messages to include in a raised ParseError. 172 This is only relevant if error_level is ErrorLevel.RAISE. 173 Default: 3 174 """ 175 176 FUNCTIONS: t.Dict[str, t.Callable] = { 177 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 178 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 179 "CONCAT": lambda args, dialect: exp.Concat( 180 expressions=args, 181 safe=not dialect.STRICT_STRING_CONCAT, 182 coalesce=dialect.CONCAT_COALESCE, 183 ), 184 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 185 expressions=args, 186 safe=not dialect.STRICT_STRING_CONCAT, 187 coalesce=dialect.CONCAT_COALESCE, 188 ), 189 "CONVERT_TIMEZONE": build_convert_timezone, 190 "DATE_TO_DATE_STR": lambda args: exp.Cast( 191 this=seq_get(args, 0), 192 to=exp.DataType(this=exp.DataType.Type.TEXT), 193 ), 194 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 195 start=seq_get(args, 0), 196 end=seq_get(args, 1), 197 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 198 ), 199 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 200 "HEX": build_hex, 201 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 202 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 203 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 204 "LIKE": build_like, 205 "LOG": build_logarithm, 206 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 207 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 208 "LOWER": build_lower, 209 "LPAD": lambda args: build_pad(args), 210 "LEFTPAD": lambda args: build_pad(args), 211 "LTRIM": lambda args: 
build_trim(args), 212 "MOD": build_mod, 213 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 214 "RPAD": lambda args: build_pad(args, is_left=False), 215 "RTRIM": lambda args: build_trim(args, is_left=False), 216 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 217 if len(args) != 2 218 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 219 "TIME_TO_TIME_STR": lambda args: exp.Cast( 220 this=seq_get(args, 0), 221 to=exp.DataType(this=exp.DataType.Type.TEXT), 222 ), 223 "TO_HEX": build_hex, 224 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 225 this=exp.Cast( 226 this=seq_get(args, 0), 227 to=exp.DataType(this=exp.DataType.Type.TEXT), 228 ), 229 start=exp.Literal.number(1), 230 length=exp.Literal.number(10), 231 ), 232 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 233 "UPPER": build_upper, 234 "VAR_MAP": build_var_map, 235 "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]), 236 } 237 238 NO_PAREN_FUNCTIONS = { 239 TokenType.CURRENT_DATE: exp.CurrentDate, 240 TokenType.CURRENT_DATETIME: exp.CurrentDate, 241 TokenType.CURRENT_TIME: exp.CurrentTime, 242 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 243 TokenType.CURRENT_USER: exp.CurrentUser, 244 } 245 246 STRUCT_TYPE_TOKENS = { 247 TokenType.NESTED, 248 TokenType.OBJECT, 249 TokenType.STRUCT, 250 } 251 252 NESTED_TYPE_TOKENS = { 253 TokenType.ARRAY, 254 TokenType.LIST, 255 TokenType.LOWCARDINALITY, 256 TokenType.MAP, 257 TokenType.NULLABLE, 258 *STRUCT_TYPE_TOKENS, 259 } 260 261 ENUM_TYPE_TOKENS = { 262 TokenType.ENUM, 263 TokenType.ENUM8, 264 TokenType.ENUM16, 265 } 266 267 AGGREGATE_TYPE_TOKENS = { 268 TokenType.AGGREGATEFUNCTION, 269 TokenType.SIMPLEAGGREGATEFUNCTION, 270 } 271 272 TYPE_TOKENS = { 273 TokenType.BIT, 274 TokenType.BOOLEAN, 275 TokenType.TINYINT, 276 TokenType.UTINYINT, 277 TokenType.SMALLINT, 278 TokenType.USMALLINT, 279 TokenType.INT, 280 TokenType.UINT, 281 
TokenType.BIGINT, 282 TokenType.UBIGINT, 283 TokenType.INT128, 284 TokenType.UINT128, 285 TokenType.INT256, 286 TokenType.UINT256, 287 TokenType.MEDIUMINT, 288 TokenType.UMEDIUMINT, 289 TokenType.FIXEDSTRING, 290 TokenType.FLOAT, 291 TokenType.DOUBLE, 292 TokenType.CHAR, 293 TokenType.NCHAR, 294 TokenType.VARCHAR, 295 TokenType.NVARCHAR, 296 TokenType.BPCHAR, 297 TokenType.TEXT, 298 TokenType.MEDIUMTEXT, 299 TokenType.LONGTEXT, 300 TokenType.MEDIUMBLOB, 301 TokenType.LONGBLOB, 302 TokenType.BINARY, 303 TokenType.VARBINARY, 304 TokenType.JSON, 305 TokenType.JSONB, 306 TokenType.INTERVAL, 307 TokenType.TINYBLOB, 308 TokenType.TINYTEXT, 309 TokenType.TIME, 310 TokenType.TIMETZ, 311 TokenType.TIMESTAMP, 312 TokenType.TIMESTAMP_S, 313 TokenType.TIMESTAMP_MS, 314 TokenType.TIMESTAMP_NS, 315 TokenType.TIMESTAMPTZ, 316 TokenType.TIMESTAMPLTZ, 317 TokenType.TIMESTAMPNTZ, 318 TokenType.DATETIME, 319 TokenType.DATETIME64, 320 TokenType.DATE, 321 TokenType.DATE32, 322 TokenType.INT4RANGE, 323 TokenType.INT4MULTIRANGE, 324 TokenType.INT8RANGE, 325 TokenType.INT8MULTIRANGE, 326 TokenType.NUMRANGE, 327 TokenType.NUMMULTIRANGE, 328 TokenType.TSRANGE, 329 TokenType.TSMULTIRANGE, 330 TokenType.TSTZRANGE, 331 TokenType.TSTZMULTIRANGE, 332 TokenType.DATERANGE, 333 TokenType.DATEMULTIRANGE, 334 TokenType.DECIMAL, 335 TokenType.UDECIMAL, 336 TokenType.BIGDECIMAL, 337 TokenType.UUID, 338 TokenType.GEOGRAPHY, 339 TokenType.GEOMETRY, 340 TokenType.HLLSKETCH, 341 TokenType.HSTORE, 342 TokenType.PSEUDO_TYPE, 343 TokenType.SUPER, 344 TokenType.SERIAL, 345 TokenType.SMALLSERIAL, 346 TokenType.BIGSERIAL, 347 TokenType.XML, 348 TokenType.YEAR, 349 TokenType.UNIQUEIDENTIFIER, 350 TokenType.USERDEFINED, 351 TokenType.MONEY, 352 TokenType.SMALLMONEY, 353 TokenType.ROWVERSION, 354 TokenType.IMAGE, 355 TokenType.VARIANT, 356 TokenType.VECTOR, 357 TokenType.OBJECT, 358 TokenType.OBJECT_IDENTIFIER, 359 TokenType.INET, 360 TokenType.IPADDRESS, 361 TokenType.IPPREFIX, 362 TokenType.IPV4, 363 
TokenType.IPV6, 364 TokenType.UNKNOWN, 365 TokenType.NULL, 366 TokenType.NAME, 367 TokenType.TDIGEST, 368 *ENUM_TYPE_TOKENS, 369 *NESTED_TYPE_TOKENS, 370 *AGGREGATE_TYPE_TOKENS, 371 } 372 373 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 374 TokenType.BIGINT: TokenType.UBIGINT, 375 TokenType.INT: TokenType.UINT, 376 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 377 TokenType.SMALLINT: TokenType.USMALLINT, 378 TokenType.TINYINT: TokenType.UTINYINT, 379 TokenType.DECIMAL: TokenType.UDECIMAL, 380 } 381 382 SUBQUERY_PREDICATES = { 383 TokenType.ANY: exp.Any, 384 TokenType.ALL: exp.All, 385 TokenType.EXISTS: exp.Exists, 386 TokenType.SOME: exp.Any, 387 } 388 389 RESERVED_TOKENS = { 390 *Tokenizer.SINGLE_TOKENS.values(), 391 TokenType.SELECT, 392 } - {TokenType.IDENTIFIER} 393 394 DB_CREATABLES = { 395 TokenType.DATABASE, 396 TokenType.DICTIONARY, 397 TokenType.MODEL, 398 TokenType.SCHEMA, 399 TokenType.SEQUENCE, 400 TokenType.STORAGE_INTEGRATION, 401 TokenType.TABLE, 402 TokenType.TAG, 403 TokenType.VIEW, 404 TokenType.WAREHOUSE, 405 TokenType.STREAMLIT, 406 } 407 408 CREATABLES = { 409 TokenType.COLUMN, 410 TokenType.CONSTRAINT, 411 TokenType.FOREIGN_KEY, 412 TokenType.FUNCTION, 413 TokenType.INDEX, 414 TokenType.PROCEDURE, 415 *DB_CREATABLES, 416 } 417 418 ALTERABLES = { 419 TokenType.TABLE, 420 TokenType.VIEW, 421 } 422 423 # Tokens that can represent identifiers 424 ID_VAR_TOKENS = { 425 TokenType.ALL, 426 TokenType.VAR, 427 TokenType.ANTI, 428 TokenType.APPLY, 429 TokenType.ASC, 430 TokenType.ASOF, 431 TokenType.AUTO_INCREMENT, 432 TokenType.BEGIN, 433 TokenType.BPCHAR, 434 TokenType.CACHE, 435 TokenType.CASE, 436 TokenType.COLLATE, 437 TokenType.COMMAND, 438 TokenType.COMMENT, 439 TokenType.COMMIT, 440 TokenType.CONSTRAINT, 441 TokenType.COPY, 442 TokenType.CUBE, 443 TokenType.DEFAULT, 444 TokenType.DELETE, 445 TokenType.DESC, 446 TokenType.DESCRIBE, 447 TokenType.DICTIONARY, 448 TokenType.DIV, 449 TokenType.END, 450 TokenType.EXECUTE, 451 TokenType.ESCAPE, 452 TokenType.FALSE, 
453 TokenType.FIRST, 454 TokenType.FILTER, 455 TokenType.FINAL, 456 TokenType.FORMAT, 457 TokenType.FULL, 458 TokenType.IDENTIFIER, 459 TokenType.IS, 460 TokenType.ISNULL, 461 TokenType.INTERVAL, 462 TokenType.KEEP, 463 TokenType.KILL, 464 TokenType.LEFT, 465 TokenType.LOAD, 466 TokenType.MERGE, 467 TokenType.NATURAL, 468 TokenType.NEXT, 469 TokenType.OFFSET, 470 TokenType.OPERATOR, 471 TokenType.ORDINALITY, 472 TokenType.OVERLAPS, 473 TokenType.OVERWRITE, 474 TokenType.PARTITION, 475 TokenType.PERCENT, 476 TokenType.PIVOT, 477 TokenType.PRAGMA, 478 TokenType.RANGE, 479 TokenType.RECURSIVE, 480 TokenType.REFERENCES, 481 TokenType.REFRESH, 482 TokenType.RENAME, 483 TokenType.REPLACE, 484 TokenType.RIGHT, 485 TokenType.ROLLUP, 486 TokenType.ROW, 487 TokenType.ROWS, 488 TokenType.SEMI, 489 TokenType.SET, 490 TokenType.SETTINGS, 491 TokenType.SHOW, 492 TokenType.TEMPORARY, 493 TokenType.TOP, 494 TokenType.TRUE, 495 TokenType.TRUNCATE, 496 TokenType.UNIQUE, 497 TokenType.UNNEST, 498 TokenType.UNPIVOT, 499 TokenType.UPDATE, 500 TokenType.USE, 501 TokenType.VOLATILE, 502 TokenType.WINDOW, 503 *CREATABLES, 504 *SUBQUERY_PREDICATES, 505 *TYPE_TOKENS, 506 *NO_PAREN_FUNCTIONS, 507 } 508 509 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 510 511 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 512 TokenType.ANTI, 513 TokenType.APPLY, 514 TokenType.ASOF, 515 TokenType.FULL, 516 TokenType.LEFT, 517 TokenType.LOCK, 518 TokenType.NATURAL, 519 TokenType.OFFSET, 520 TokenType.RIGHT, 521 TokenType.SEMI, 522 TokenType.WINDOW, 523 } 524 525 ALIAS_TOKENS = ID_VAR_TOKENS 526 527 ARRAY_CONSTRUCTORS = { 528 "ARRAY": exp.Array, 529 "LIST": exp.List, 530 } 531 532 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 533 534 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 535 536 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 537 538 FUNC_TOKENS = { 539 TokenType.COLLATE, 540 TokenType.COMMAND, 541 TokenType.CURRENT_DATE, 542 TokenType.CURRENT_DATETIME, 543 
TokenType.CURRENT_TIMESTAMP, 544 TokenType.CURRENT_TIME, 545 TokenType.CURRENT_USER, 546 TokenType.FILTER, 547 TokenType.FIRST, 548 TokenType.FORMAT, 549 TokenType.GLOB, 550 TokenType.IDENTIFIER, 551 TokenType.INDEX, 552 TokenType.ISNULL, 553 TokenType.ILIKE, 554 TokenType.INSERT, 555 TokenType.LIKE, 556 TokenType.MERGE, 557 TokenType.OFFSET, 558 TokenType.PRIMARY_KEY, 559 TokenType.RANGE, 560 TokenType.REPLACE, 561 TokenType.RLIKE, 562 TokenType.ROW, 563 TokenType.UNNEST, 564 TokenType.VAR, 565 TokenType.LEFT, 566 TokenType.RIGHT, 567 TokenType.SEQUENCE, 568 TokenType.DATE, 569 TokenType.DATETIME, 570 TokenType.TABLE, 571 TokenType.TIMESTAMP, 572 TokenType.TIMESTAMPTZ, 573 TokenType.TRUNCATE, 574 TokenType.WINDOW, 575 TokenType.XOR, 576 *TYPE_TOKENS, 577 *SUBQUERY_PREDICATES, 578 } 579 580 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 581 TokenType.AND: exp.And, 582 } 583 584 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 585 TokenType.COLON_EQ: exp.PropertyEQ, 586 } 587 588 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 589 TokenType.OR: exp.Or, 590 } 591 592 EQUALITY = { 593 TokenType.EQ: exp.EQ, 594 TokenType.NEQ: exp.NEQ, 595 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 596 } 597 598 COMPARISON = { 599 TokenType.GT: exp.GT, 600 TokenType.GTE: exp.GTE, 601 TokenType.LT: exp.LT, 602 TokenType.LTE: exp.LTE, 603 } 604 605 BITWISE = { 606 TokenType.AMP: exp.BitwiseAnd, 607 TokenType.CARET: exp.BitwiseXor, 608 TokenType.PIPE: exp.BitwiseOr, 609 } 610 611 TERM = { 612 TokenType.DASH: exp.Sub, 613 TokenType.PLUS: exp.Add, 614 TokenType.MOD: exp.Mod, 615 TokenType.COLLATE: exp.Collate, 616 } 617 618 FACTOR = { 619 TokenType.DIV: exp.IntDiv, 620 TokenType.LR_ARROW: exp.Distance, 621 TokenType.SLASH: exp.Div, 622 TokenType.STAR: exp.Mul, 623 } 624 625 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 626 627 TIMES = { 628 TokenType.TIME, 629 TokenType.TIMETZ, 630 } 631 632 TIMESTAMPS = { 633 TokenType.TIMESTAMP, 634 
TokenType.TIMESTAMPTZ, 635 TokenType.TIMESTAMPLTZ, 636 *TIMES, 637 } 638 639 SET_OPERATIONS = { 640 TokenType.UNION, 641 TokenType.INTERSECT, 642 TokenType.EXCEPT, 643 } 644 645 JOIN_METHODS = { 646 TokenType.ASOF, 647 TokenType.NATURAL, 648 TokenType.POSITIONAL, 649 } 650 651 JOIN_SIDES = { 652 TokenType.LEFT, 653 TokenType.RIGHT, 654 TokenType.FULL, 655 } 656 657 JOIN_KINDS = { 658 TokenType.ANTI, 659 TokenType.CROSS, 660 TokenType.INNER, 661 TokenType.OUTER, 662 TokenType.SEMI, 663 TokenType.STRAIGHT_JOIN, 664 } 665 666 JOIN_HINTS: t.Set[str] = set() 667 668 LAMBDAS = { 669 TokenType.ARROW: lambda self, expressions: self.expression( 670 exp.Lambda, 671 this=self._replace_lambda( 672 self._parse_assignment(), 673 expressions, 674 ), 675 expressions=expressions, 676 ), 677 TokenType.FARROW: lambda self, expressions: self.expression( 678 exp.Kwarg, 679 this=exp.var(expressions[0].name), 680 expression=self._parse_assignment(), 681 ), 682 } 683 684 COLUMN_OPERATORS = { 685 TokenType.DOT: None, 686 TokenType.DCOLON: lambda self, this, to: self.expression( 687 exp.Cast if self.STRICT_CAST else exp.TryCast, 688 this=this, 689 to=to, 690 ), 691 TokenType.ARROW: lambda self, this, path: self.expression( 692 exp.JSONExtract, 693 this=this, 694 expression=self.dialect.to_json_path(path), 695 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 696 ), 697 TokenType.DARROW: lambda self, this, path: self.expression( 698 exp.JSONExtractScalar, 699 this=this, 700 expression=self.dialect.to_json_path(path), 701 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 702 ), 703 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 704 exp.JSONBExtract, 705 this=this, 706 expression=path, 707 ), 708 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 709 exp.JSONBExtractScalar, 710 this=this, 711 expression=path, 712 ), 713 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 714 exp.JSONBContains, 715 this=this, 716 expression=key, 717 ), 718 } 719 
720 EXPRESSION_PARSERS = { 721 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 722 exp.Column: lambda self: self._parse_column(), 723 exp.Condition: lambda self: self._parse_assignment(), 724 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 725 exp.Expression: lambda self: self._parse_expression(), 726 exp.From: lambda self: self._parse_from(joins=True), 727 exp.Group: lambda self: self._parse_group(), 728 exp.Having: lambda self: self._parse_having(), 729 exp.Identifier: lambda self: self._parse_id_var(), 730 exp.Join: lambda self: self._parse_join(), 731 exp.Lambda: lambda self: self._parse_lambda(), 732 exp.Lateral: lambda self: self._parse_lateral(), 733 exp.Limit: lambda self: self._parse_limit(), 734 exp.Offset: lambda self: self._parse_offset(), 735 exp.Order: lambda self: self._parse_order(), 736 exp.Ordered: lambda self: self._parse_ordered(), 737 exp.Properties: lambda self: self._parse_properties(), 738 exp.Qualify: lambda self: self._parse_qualify(), 739 exp.Returning: lambda self: self._parse_returning(), 740 exp.Select: lambda self: self._parse_select(), 741 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 742 exp.Table: lambda self: self._parse_table_parts(), 743 exp.TableAlias: lambda self: self._parse_table_alias(), 744 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 745 exp.Where: lambda self: self._parse_where(), 746 exp.Window: lambda self: self._parse_named_window(), 747 exp.With: lambda self: self._parse_with(), 748 "JOIN_TYPE": lambda self: self._parse_join_parts(), 749 } 750 751 STATEMENT_PARSERS = { 752 TokenType.ALTER: lambda self: self._parse_alter(), 753 TokenType.BEGIN: lambda self: self._parse_transaction(), 754 TokenType.CACHE: lambda self: self._parse_cache(), 755 TokenType.COMMENT: lambda self: self._parse_comment(), 756 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 757 TokenType.COPY: lambda self: 
self._parse_copy(), 758 TokenType.CREATE: lambda self: self._parse_create(), 759 TokenType.DELETE: lambda self: self._parse_delete(), 760 TokenType.DESC: lambda self: self._parse_describe(), 761 TokenType.DESCRIBE: lambda self: self._parse_describe(), 762 TokenType.DROP: lambda self: self._parse_drop(), 763 TokenType.INSERT: lambda self: self._parse_insert(), 764 TokenType.KILL: lambda self: self._parse_kill(), 765 TokenType.LOAD: lambda self: self._parse_load(), 766 TokenType.MERGE: lambda self: self._parse_merge(), 767 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 768 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 769 TokenType.REFRESH: lambda self: self._parse_refresh(), 770 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 771 TokenType.SET: lambda self: self._parse_set(), 772 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 773 TokenType.UNCACHE: lambda self: self._parse_uncache(), 774 TokenType.UPDATE: lambda self: self._parse_update(), 775 TokenType.USE: lambda self: self.expression( 776 exp.Use, 777 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 778 this=self._parse_table(schema=False), 779 ), 780 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 781 } 782 783 UNARY_PARSERS = { 784 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 785 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 786 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 787 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 788 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 789 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 790 } 791 792 STRING_PARSERS = { 793 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 794 exp.RawString, 
this=token.text 795 ), 796 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 797 exp.National, this=token.text 798 ), 799 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 800 TokenType.STRING: lambda self, token: self.expression( 801 exp.Literal, this=token.text, is_string=True 802 ), 803 TokenType.UNICODE_STRING: lambda self, token: self.expression( 804 exp.UnicodeString, 805 this=token.text, 806 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 807 ), 808 } 809 810 NUMERIC_PARSERS = { 811 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 812 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 813 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 814 TokenType.NUMBER: lambda self, token: self.expression( 815 exp.Literal, this=token.text, is_string=False 816 ), 817 } 818 819 PRIMARY_PARSERS = { 820 **STRING_PARSERS, 821 **NUMERIC_PARSERS, 822 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 823 TokenType.NULL: lambda self, _: self.expression(exp.Null), 824 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 825 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 826 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 827 TokenType.STAR: lambda self, _: self.expression( 828 exp.Star, 829 **{ 830 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 831 "replace": self._parse_star_op("REPLACE"), 832 "rename": self._parse_star_op("RENAME"), 833 }, 834 ), 835 } 836 837 PLACEHOLDER_PARSERS = { 838 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 839 TokenType.PARAMETER: lambda self: self._parse_parameter(), 840 TokenType.COLON: lambda self: ( 841 self.expression(exp.Placeholder, this=self._prev.text) 842 if self._match_set(self.ID_VAR_TOKENS) 843 else None 844 ), 845 } 
846 847 RANGE_PARSERS = { 848 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 849 TokenType.GLOB: binary_range_parser(exp.Glob), 850 TokenType.ILIKE: binary_range_parser(exp.ILike), 851 TokenType.IN: lambda self, this: self._parse_in(this), 852 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 853 TokenType.IS: lambda self, this: self._parse_is(this), 854 TokenType.LIKE: binary_range_parser(exp.Like), 855 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 856 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 857 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 858 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 859 } 860 861 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 862 "ALLOWED_VALUES": lambda self: self.expression( 863 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 864 ), 865 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 866 "AUTO": lambda self: self._parse_auto_property(), 867 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 868 "BACKUP": lambda self: self.expression( 869 exp.BackupProperty, this=self._parse_var(any_token=True) 870 ), 871 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 872 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 873 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 874 "CHECKSUM": lambda self: self._parse_checksum(), 875 "CLUSTER BY": lambda self: self._parse_cluster(), 876 "CLUSTERED": lambda self: self._parse_clustered_by(), 877 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 878 exp.CollateProperty, **kwargs 879 ), 880 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 881 "CONTAINS": lambda self: self._parse_contains_property(), 882 "COPY": lambda self: self._parse_copy_property(), 883 "DATABLOCKSIZE": lambda self, **kwargs: 
self._parse_datablocksize(**kwargs), 884 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 885 "DEFINER": lambda self: self._parse_definer(), 886 "DETERMINISTIC": lambda self: self.expression( 887 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 888 ), 889 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 890 "DISTKEY": lambda self: self._parse_distkey(), 891 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 892 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 893 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 894 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 895 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 896 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 897 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 898 "FREESPACE": lambda self: self._parse_freespace(), 899 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 900 "HEAP": lambda self: self.expression(exp.HeapProperty), 901 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 902 "IMMUTABLE": lambda self: self.expression( 903 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 904 ), 905 "INHERITS": lambda self: self.expression( 906 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 907 ), 908 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 909 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 910 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 911 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 912 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 913 "LIKE": lambda self: self._parse_create_like(), 914 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 915 "LOCK": lambda self: 
self._parse_locking(), 916 "LOCKING": lambda self: self._parse_locking(), 917 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 918 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 919 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 920 "MODIFIES": lambda self: self._parse_modifies_property(), 921 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 922 "NO": lambda self: self._parse_no_property(), 923 "ON": lambda self: self._parse_on_property(), 924 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 925 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 926 "PARTITION": lambda self: self._parse_partitioned_of(), 927 "PARTITION BY": lambda self: self._parse_partitioned_by(), 928 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 929 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 930 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 931 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 932 "READS": lambda self: self._parse_reads_property(), 933 "REMOTE": lambda self: self._parse_remote_with_connection(), 934 "RETURNS": lambda self: self._parse_returns(), 935 "STRICT": lambda self: self.expression(exp.StrictProperty), 936 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 937 "ROW": lambda self: self._parse_row(), 938 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 939 "SAMPLE": lambda self: self.expression( 940 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 941 ), 942 "SECURE": lambda self: self.expression(exp.SecureProperty), 943 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 944 "SETTINGS": lambda self: self._parse_settings_property(), 945 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 946 "SORTKEY": lambda self: self._parse_sortkey(), 947 
"SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 948 "STABLE": lambda self: self.expression( 949 exp.StabilityProperty, this=exp.Literal.string("STABLE") 950 ), 951 "STORED": lambda self: self._parse_stored(), 952 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 953 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 954 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 955 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 956 "TO": lambda self: self._parse_to_table(), 957 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 958 "TRANSFORM": lambda self: self.expression( 959 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 960 ), 961 "TTL": lambda self: self._parse_ttl(), 962 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 963 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 964 "VOLATILE": lambda self: self._parse_volatile_property(), 965 "WITH": lambda self: self._parse_with_property(), 966 } 967 968 CONSTRAINT_PARSERS = { 969 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 970 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 971 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 972 "CHARACTER SET": lambda self: self.expression( 973 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 974 ), 975 "CHECK": lambda self: self.expression( 976 exp.CheckColumnConstraint, 977 this=self._parse_wrapped(self._parse_assignment), 978 enforced=self._match_text_seq("ENFORCED"), 979 ), 980 "COLLATE": lambda self: self.expression( 981 exp.CollateColumnConstraint, 982 this=self._parse_identifier() or self._parse_column(), 983 ), 984 "COMMENT": lambda self: self.expression( 985 exp.CommentColumnConstraint, this=self._parse_string() 986 ), 987 "COMPRESS": lambda self: self._parse_compress(), 988 "CLUSTERED": lambda self: 
self.expression( 989 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 990 ), 991 "NONCLUSTERED": lambda self: self.expression( 992 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 993 ), 994 "DEFAULT": lambda self: self.expression( 995 exp.DefaultColumnConstraint, this=self._parse_bitwise() 996 ), 997 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 998 "EPHEMERAL": lambda self: self.expression( 999 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1000 ), 1001 "EXCLUDE": lambda self: self.expression( 1002 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1003 ), 1004 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1005 "FORMAT": lambda self: self.expression( 1006 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1007 ), 1008 "GENERATED": lambda self: self._parse_generated_as_identity(), 1009 "IDENTITY": lambda self: self._parse_auto_increment(), 1010 "INLINE": lambda self: self._parse_inline(), 1011 "LIKE": lambda self: self._parse_create_like(), 1012 "NOT": lambda self: self._parse_not_constraint(), 1013 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1014 "ON": lambda self: ( 1015 self._match(TokenType.UPDATE) 1016 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1017 ) 1018 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1019 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1020 "PERIOD": lambda self: self._parse_period_for_system_time(), 1021 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1022 "REFERENCES": lambda self: self._parse_references(match=False), 1023 "TITLE": lambda self: self.expression( 1024 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1027 "UNIQUE": 
lambda self: self._parse_unique(), 1028 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1029 "WITH": lambda self: self.expression( 1030 exp.Properties, expressions=self._parse_wrapped_properties() 1031 ), 1032 } 1033 1034 ALTER_PARSERS = { 1035 "ADD": lambda self: self._parse_alter_table_add(), 1036 "ALTER": lambda self: self._parse_alter_table_alter(), 1037 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1038 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1039 "DROP": lambda self: self._parse_alter_table_drop(), 1040 "RENAME": lambda self: self._parse_alter_table_rename(), 1041 "SET": lambda self: self._parse_alter_table_set(), 1042 "AS": lambda self: self._parse_select(), 1043 } 1044 1045 ALTER_ALTER_PARSERS = { 1046 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1047 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1048 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1049 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1050 } 1051 1052 SCHEMA_UNNAMED_CONSTRAINTS = { 1053 "CHECK", 1054 "EXCLUDE", 1055 "FOREIGN KEY", 1056 "LIKE", 1057 "PERIOD", 1058 "PRIMARY KEY", 1059 "UNIQUE", 1060 } 1061 1062 NO_PAREN_FUNCTION_PARSERS = { 1063 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1064 "CASE": lambda self: self._parse_case(), 1065 "CONNECT_BY_ROOT": lambda self: self.expression( 1066 exp.ConnectByRoot, this=self._parse_column() 1067 ), 1068 "IF": lambda self: self._parse_if(), 1069 "NEXT": lambda self: self._parse_next_value_for(), 1070 } 1071 1072 INVALID_FUNC_NAME_TOKENS = { 1073 TokenType.IDENTIFIER, 1074 TokenType.STRING, 1075 } 1076 1077 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1078 1079 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1080 1081 FUNCTION_PARSERS = { 1082 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1083 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1084 
"DECODE": lambda self: self._parse_decode(), 1085 "EXTRACT": lambda self: self._parse_extract(), 1086 "GAP_FILL": lambda self: self._parse_gap_fill(), 1087 "JSON_OBJECT": lambda self: self._parse_json_object(), 1088 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1089 "JSON_TABLE": lambda self: self._parse_json_table(), 1090 "MATCH": lambda self: self._parse_match_against(), 1091 "OPENJSON": lambda self: self._parse_open_json(), 1092 "POSITION": lambda self: self._parse_position(), 1093 "PREDICT": lambda self: self._parse_predict(), 1094 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1095 "STRING_AGG": lambda self: self._parse_string_agg(), 1096 "SUBSTRING": lambda self: self._parse_substring(), 1097 "TRIM": lambda self: self._parse_trim(), 1098 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1099 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1100 } 1101 1102 QUERY_MODIFIER_PARSERS = { 1103 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1104 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1105 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1106 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1107 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1108 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1109 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1110 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1111 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1112 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1113 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1114 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1115 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1116 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1117 
TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1118 TokenType.CLUSTER_BY: lambda self: ( 1119 "cluster", 1120 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1121 ), 1122 TokenType.DISTRIBUTE_BY: lambda self: ( 1123 "distribute", 1124 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1125 ), 1126 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1127 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1128 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1129 } 1130 1131 SET_PARSERS = { 1132 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1133 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1134 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1135 "TRANSACTION": lambda self: self._parse_set_transaction(), 1136 } 1137 1138 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1139 1140 TYPE_LITERAL_PARSERS = { 1141 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1142 } 1143 1144 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1145 1146 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1147 1148 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1149 1150 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1151 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1152 "ISOLATION": ( 1153 ("LEVEL", "REPEATABLE", "READ"), 1154 ("LEVEL", "READ", "COMMITTED"), 1155 ("LEVEL", "READ", "UNCOMITTED"), 1156 ("LEVEL", "SERIALIZABLE"), 1157 ), 1158 "READ": ("WRITE", "ONLY"), 1159 } 1160 1161 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1162 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1163 ) 1164 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1165 1166 CREATE_SEQUENCE: OPTIONS_TYPE = { 1167 "SCALE": ("EXTEND", "NOEXTEND"), 
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        # Single-keyword sequence options with no trailing keywords.
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds accepted after USE (e.g. USE WAREHOUSE x).
    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Trailing options of key constraints (e.g. NOT ENFORCED, MATCH FULL).
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    # Keywords accepted in INSERT OR <alternative> (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}

    # Time-travel clause markers (e.g. Snowflake AT/BEFORE ... TIMESTAMP/OFFSET/...).
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens usable as identifiers inside FETCH; ROW/ROWS/PERCENT are clause keywords there.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
ADD_CONSTRAINT_TOKENS = { 1237 TokenType.CONSTRAINT, 1238 TokenType.FOREIGN_KEY, 1239 TokenType.INDEX, 1240 TokenType.KEY, 1241 TokenType.PRIMARY_KEY, 1242 TokenType.UNIQUE, 1243 } 1244 1245 DISTINCT_TOKENS = {TokenType.DISTINCT} 1246 1247 NULL_TOKENS = {TokenType.NULL} 1248 1249 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1250 1251 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1252 1253 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1254 1255 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1256 1257 STRICT_CAST = True 1258 1259 PREFIXED_PIVOT_COLUMNS = False 1260 IDENTIFY_PIVOT_STRINGS = False 1261 1262 LOG_DEFAULTS_TO_LN = False 1263 1264 # Whether ADD is present for each column added by ALTER TABLE 1265 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1266 1267 # Whether the table sample clause expects CSV syntax 1268 TABLESAMPLE_CSV = False 1269 1270 # The default method used for table sampling 1271 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1272 1273 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1274 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1275 1276 # Whether the TRIM function expects the characters to trim as its first argument 1277 TRIM_PATTERN_FIRST = False 1278 1279 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1280 STRING_ALIASES = False 1281 1282 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1283 MODIFIERS_ATTACHED_TO_SET_OP = True 1284 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1285 1286 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1287 NO_PAREN_IF_COMMANDS = True 1288 1289 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1290 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1291 1292 # Whether the `:` operator is used to extract a value from a VARIANT column 1293 COLON_IS_VARIANT_EXTRACT = False 1294 1295 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1296 # If this is True and '(' is not found, the keyword will be treated as an identifier 1297 VALUES_FOLLOWED_BY_PAREN = True 1298 1299 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1300 SUPPORTS_IMPLICIT_UNNEST = False 1301 1302 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1303 INTERVAL_SPANS = True 1304 1305 # Whether a PARTITION clause can follow a table reference 1306 SUPPORTS_PARTITION_SELECTION = False 1307 1308 __slots__ = ( 1309 "error_level", 1310 "error_message_context", 1311 "max_errors", 1312 "dialect", 1313 "sql", 1314 "errors", 1315 "_tokens", 1316 "_index", 1317 "_curr", 1318 "_next", 1319 "_prev", 1320 "_prev_comments", 1321 ) 1322 1323 # Autofilled 1324 SHOW_TRIE: t.Dict = {} 1325 SET_TRIE: t.Dict = {} 1326 1327 def __init__( 1328 self, 1329 error_level: t.Optional[ErrorLevel] = None, 1330 error_message_context: int = 100, 1331 max_errors: int = 3, 1332 dialect: DialectType = None, 1333 ): 1334 from sqlglot.dialects import Dialect 1335 1336 self.error_level = error_level or ErrorLevel.IMMEDIATE 1337 self.error_message_context = error_message_context 1338 self.max_errors = max_errors 1339 self.dialect = Dialect.get_or_raise(dialect) 1340 self.reset() 1341 1342 def reset(self): 1343 self.sql = "" 1344 self.errors = [] 1345 self._tokens = [] 1346 self._index = 0 1347 self._curr = None 1348 self._next = None 1349 self._prev = None 1350 self._prev_comments = None 1351 1352 def parse( 1353 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1354 ) -> t.List[t.Optional[exp.Expression]]: 1355 """ 1356 Parses a list of tokens and returns a list of syntax trees, one tree 1357 per parsed SQL statement. 
1358 1359 Args: 1360 raw_tokens: The list of tokens. 1361 sql: The original SQL string, used to produce helpful debug messages. 1362 1363 Returns: 1364 The list of the produced syntax trees. 1365 """ 1366 return self._parse( 1367 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1368 ) 1369 1370 def parse_into( 1371 self, 1372 expression_types: exp.IntoType, 1373 raw_tokens: t.List[Token], 1374 sql: t.Optional[str] = None, 1375 ) -> t.List[t.Optional[exp.Expression]]: 1376 """ 1377 Parses a list of tokens into a given Expression type. If a collection of Expression 1378 types is given instead, this method will try to parse the token list into each one 1379 of them, stopping at the first for which the parsing succeeds. 1380 1381 Args: 1382 expression_types: The expression type(s) to try and parse the token list into. 1383 raw_tokens: The list of tokens. 1384 sql: The original SQL string, used to produce helpful debug messages. 1385 1386 Returns: 1387 The target Expression. 
1388 """ 1389 errors = [] 1390 for expression_type in ensure_list(expression_types): 1391 parser = self.EXPRESSION_PARSERS.get(expression_type) 1392 if not parser: 1393 raise TypeError(f"No parser registered for {expression_type}") 1394 1395 try: 1396 return self._parse(parser, raw_tokens, sql) 1397 except ParseError as e: 1398 e.errors[0]["into_expression"] = expression_type 1399 errors.append(e) 1400 1401 raise ParseError( 1402 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1403 errors=merge_errors(errors), 1404 ) from errors[-1] 1405 1406 def _parse( 1407 self, 1408 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1409 raw_tokens: t.List[Token], 1410 sql: t.Optional[str] = None, 1411 ) -> t.List[t.Optional[exp.Expression]]: 1412 self.reset() 1413 self.sql = sql or "" 1414 1415 total = len(raw_tokens) 1416 chunks: t.List[t.List[Token]] = [[]] 1417 1418 for i, token in enumerate(raw_tokens): 1419 if token.token_type == TokenType.SEMICOLON: 1420 if token.comments: 1421 chunks.append([token]) 1422 1423 if i < total - 1: 1424 chunks.append([]) 1425 else: 1426 chunks[-1].append(token) 1427 1428 expressions = [] 1429 1430 for tokens in chunks: 1431 self._index = -1 1432 self._tokens = tokens 1433 self._advance() 1434 1435 expressions.append(parse_method(self)) 1436 1437 if self._index < len(self._tokens): 1438 self.raise_error("Invalid expression / Unexpected token") 1439 1440 self.check_errors() 1441 1442 return expressions 1443 1444 def check_errors(self) -> None: 1445 """Logs or raises any found errors, depending on the chosen error level setting.""" 1446 if self.error_level == ErrorLevel.WARN: 1447 for error in self.errors: 1448 logger.error(str(error)) 1449 elif self.error_level == ErrorLevel.RAISE and self.errors: 1450 raise ParseError( 1451 concat_messages(self.errors, self.max_errors), 1452 errors=merge_errors(self.errors), 1453 ) 1454 1455 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1456 """ 
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the message on the most relevant token; fall back to an empty one
        # so the slicing below is always safe.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1  # token.end is inclusive; slice bounds are exclusive
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending SQL span (ANSI escape codes).
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        # Otherwise defer: errors are reported in bulk by check_errors().
        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach (and consume) any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfer the buffered comments of the previous token onto `expression`,
        # clearing the buffer so they are attached at most once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning both tokens, inclusive of `end`.
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the raw SQL
        # (no whitespace between them).
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Move the cursor `times` tokens forward (negative moves backward) and
        refresh the _curr/_next/_prev token views accordingly."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Jump back (or forward) to an absolute token index; no-op if already there.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remaining statement in an opaque exp.Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1569 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1570 solve this by setting & resetting the parser state accordingly 1571 """ 1572 index = self._index 1573 error_level = self.error_level 1574 1575 self.error_level = ErrorLevel.IMMEDIATE 1576 try: 1577 this = parse_method() 1578 except ParseError: 1579 this = None 1580 finally: 1581 if not this or retreat: 1582 self._retreat(index) 1583 self.error_level = error_level 1584 1585 return this 1586 1587 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1588 start = self._prev 1589 exists = self._parse_exists() if allow_exists else None 1590 1591 self._match(TokenType.ON) 1592 1593 materialized = self._match_text_seq("MATERIALIZED") 1594 kind = self._match_set(self.CREATABLES) and self._prev 1595 if not kind: 1596 return self._parse_as_command(start) 1597 1598 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1599 this = self._parse_user_defined_function(kind=kind.token_type) 1600 elif kind.token_type == TokenType.TABLE: 1601 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1602 elif kind.token_type == TokenType.COLUMN: 1603 this = self._parse_column() 1604 else: 1605 this = self._parse_id_var() 1606 1607 self._match(TokenType.IS) 1608 1609 return self.expression( 1610 exp.Comment, 1611 this=this, 1612 kind=kind.text, 1613 expression=self._parse_string(), 1614 exists=exists, 1615 materialized=materialized, 1616 ) 1617 1618 def _parse_to_table( 1619 self, 1620 ) -> exp.ToTableProperty: 1621 table = self._parse_table_parts(schema=True) 1622 return self.expression(exp.ToTableProperty, this=table) 1623 1624 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1625 def _parse_ttl(self) -> exp.Expression: 1626 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1627 this = self._parse_bitwise() 1628 1629 if self._match_text_seq("DELETE"): 1630 return 
self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1631 if self._match_text_seq("RECOMPRESS"): 1632 return self.expression( 1633 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1634 ) 1635 if self._match_text_seq("TO", "DISK"): 1636 return self.expression( 1637 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1638 ) 1639 if self._match_text_seq("TO", "VOLUME"): 1640 return self.expression( 1641 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1642 ) 1643 1644 return this 1645 1646 expressions = self._parse_csv(_parse_ttl_action) 1647 where = self._parse_where() 1648 group = self._parse_group() 1649 1650 aggregates = None 1651 if group and self._match(TokenType.SET): 1652 aggregates = self._parse_csv(self._parse_set_item) 1653 1654 return self.expression( 1655 exp.MergeTreeTTL, 1656 expressions=expressions, 1657 where=where, 1658 group=group, 1659 aggregates=aggregates, 1660 ) 1661 1662 def _parse_statement(self) -> t.Optional[exp.Expression]: 1663 if self._curr is None: 1664 return None 1665 1666 if self._match_set(self.STATEMENT_PARSERS): 1667 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1668 1669 if self._match_set(self.dialect.tokenizer.COMMANDS): 1670 return self._parse_command() 1671 1672 expression = self._parse_expression() 1673 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1674 return self._parse_query_modifiers(expression) 1675 1676 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1677 start = self._prev 1678 temporary = self._match(TokenType.TEMPORARY) 1679 materialized = self._match_text_seq("MATERIALIZED") 1680 1681 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1682 if not kind: 1683 return self._parse_as_command(start) 1684 1685 if_exists = exists or self._parse_exists() 1686 table = self._parse_table_parts( 1687 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 
1688 ) 1689 1690 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1691 1692 if self._match(TokenType.L_PAREN, advance=False): 1693 expressions = self._parse_wrapped_csv(self._parse_types) 1694 else: 1695 expressions = None 1696 1697 return self.expression( 1698 exp.Drop, 1699 comments=start.comments, 1700 exists=if_exists, 1701 this=table, 1702 expressions=expressions, 1703 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1704 temporary=temporary, 1705 materialized=materialized, 1706 cascade=self._match_text_seq("CASCADE"), 1707 constraints=self._match_text_seq("CONSTRAINTS"), 1708 purge=self._match_text_seq("PURGE"), 1709 cluster=cluster, 1710 ) 1711 1712 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1713 return ( 1714 self._match_text_seq("IF") 1715 and (not not_ or self._match(TokenType.NOT)) 1716 and self._match(TokenType.EXISTS) 1717 ) 1718 1719 def _parse_create(self) -> exp.Create | exp.Command: 1720 # Note: this can't be None because we've matched a statement parser 1721 start = self._prev 1722 comments = self._prev_comments 1723 1724 replace = ( 1725 start.token_type == TokenType.REPLACE 1726 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1727 or self._match_pair(TokenType.OR, TokenType.ALTER) 1728 ) 1729 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1730 1731 unique = self._match(TokenType.UNIQUE) 1732 1733 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1734 clustered = True 1735 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1736 "COLUMNSTORE" 1737 ): 1738 clustered = False 1739 else: 1740 clustered = None 1741 1742 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1743 self._advance() 1744 1745 properties = None 1746 create_token = self._match_set(self.CREATABLES) and self._prev 1747 1748 if not create_token: 1749 # exp.Properties.Location.POST_CREATE 1750 properties = self._parse_properties() 1751 create_token = 
self._match_set(self.CREATABLES) and self._prev 1752 1753 if not properties or not create_token: 1754 return self._parse_as_command(start) 1755 1756 concurrently = self._match_text_seq("CONCURRENTLY") 1757 exists = self._parse_exists(not_=True) 1758 this = None 1759 expression: t.Optional[exp.Expression] = None 1760 indexes = None 1761 no_schema_binding = None 1762 begin = None 1763 end = None 1764 clone = None 1765 1766 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1767 nonlocal properties 1768 if properties and temp_props: 1769 properties.expressions.extend(temp_props.expressions) 1770 elif temp_props: 1771 properties = temp_props 1772 1773 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1774 this = self._parse_user_defined_function(kind=create_token.token_type) 1775 1776 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1777 extend_props(self._parse_properties()) 1778 1779 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1780 extend_props(self._parse_properties()) 1781 1782 if not expression: 1783 if self._match(TokenType.COMMAND): 1784 expression = self._parse_as_command(self._prev) 1785 else: 1786 begin = self._match(TokenType.BEGIN) 1787 return_ = self._match_text_seq("RETURN") 1788 1789 if self._match(TokenType.STRING, advance=False): 1790 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1791 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1792 expression = self._parse_string() 1793 extend_props(self._parse_properties()) 1794 else: 1795 expression = self._parse_statement() 1796 1797 end = self._match_text_seq("END") 1798 1799 if return_: 1800 expression = self.expression(exp.Return, this=expression) 1801 elif create_token.token_type == TokenType.INDEX: 1802 # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) 1803 if not self._match(TokenType.ON): 1804 index = self._parse_id_var() 1805 anonymous = False 1806 else: 1807 index = None 1808 anonymous = True 1809 1810 this = self._parse_index(index=index, anonymous=anonymous) 1811 elif create_token.token_type in self.DB_CREATABLES: 1812 table_parts = self._parse_table_parts( 1813 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1814 ) 1815 1816 # exp.Properties.Location.POST_NAME 1817 self._match(TokenType.COMMA) 1818 extend_props(self._parse_properties(before=True)) 1819 1820 this = self._parse_schema(this=table_parts) 1821 1822 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1823 extend_props(self._parse_properties()) 1824 1825 self._match(TokenType.ALIAS) 1826 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1827 # exp.Properties.Location.POST_ALIAS 1828 extend_props(self._parse_properties()) 1829 1830 if create_token.token_type == TokenType.SEQUENCE: 1831 expression = self._parse_types() 1832 extend_props(self._parse_properties()) 1833 else: 1834 expression = self._parse_ddl_select() 1835 1836 if create_token.token_type == TokenType.TABLE: 1837 # exp.Properties.Location.POST_EXPRESSION 1838 extend_props(self._parse_properties()) 1839 1840 indexes = [] 1841 while True: 1842 index = self._parse_index() 1843 1844 # exp.Properties.Location.POST_INDEX 1845 extend_props(self._parse_properties()) 1846 if not index: 1847 break 1848 else: 1849 self._match(TokenType.COMMA) 1850 indexes.append(index) 1851 elif create_token.token_type == TokenType.VIEW: 1852 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1853 no_schema_binding = True 1854 1855 shallow = self._match_text_seq("SHALLOW") 1856 1857 if self._match_texts(self.CLONE_KEYWORDS): 1858 copy = self._prev.text.lower() == "copy" 1859 clone = self.expression( 1860 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1861 ) 1862 1863 if self._curr and not 
self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1864 return self._parse_as_command(start) 1865 1866 create_kind_text = create_token.text.upper() 1867 return self.expression( 1868 exp.Create, 1869 comments=comments, 1870 this=this, 1871 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1872 replace=replace, 1873 refresh=refresh, 1874 unique=unique, 1875 expression=expression, 1876 exists=exists, 1877 properties=properties, 1878 indexes=indexes, 1879 no_schema_binding=no_schema_binding, 1880 begin=begin, 1881 end=end, 1882 clone=clone, 1883 concurrently=concurrently, 1884 clustered=clustered, 1885 ) 1886 1887 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1888 seq = exp.SequenceProperties() 1889 1890 options = [] 1891 index = self._index 1892 1893 while self._curr: 1894 self._match(TokenType.COMMA) 1895 if self._match_text_seq("INCREMENT"): 1896 self._match_text_seq("BY") 1897 self._match_text_seq("=") 1898 seq.set("increment", self._parse_term()) 1899 elif self._match_text_seq("MINVALUE"): 1900 seq.set("minvalue", self._parse_term()) 1901 elif self._match_text_seq("MAXVALUE"): 1902 seq.set("maxvalue", self._parse_term()) 1903 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1904 self._match_text_seq("=") 1905 seq.set("start", self._parse_term()) 1906 elif self._match_text_seq("CACHE"): 1907 # T-SQL allows empty CACHE which is initialized dynamically 1908 seq.set("cache", self._parse_number() or True) 1909 elif self._match_text_seq("OWNED", "BY"): 1910 # "OWNED BY NONE" is the default 1911 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1912 else: 1913 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1914 if opt: 1915 options.append(opt) 1916 else: 1917 break 1918 1919 seq.set("options", options if options else None) 1920 return None if self._index == index else seq 1921 1922 def 
_parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property preceded by modifier keywords (Teradata style).

        Each ``_match*`` call below consumes tokens on success, so the order
        of the dict entries is significant.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser's signature doesn't accept these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property.

        Tries the dedicated property parsers first, then a few hard-coded
        forms, and finally falls back to a generic ``key = value`` property
        (or sequence options when no ``=`` follows the key).
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a "key = value" property; rewind and try sequence options.
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED [AS] ... , including INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, converting an unquoted identifier into an exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse an optional ``=`` / ``AS`` followed by a value, wrapped in exp_class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into an exp.Properties node.

        Args:
            before: when truthy, use the Teradata "before" property parser.

        Returns:
            An exp.Properties node, or None when no property was parsed.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield several properties (e.g. a wrapped list).
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse SETTINGS as a comma-separated list of assignments."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate the VOLATILE keyword.

        Looks two tokens back: if the preceding token is in PRE_VOLATILE_TOKENS,
        VOLATILE qualifies the created object (exp.VolatileProperty); otherwise
        it is treated as a stability attribute.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING = { OFF | ON [( ... )] } (T-SQL temporal tables)."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Options may appear in any order, separated by commas.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA [DELETION] = { ON | OFF } [( FILTER_COLUMN = ..., RETENTION_PERIOD = ... )]."""
        self._match(TokenType.EQ)
        # A bare property implies ON unless OFF is explicitly given.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the various ``WITH <property>`` forms to their parsers.

        The checks are ordered from most to least specific; each branch only
        consumes tokens when its keywords actually match.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse MySQL's DEFINER = user@host clause; None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may also be the bare "%" wildcard (MOD token).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wrap previously-matched journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM [=] { ON | OFF } [DEFAULT]."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewind the COPY token when GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either with an explicit value or as a bare flag."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [=] <size> [BYTES | KBYTES | KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] { ALWAYS | MANUAL | NEVER | DEFAULT } [AUTOTEMP ...]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [FOR ...], rewinding on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            # NO/CONCURRENT were consumed speculatively; restore the position.
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr, ...>, returning [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def
_parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL-style partition bound spec (IN / FROM..TO / WITH MODULUS)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are keywords here, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }."""
        if not self._match_text_seq("OF"):
            # PARTITION was consumed by the caller; restore the position.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL (routine data-access characteristic)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA (routine data-access characteristic)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse properties introduced by NO: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse properties introduced by ON: COMMIT behavior, or a generic target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA (routine data-access characteristic)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<column>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<cols>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <connection parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE schema, NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def
_parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement (kind, style, target table, properties, partition)."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table
            # name; undo both matches and re-parse it as the table.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT OVERWRITE DIRECTORY.

        Keyword-argument order in the final expression matters: each
        ``_match*`` consumes tokens as the clause list is built.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of a ROW FORMAT clause (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...), rewinding when the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional but must appear in this order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()
        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ...; other LOAD forms become a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<assignments>); None when absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row as a tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized subqueries,
        VALUES, leading FROM, SUMMARIZE, DESCRIBE, or STREAM."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [NOT MATERIALIZED | MATERIALIZED] AS (<stmt>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column list>)]; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery with pivots, alias, and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources as explicit UNNESTs."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names of sources seen so far; a later cross-joined "table" whose first
        # part matches one of these is really a column to be unnested.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key,
expression) 3035 if key == "limit": 3036 offset = expression.args.pop("offset", None) 3037 3038 if offset: 3039 offset = exp.Offset(expression=offset) 3040 this.set("offset", offset) 3041 3042 limit_by_expressions = expression.expressions 3043 expression.set("expressions", None) 3044 offset.set("expressions", limit_by_expressions) 3045 continue 3046 break 3047 3048 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3049 this = self._implicit_unnests_to_explicit(this) 3050 3051 return this 3052 3053 def _parse_hint(self) -> t.Optional[exp.Hint]: 3054 if self._match(TokenType.HINT): 3055 hints = [] 3056 for hint in iter( 3057 lambda: self._parse_csv( 3058 lambda: self._parse_function() or self._parse_var(upper=True) 3059 ), 3060 [], 3061 ): 3062 hints.extend(hint) 3063 3064 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3065 self.raise_error("Expected */ after HINT") 3066 3067 return self.expression(exp.Hint, expressions=hints) 3068 3069 return None 3070 3071 def _parse_into(self) -> t.Optional[exp.Into]: 3072 if not self._match(TokenType.INTO): 3073 return None 3074 3075 temp = self._match(TokenType.TEMPORARY) 3076 unlogged = self._match_text_seq("UNLOGGED") 3077 self._match(TokenType.TABLE) 3078 3079 return self.expression( 3080 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3081 ) 3082 3083 def _parse_from( 3084 self, joins: bool = False, skip_from_token: bool = False 3085 ) -> t.Optional[exp.From]: 3086 if not skip_from_token and not self._match(TokenType.FROM): 3087 return None 3088 3089 return self.expression( 3090 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3091 ) 3092 3093 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3094 return self.expression( 3095 exp.MatchRecognizeMeasure, 3096 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3097 this=self._parse_expression(), 3098 ) 3099 3100 def 
_parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3101 if not self._match(TokenType.MATCH_RECOGNIZE): 3102 return None 3103 3104 self._match_l_paren() 3105 3106 partition = self._parse_partition_by() 3107 order = self._parse_order() 3108 3109 measures = ( 3110 self._parse_csv(self._parse_match_recognize_measure) 3111 if self._match_text_seq("MEASURES") 3112 else None 3113 ) 3114 3115 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3116 rows = exp.var("ONE ROW PER MATCH") 3117 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3118 text = "ALL ROWS PER MATCH" 3119 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3120 text += " SHOW EMPTY MATCHES" 3121 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3122 text += " OMIT EMPTY MATCHES" 3123 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3124 text += " WITH UNMATCHED ROWS" 3125 rows = exp.var(text) 3126 else: 3127 rows = None 3128 3129 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3130 text = "AFTER MATCH SKIP" 3131 if self._match_text_seq("PAST", "LAST", "ROW"): 3132 text += " PAST LAST ROW" 3133 elif self._match_text_seq("TO", "NEXT", "ROW"): 3134 text += " TO NEXT ROW" 3135 elif self._match_text_seq("TO", "FIRST"): 3136 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3137 elif self._match_text_seq("TO", "LAST"): 3138 text += f" TO LAST {self._advance_any().text}" # type: ignore 3139 after = exp.var(text) 3140 else: 3141 after = None 3142 3143 if self._match_text_seq("PATTERN"): 3144 self._match_l_paren() 3145 3146 if not self._curr: 3147 self.raise_error("Expecting )", self._curr) 3148 3149 paren = 1 3150 start = self._curr 3151 3152 while self._curr and paren > 0: 3153 if self._curr.token_type == TokenType.L_PAREN: 3154 paren += 1 3155 if self._curr.token_type == TokenType.R_PAREN: 3156 paren -= 1 3157 3158 end = self._prev 3159 self._advance() 3160 3161 if paren > 0: 3162 self.raise_error("Expecting )", self._curr) 3163 3164 pattern = 
exp.var(self._find_sql(start, end)) 3165 else: 3166 pattern = None 3167 3168 define = ( 3169 self._parse_csv(self._parse_name_as_expression) 3170 if self._match_text_seq("DEFINE") 3171 else None 3172 ) 3173 3174 self._match_r_paren() 3175 3176 return self.expression( 3177 exp.MatchRecognize, 3178 partition_by=partition, 3179 order=order, 3180 measures=measures, 3181 rows=rows, 3182 after=after, 3183 pattern=pattern, 3184 define=define, 3185 alias=self._parse_table_alias(), 3186 ) 3187 3188 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3189 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3190 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3191 cross_apply = False 3192 3193 if cross_apply is not None: 3194 this = self._parse_select(table=True) 3195 view = None 3196 outer = None 3197 elif self._match(TokenType.LATERAL): 3198 this = self._parse_select(table=True) 3199 view = self._match(TokenType.VIEW) 3200 outer = self._match(TokenType.OUTER) 3201 else: 3202 return None 3203 3204 if not this: 3205 this = ( 3206 self._parse_unnest() 3207 or self._parse_function() 3208 or self._parse_id_var(any_token=False) 3209 ) 3210 3211 while self._match(TokenType.DOT): 3212 this = exp.Dot( 3213 this=this, 3214 expression=self._parse_function() or self._parse_id_var(any_token=False), 3215 ) 3216 3217 if view: 3218 table = self._parse_id_var(any_token=False) 3219 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3220 table_alias: t.Optional[exp.TableAlias] = self.expression( 3221 exp.TableAlias, this=table, columns=columns 3222 ) 3223 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3224 # We move the alias from the lateral's child node to the lateral itself 3225 table_alias = this.args["alias"].pop() 3226 else: 3227 table_alias = self._parse_table_alias() 3228 3229 return self.expression( 3230 exp.Lateral, 3231 this=this, 3232 view=view, 3233 outer=outer, 3234 alias=table_alias, 
3235 cross_apply=cross_apply, 3236 ) 3237 3238 def _parse_join_parts( 3239 self, 3240 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3241 return ( 3242 self._match_set(self.JOIN_METHODS) and self._prev, 3243 self._match_set(self.JOIN_SIDES) and self._prev, 3244 self._match_set(self.JOIN_KINDS) and self._prev, 3245 ) 3246 3247 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3248 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3249 this = self._parse_column() 3250 if isinstance(this, exp.Column): 3251 return this.this 3252 return this 3253 3254 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3255 3256 def _parse_join( 3257 self, skip_join_token: bool = False, parse_bracket: bool = False 3258 ) -> t.Optional[exp.Join]: 3259 if self._match(TokenType.COMMA): 3260 return self.expression(exp.Join, this=self._parse_table()) 3261 3262 index = self._index 3263 method, side, kind = self._parse_join_parts() 3264 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3265 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3266 3267 if not skip_join_token and not join: 3268 self._retreat(index) 3269 kind = None 3270 method = None 3271 side = None 3272 3273 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3274 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3275 3276 if not skip_join_token and not join and not outer_apply and not cross_apply: 3277 return None 3278 3279 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3280 3281 if method: 3282 kwargs["method"] = method.text 3283 if side: 3284 kwargs["side"] = side.text 3285 if kind: 3286 kwargs["kind"] = kind.text 3287 if hint: 3288 kwargs["hint"] = hint 3289 3290 if self._match(TokenType.MATCH_CONDITION): 3291 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3292 3293 if 
self._match(TokenType.ON): 3294 kwargs["on"] = self._parse_assignment() 3295 elif self._match(TokenType.USING): 3296 kwargs["using"] = self._parse_using_identifiers() 3297 elif ( 3298 not (outer_apply or cross_apply) 3299 and not isinstance(kwargs["this"], exp.Unnest) 3300 and not (kind and kind.token_type == TokenType.CROSS) 3301 ): 3302 index = self._index 3303 joins: t.Optional[list] = list(self._parse_joins()) 3304 3305 if joins and self._match(TokenType.ON): 3306 kwargs["on"] = self._parse_assignment() 3307 elif joins and self._match(TokenType.USING): 3308 kwargs["using"] = self._parse_using_identifiers() 3309 else: 3310 joins = None 3311 self._retreat(index) 3312 3313 kwargs["this"].set("joins", joins if joins else None) 3314 3315 comments = [c for token in (method, side, kind) if token for c in token.comments] 3316 return self.expression(exp.Join, comments=comments, **kwargs) 3317 3318 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3319 this = self._parse_assignment() 3320 3321 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3322 return this 3323 3324 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3325 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3326 3327 return this 3328 3329 def _parse_index_params(self) -> exp.IndexParameters: 3330 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3331 3332 if self._match(TokenType.L_PAREN, advance=False): 3333 columns = self._parse_wrapped_csv(self._parse_with_operator) 3334 else: 3335 columns = None 3336 3337 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3338 partition_by = self._parse_partition_by() 3339 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3340 tablespace = ( 3341 self._parse_var(any_token=True) 3342 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3343 else None 3344 ) 3345 where = self._parse_where() 3346 3347 
on = self._parse_field() if self._match(TokenType.ON) else None 3348 3349 return self.expression( 3350 exp.IndexParameters, 3351 using=using, 3352 columns=columns, 3353 include=include, 3354 partition_by=partition_by, 3355 where=where, 3356 with_storage=with_storage, 3357 tablespace=tablespace, 3358 on=on, 3359 ) 3360 3361 def _parse_index( 3362 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3363 ) -> t.Optional[exp.Index]: 3364 if index or anonymous: 3365 unique = None 3366 primary = None 3367 amp = None 3368 3369 self._match(TokenType.ON) 3370 self._match(TokenType.TABLE) # hive 3371 table = self._parse_table_parts(schema=True) 3372 else: 3373 unique = self._match(TokenType.UNIQUE) 3374 primary = self._match_text_seq("PRIMARY") 3375 amp = self._match_text_seq("AMP") 3376 3377 if not self._match(TokenType.INDEX): 3378 return None 3379 3380 index = self._parse_id_var() 3381 table = None 3382 3383 params = self._parse_index_params() 3384 3385 return self.expression( 3386 exp.Index, 3387 this=index, 3388 table=table, 3389 unique=unique, 3390 primary=primary, 3391 amp=amp, 3392 params=params, 3393 ) 3394 3395 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3396 hints: t.List[exp.Expression] = [] 3397 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3398 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3399 hints.append( 3400 self.expression( 3401 exp.WithTableHint, 3402 expressions=self._parse_csv( 3403 lambda: self._parse_function() or self._parse_var(any_token=True) 3404 ), 3405 ) 3406 ) 3407 self._match_r_paren() 3408 else: 3409 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3410 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3411 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3412 3413 self._match_set((TokenType.INDEX, TokenType.KEY)) 3414 if self._match(TokenType.FOR): 3415 hint.set("target", self._advance_any() and 
self._prev.text.upper()) 3416 3417 hint.set("expressions", self._parse_wrapped_id_vars()) 3418 hints.append(hint) 3419 3420 return hints or None 3421 3422 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3423 return ( 3424 (not schema and self._parse_function(optional_parens=False)) 3425 or self._parse_id_var(any_token=False) 3426 or self._parse_string_as_identifier() 3427 or self._parse_placeholder() 3428 ) 3429 3430 def _parse_table_parts( 3431 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3432 ) -> exp.Table: 3433 catalog = None 3434 db = None 3435 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3436 3437 while self._match(TokenType.DOT): 3438 if catalog: 3439 # This allows nesting the table in arbitrarily many dot expressions if needed 3440 table = self.expression( 3441 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3442 ) 3443 else: 3444 catalog = db 3445 db = table 3446 # "" used for tsql FROM a..b case 3447 table = self._parse_table_part(schema=schema) or "" 3448 3449 if ( 3450 wildcard 3451 and self._is_connected() 3452 and (isinstance(table, exp.Identifier) or not table) 3453 and self._match(TokenType.STAR) 3454 ): 3455 if isinstance(table, exp.Identifier): 3456 table.args["this"] += "*" 3457 else: 3458 table = exp.Identifier(this="*") 3459 3460 # We bubble up comments from the Identifier to the Table 3461 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3462 3463 if is_db_reference: 3464 catalog = db 3465 db = table 3466 table = None 3467 3468 if not table and not is_db_reference: 3469 self.raise_error(f"Expected table name but got {self._curr}") 3470 if not db and is_db_reference: 3471 self.raise_error(f"Expected database name but got {self._curr}") 3472 3473 table = self.expression( 3474 exp.Table, 3475 comments=comments, 3476 this=table, 3477 db=db, 3478 catalog=catalog, 3479 ) 3480 3481 changes = 
self._parse_changes() 3482 if changes: 3483 table.set("changes", changes) 3484 3485 at_before = self._parse_historical_data() 3486 if at_before: 3487 table.set("when", at_before) 3488 3489 pivots = self._parse_pivots() 3490 if pivots: 3491 table.set("pivots", pivots) 3492 3493 return table 3494 3495 def _parse_table( 3496 self, 3497 schema: bool = False, 3498 joins: bool = False, 3499 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3500 parse_bracket: bool = False, 3501 is_db_reference: bool = False, 3502 parse_partition: bool = False, 3503 ) -> t.Optional[exp.Expression]: 3504 lateral = self._parse_lateral() 3505 if lateral: 3506 return lateral 3507 3508 unnest = self._parse_unnest() 3509 if unnest: 3510 return unnest 3511 3512 values = self._parse_derived_table_values() 3513 if values: 3514 return values 3515 3516 subquery = self._parse_select(table=True) 3517 if subquery: 3518 if not subquery.args.get("pivots"): 3519 subquery.set("pivots", self._parse_pivots()) 3520 return subquery 3521 3522 bracket = parse_bracket and self._parse_bracket(None) 3523 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3524 3525 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3526 self._parse_table 3527 ) 3528 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3529 3530 only = self._match(TokenType.ONLY) 3531 3532 this = t.cast( 3533 exp.Expression, 3534 bracket 3535 or rows_from 3536 or self._parse_bracket( 3537 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3538 ), 3539 ) 3540 3541 if only: 3542 this.set("only", only) 3543 3544 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3545 self._match_text_seq("*") 3546 3547 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3548 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3549 this.set("partition", self._parse_partition()) 3550 3551 if 
schema: 3552 return self._parse_schema(this=this) 3553 3554 version = self._parse_version() 3555 3556 if version: 3557 this.set("version", version) 3558 3559 if self.dialect.ALIAS_POST_TABLESAMPLE: 3560 this.set("sample", self._parse_table_sample()) 3561 3562 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3563 if alias: 3564 this.set("alias", alias) 3565 3566 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3567 return self.expression( 3568 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3569 ) 3570 3571 this.set("hints", self._parse_table_hints()) 3572 3573 if not this.args.get("pivots"): 3574 this.set("pivots", self._parse_pivots()) 3575 3576 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3577 this.set("sample", self._parse_table_sample()) 3578 3579 if joins: 3580 for join in self._parse_joins(): 3581 this.append("joins", join) 3582 3583 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3584 this.set("ordinality", True) 3585 this.set("alias", self._parse_table_alias()) 3586 3587 return this 3588 3589 def _parse_version(self) -> t.Optional[exp.Version]: 3590 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3591 this = "TIMESTAMP" 3592 elif self._match(TokenType.VERSION_SNAPSHOT): 3593 this = "VERSION" 3594 else: 3595 return None 3596 3597 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3598 kind = self._prev.text.upper() 3599 start = self._parse_bitwise() 3600 self._match_texts(("TO", "AND")) 3601 end = self._parse_bitwise() 3602 expression: t.Optional[exp.Expression] = self.expression( 3603 exp.Tuple, expressions=[start, end] 3604 ) 3605 elif self._match_text_seq("CONTAINED", "IN"): 3606 kind = "CONTAINED IN" 3607 expression = self.expression( 3608 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3609 ) 3610 elif self._match(TokenType.ALL): 3611 kind = "ALL" 3612 expression = None 3613 else: 3614 self._match_text_seq("AS", "OF") 3615 kind = "AS OF" 
3616 expression = self._parse_type() 3617 3618 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3619 3620 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3621 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3622 index = self._index 3623 historical_data = None 3624 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3625 this = self._prev.text.upper() 3626 kind = ( 3627 self._match(TokenType.L_PAREN) 3628 and self._match_texts(self.HISTORICAL_DATA_KIND) 3629 and self._prev.text.upper() 3630 ) 3631 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3632 3633 if expression: 3634 self._match_r_paren() 3635 historical_data = self.expression( 3636 exp.HistoricalData, this=this, kind=kind, expression=expression 3637 ) 3638 else: 3639 self._retreat(index) 3640 3641 return historical_data 3642 3643 def _parse_changes(self) -> t.Optional[exp.Changes]: 3644 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3645 return None 3646 3647 information = self._parse_var(any_token=True) 3648 self._match_r_paren() 3649 3650 return self.expression( 3651 exp.Changes, 3652 information=information, 3653 at_before=self._parse_historical_data(), 3654 end=self._parse_historical_data(), 3655 ) 3656 3657 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3658 if not self._match(TokenType.UNNEST): 3659 return None 3660 3661 expressions = self._parse_wrapped_csv(self._parse_equality) 3662 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3663 3664 alias = self._parse_table_alias() if with_alias else None 3665 3666 if alias: 3667 if self.dialect.UNNEST_COLUMN_ONLY: 3668 if alias.args.get("columns"): 3669 self.raise_error("Unexpected extra column alias in unnest.") 3670 3671 alias.set("columns", [alias.this]) 3672 alias.set("this", None) 3673 3674 columns = alias.args.get("columns") or [] 3675 if offset and len(expressions) < len(columns): 3676 offset = 
columns.pop() 3677 3678 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3679 self._match(TokenType.ALIAS) 3680 offset = self._parse_id_var( 3681 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3682 ) or exp.to_identifier("offset") 3683 3684 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3685 3686 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3687 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3688 if not is_derived and not ( 3689 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3690 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3691 ): 3692 return None 3693 3694 expressions = self._parse_csv(self._parse_value) 3695 alias = self._parse_table_alias() 3696 3697 if is_derived: 3698 self._match_r_paren() 3699 3700 return self.expression( 3701 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3702 ) 3703 3704 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3705 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3706 as_modifier and self._match_text_seq("USING", "SAMPLE") 3707 ): 3708 return None 3709 3710 bucket_numerator = None 3711 bucket_denominator = None 3712 bucket_field = None 3713 percent = None 3714 size = None 3715 seed = None 3716 3717 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3718 matched_l_paren = self._match(TokenType.L_PAREN) 3719 3720 if self.TABLESAMPLE_CSV: 3721 num = None 3722 expressions = self._parse_csv(self._parse_primary) 3723 else: 3724 expressions = None 3725 num = ( 3726 self._parse_factor() 3727 if self._match(TokenType.NUMBER, advance=False) 3728 else self._parse_primary() or self._parse_placeholder() 3729 ) 3730 3731 if self._match_text_seq("BUCKET"): 3732 bucket_numerator = self._parse_number() 3733 self._match_text_seq("OUT", "OF") 3734 bucket_denominator = bucket_denominator = self._parse_number() 3735 
self._match(TokenType.ON) 3736 bucket_field = self._parse_field() 3737 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3738 percent = num 3739 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3740 size = num 3741 else: 3742 percent = num 3743 3744 if matched_l_paren: 3745 self._match_r_paren() 3746 3747 if self._match(TokenType.L_PAREN): 3748 method = self._parse_var(upper=True) 3749 seed = self._match(TokenType.COMMA) and self._parse_number() 3750 self._match_r_paren() 3751 elif self._match_texts(("SEED", "REPEATABLE")): 3752 seed = self._parse_wrapped(self._parse_number) 3753 3754 if not method and self.DEFAULT_SAMPLING_METHOD: 3755 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3756 3757 return self.expression( 3758 exp.TableSample, 3759 expressions=expressions, 3760 method=method, 3761 bucket_numerator=bucket_numerator, 3762 bucket_denominator=bucket_denominator, 3763 bucket_field=bucket_field, 3764 percent=percent, 3765 size=size, 3766 seed=seed, 3767 ) 3768 3769 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3770 return list(iter(self._parse_pivot, None)) or None 3771 3772 def _parse_joins(self) -> t.Iterator[exp.Join]: 3773 return iter(self._parse_join, None) 3774 3775 # https://duckdb.org/docs/sql/statements/pivot 3776 def _parse_simplified_pivot(self) -> exp.Pivot: 3777 def _parse_on() -> t.Optional[exp.Expression]: 3778 this = self._parse_bitwise() 3779 return self._parse_in(this) if self._match(TokenType.IN) else this 3780 3781 this = self._parse_table() 3782 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3783 using = self._match(TokenType.USING) and self._parse_csv( 3784 lambda: self._parse_alias(self._parse_function()) 3785 ) 3786 group = self._parse_group() 3787 return self.expression( 3788 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3789 ) 3790 3791 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3792 def _parse_aliased_expression() -> 
t.Optional[exp.Expression]: 3793 this = self._parse_select_or_expression() 3794 3795 self._match(TokenType.ALIAS) 3796 alias = self._parse_bitwise() 3797 if alias: 3798 if isinstance(alias, exp.Column) and not alias.db: 3799 alias = alias.this 3800 return self.expression(exp.PivotAlias, this=this, alias=alias) 3801 3802 return this 3803 3804 value = self._parse_column() 3805 3806 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3807 self.raise_error("Expecting IN (") 3808 3809 if self._match(TokenType.ANY): 3810 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3811 else: 3812 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3813 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3814 3815 self._match_r_paren() 3816 return expr 3817 3818 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3819 index = self._index 3820 include_nulls = None 3821 3822 if self._match(TokenType.PIVOT): 3823 unpivot = False 3824 elif self._match(TokenType.UNPIVOT): 3825 unpivot = True 3826 3827 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3828 if self._match_text_seq("INCLUDE", "NULLS"): 3829 include_nulls = True 3830 elif self._match_text_seq("EXCLUDE", "NULLS"): 3831 include_nulls = False 3832 else: 3833 return None 3834 3835 expressions = [] 3836 3837 if not self._match(TokenType.L_PAREN): 3838 self._retreat(index) 3839 return None 3840 3841 if unpivot: 3842 expressions = self._parse_csv(self._parse_column) 3843 else: 3844 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3845 3846 if not expressions: 3847 self.raise_error("Failed to parse PIVOT's aggregation list") 3848 3849 if not self._match(TokenType.FOR): 3850 self.raise_error("Expecting FOR") 3851 3852 field = self._parse_pivot_in() 3853 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3854 self._parse_bitwise 3855 ) 3856 
        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # Only attach an alias if the pivot isn't immediately followed by another (UN)PIVOT
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Synthesize output column names by combining each pivoted field value
            # with each aggregation alias; ordering is dialect-controlled.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default pivot column naming: the alias of each aggregation expression.
        # Dialects may override this.
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        # ClickHouse-style PREWHERE clause; returns None if the token is absent.
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        # WHERE clause; returns None if the token is absent (unless told to skip it).
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        # GROUP BY clause, including ALL/DISTINCT, ROLLUP, CUBE, GROUPING SETS and
        # ClickHouse's WITH TOTALS. Collects everything into `elements` and builds
        # a single exp.Group at the end.
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            # Plain grouping expressions; CUBE/ROLLUP keywords are left for the
            # dedicated branches below (the lambda yields None so CSV parsing stops).
            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # If we consumed at most a stray WITH since before_with_index, nothing
            # matched above — rewind so WITH can be parsed by an outer clause.
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        # `GROUP BY x WITH ROLLUP` carries no expression list of its own, hence [].
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a bare column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        # HAVING clause; returns None if the token is absent.
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        # QUALIFY clause (window-function filter); returns None if the token is absent.
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        # Oracle hierarchical query: START WITH ... CONNECT BY [NOCYCLE] ...,
        # where START WITH may also appear after CONNECT BY.
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # NOTE(review): temporarily registering PRIOR in NO_PAREN_FUNCTION_PARSERS
        # mutates what looks like class-level state; presumably safe because parsing
        # is single-threaded per Parser instance — confirm before changing.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `name AS expr` (e.g. in ClickHouse INTERPOLATE lists).
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse `WITH FILL ... INTERPOLATE (col AS expr, ...)`.
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        # ORDER BY / ORDER SIBLINGS BY; returns `this` unchanged when neither matches.
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        # Generic helper for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses.
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        # One element of an ORDER BY list: expr [ASC|DESC] [NULLS FIRST|LAST]
        # [WITH FILL ...], with dialect-dependent default null ordering.
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` is always falsy, so this is equivalent to
        # just the DESC match; kept as-is since it's behaviorally inert.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the SQL doesn't say, infer NULLS FIRST from the dialect's default
        # null-ordering semantics relative to the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # LIMIT / TOP / FETCH. `top=True` parses SQL Server style TOP [(expr)];
        # MySQL's `LIMIT offset, count` comma form is also handled here.
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                # TOP (expr) allows a full term; bare TOP only takes a number
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT a, b — the first value is actually the offset
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} <count> [PERCENT] {ROW|ROWS} {ONLY|WITH TIES}
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # OFFSET <count> [ROW|ROWS]; returns `this` unchanged if absent.
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse `LIMIT n BY expr, ...` suffix.
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        # Row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        # each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED.
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, an expression for WAIT n
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Left-associative chain of UNION / EXCEPT / INTERSECT applied to `this`.
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default when neither DISTINCT nor ALL is specified
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        # Some dialects attach trailing ORDER BY/LIMIT etc. to the whole set
        # operation rather than to the last SELECT — hoist them up here.
        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # A full (possibly aliased) scalar expression, e.g. a SELECT list item.
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        # Lowest-precedence expression level: handles `:=`-style assignments.
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # The following tiny methods encode operator precedence, each delegating to
    # the next-tighter level via _parse_tokens.

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Range-style predicates: BETWEEN/IN/LIKE/etc. (via RANGE_PARSERS),
        # plus ISNULL/NOTNULL shorthands and IS predicates, with optional NOT.
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Wraps a range predicate in NOT; overridable hook for dialects.
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # IS [NOT] {DISTINCT FROM expr | JSON ... | <primary>/NULL}.
        # Retreats and returns None when nothing valid follows IS.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON [kind] [WITH|WITHOUT [UNIQUE] KEYS]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # IN (subquery | expr, ... | UNNEST(...)) or the unparenthesized
        # `IN field` form as a fallback.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # BETWEEN low AND high.
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix for LIKE-style predicates.
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        # INTERVAL literal, normalized towards the canonical `INTERVAL '<n>' <unit>`
        # form; also folds `INTERVAL 'a' x 'b' y` chains into a sum of intervals.
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out if what followed INTERVAL isn't usable as an interval value
        # (e.g. the bare keyword before an IS predicate).
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        # Bitwise-precedence operators, plus || string concat (dialect-gated),
        # ?? coalesce, and << / >> shifts tokenized as LT LT / GT GT pairs.
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive-level operators (self.TERM), with special-casing for COLLATE.
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative-level operators (self.FACTOR); annotates Div nodes with
        # the dialect's typed/safe division semantics.
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator like DIV with no right operand was actually an
            # identifier — put the token back and stop.
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        # Exponentiation level, only active when the dialect defines EXPONENT tokens.
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Unary prefix operators, otherwise a typed expression with optional
        # AT TIME ZONE suffix.
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses either an interval, a cast-like `<type> '<literal>'` construct,
        # or falls back to a column/identifier when no type applies.
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # A single parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0),
        # or a var like MAX in VARCHAR(MAX).
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a data type, covering parameterized, nested (STRUCT/ARRAY/MAP),
        enum, aggregate, timezone-qualified, interval, and user-defined types.
        Returns None (after retreating) when no type can be parsed."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Possibly a type spelled as a plain identifier (or a UDT).
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, T1, T2, ...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `<type>(...)` might actually be a function call — checked below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket nested type args, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values: ARRAY<INT>[1, 2] / STRUCT<...>(...)
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # A following string literal means `<type>(...)` was a type after all;
            # otherwise treat it as a function call and give up on the type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        # One `name [:] type` member of a STRUCT type definition.
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional AT TIME ZONE suffix; returns `this` unchanged if absent.
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        # A (possibly dotted / bracketed) column expression, with Oracle-style
        # (+) join markers when the dialect supports them.
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        # A bare field, with special handling for VALUES used as an identifier.
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Snowflake/Databricks `col:path.to.field::type` VARIANT extraction,
        # normalized into JSONExtract with any trailing casts re-applied on top.
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Use the raw SQL text of the path segment, not the parsed AST
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # The right-hand side of a `::` cast is a type.
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Postfix column operators: `::` casts, dots (building up table/db/catalog
        # qualification), brackets, and dialect-specific COLUMN_OPERATORS.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: a.b.c -> Column(this=c, table=b, db=a)
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Primary expressions: literals (with implicit adjacent-string concat),
        # leading-dot numbers, and parenthesized expressions/subqueries/tuples.
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier.

        When `anonymous_func` is set, function parsing is attempted first so that
        names followed by parens are preferred over bare literals/identifiers.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse the actual function invocation: name, arguments, special-cased
        parsers (FUNCTION_PARSERS / NO_PAREN_*), subquery predicates and windows.

        Returns None when the upcoming tokens do not form a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions that can appear without parentheses (e.g. CURRENT_DATE)
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders are dialect-aware; inspect their signature to decide
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Hook for dialects to post-process positional key-value args; no-op by default
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, k=v pairs) into PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Keys are identifiers, not column references
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter looks like a column definition (name + optional type)
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)
    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        # Lambda parameters are plain identifiers; dialects may override
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y` or `x -> x`); if no lambda arrow
        follows, backtrack and parse a regular expression (or DISTINCT list)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all -- rewind and parse as an ordinary expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into a Schema node."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints of a column definition whose name is `this`."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's `x ALIAS expr` / `x MATERIALIZED expr`
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing column-def-like followed; return the bare identifier
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `AUTO REFRESH <var>`; backtrack if REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): a computed column, not an identity
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint that follows a NOT keyword (NULL, CASESPECIFIC, ...)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named via CONSTRAINT <name>) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; named ones start with CONSTRAINT <name>."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-style checks)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the CONSTRAINT_PARSERS entry matching the next keyword."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with its optional modifiers."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key options as raw strings: ON <event> <action> and
        keyword options from KEY_CONSTRAINT_OPTIONS."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table (with schema) plus key options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-keyword actions such as CASCADE/RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Dialect hook: what a single PRIMARY KEY element looks like
        return self._parse_field()
    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col); backtrack otherwise."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level
        constraint with a wrapped column list and key options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows -> column-level constraint
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One element of a bracket/brace literal: expr [AS alias] [: slice]
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}`: array or struct literals, typed array constructors,
        or an index/subscript applied to `this`. Recurses for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Subscript: adjust indices for the dialect's base offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # "END" was mistakenly consumed as an interval unit of a column named
                # "interval" -- undo that instead of erroring out
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call IF(...) or as statement-style
        IF <cond> THEN <expr> [ELSE <expr>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; backtrack otherwise."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr); a comma is also accepted as separator."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        A temporal target type with a FORMAT clause is rewritten into
        StrToDate/StrToTime instead of a plain Cast.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the
        WITHIN GROUP (ORDER BY ...) form, into a GroupConcat node."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a Cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            # Two-argument form: binary-to-string decoding
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE matches NULL to NULL, which "=" would not
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equal, or both sides NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd count of (search, result) pairs leaves a trailing default value
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] key <sep> [VALUE] value` pair of a JSON_OBJECT call."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when a trailing FORMAT JSON clause is present."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the body of JSON_OBJECT / JSON_OBJECTAGG: key-value pairs (or *),
        NULL/ABSENT handling, key uniqueness, RETURNING type and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED columns carry a sub-schema instead of a name/type
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema clause of JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [error/empty handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('query' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(doc [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
self._parse_types() 5945 path = self._parse_string() 5946 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5947 5948 return self.expression( 5949 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5950 ) 5951 5952 expressions = None 5953 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5954 self._match_l_paren() 5955 expressions = self._parse_csv(_parse_open_json_column_def) 5956 5957 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5958 5959 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5960 args = self._parse_csv(self._parse_bitwise) 5961 5962 if self._match(TokenType.IN): 5963 return self.expression( 5964 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5965 ) 5966 5967 if haystack_first: 5968 haystack = seq_get(args, 0) 5969 needle = seq_get(args, 1) 5970 else: 5971 needle = seq_get(args, 0) 5972 haystack = seq_get(args, 1) 5973 5974 return self.expression( 5975 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5976 ) 5977 5978 def _parse_predict(self) -> exp.Predict: 5979 self._match_text_seq("MODEL") 5980 this = self._parse_table() 5981 5982 self._match(TokenType.COMMA) 5983 self._match_text_seq("TABLE") 5984 5985 return self.expression( 5986 exp.Predict, 5987 this=this, 5988 expression=self._parse_table(), 5989 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5990 ) 5991 5992 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5993 args = self._parse_csv(self._parse_table) 5994 return exp.JoinHint(this=func_name.upper(), expressions=args) 5995 5996 def _parse_substring(self) -> exp.Substring: 5997 # Postgres supports the form: substring(string [from int] [for int]) 5998 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5999 6000 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6001 6002 if self._match(TokenType.FROM): 6003 
args.append(self._parse_bitwise()) 6004 if self._match(TokenType.FOR): 6005 if len(args) == 1: 6006 args.append(exp.Literal.number(1)) 6007 args.append(self._parse_bitwise()) 6008 6009 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6010 6011 def _parse_trim(self) -> exp.Trim: 6012 # https://www.w3resource.com/sql/character-functions/trim.php 6013 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6014 6015 position = None 6016 collation = None 6017 expression = None 6018 6019 if self._match_texts(self.TRIM_TYPES): 6020 position = self._prev.text.upper() 6021 6022 this = self._parse_bitwise() 6023 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6024 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6025 expression = self._parse_bitwise() 6026 6027 if invert_order: 6028 this, expression = expression, this 6029 6030 if self._match(TokenType.COLLATE): 6031 collation = self._parse_bitwise() 6032 6033 return self.expression( 6034 exp.Trim, this=this, position=position, expression=expression, collation=collation 6035 ) 6036 6037 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6038 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6039 6040 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6041 return self._parse_window(self._parse_id_var(), alias=True) 6042 6043 def _parse_respect_or_ignore_nulls( 6044 self, this: t.Optional[exp.Expression] 6045 ) -> t.Optional[exp.Expression]: 6046 if self._match_text_seq("IGNORE", "NULLS"): 6047 return self.expression(exp.IgnoreNulls, this=this) 6048 if self._match_text_seq("RESPECT", "NULLS"): 6049 return self.expression(exp.RespectNulls, this=this) 6050 return this 6051 6052 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6053 if self._match(TokenType.HAVING): 6054 self._match_texts(("MAX", "MIN")) 6055 max = self._prev.text.upper() != "MIN" 6056 
return self.expression( 6057 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6058 ) 6059 6060 return this 6061 6062 def _parse_window( 6063 self, this: t.Optional[exp.Expression], alias: bool = False 6064 ) -> t.Optional[exp.Expression]: 6065 func = this 6066 comments = func.comments if isinstance(func, exp.Expression) else None 6067 6068 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6069 self._match(TokenType.WHERE) 6070 this = self.expression( 6071 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6072 ) 6073 self._match_r_paren() 6074 6075 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6076 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6077 if self._match_text_seq("WITHIN", "GROUP"): 6078 order = self._parse_wrapped(self._parse_order) 6079 this = self.expression(exp.WithinGroup, this=this, expression=order) 6080 6081 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6082 # Some dialects choose to implement and some do not. 6083 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6084 6085 # There is some code above in _parse_lambda that handles 6086 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6087 6088 # The below changes handle 6089 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6090 6091 # Oracle allows both formats 6092 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6093 # and Snowflake chose to do the same for familiarity 6094 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6095 if isinstance(this, exp.AggFunc): 6096 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6097 6098 if ignore_respect and ignore_respect is not this: 6099 ignore_respect.replace(ignore_respect.this) 6100 this = self.expression(ignore_respect.__class__, this=this) 6101 6102 this = self._parse_respect_or_ignore_nulls(this) 6103 6104 # bigquery select from window x AS (partition by ...) 6105 if alias: 6106 over = None 6107 self._match(TokenType.ALIAS) 6108 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6109 return this 6110 else: 6111 over = self._prev.text.upper() 6112 6113 if comments and isinstance(func, exp.Expression): 6114 func.pop_comments() 6115 6116 if not self._match(TokenType.L_PAREN): 6117 return self.expression( 6118 exp.Window, 6119 comments=comments, 6120 this=this, 6121 alias=self._parse_id_var(False), 6122 over=over, 6123 ) 6124 6125 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6126 6127 first = self._match(TokenType.FIRST) 6128 if self._match_text_seq("LAST"): 6129 first = False 6130 6131 partition, order = self._parse_partition_and_order() 6132 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6133 6134 if kind: 6135 self._match(TokenType.BETWEEN) 6136 start = self._parse_window_spec() 6137 self._match(TokenType.AND) 6138 end = self._parse_window_spec() 6139 6140 spec = self.expression( 6141 exp.WindowSpec, 6142 kind=kind, 6143 start=start["value"], 6144 start_side=start["side"], 6145 end=end["value"], 6146 end_side=end["side"], 6147 ) 6148 else: 6149 spec = None 6150 6151 self._match_r_paren() 6152 6153 window = self.expression( 6154 exp.Window, 6155 comments=comments, 
6156 this=this, 6157 partition_by=partition, 6158 order=order, 6159 spec=spec, 6160 alias=window_alias, 6161 over=over, 6162 first=first, 6163 ) 6164 6165 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6166 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6167 return self._parse_window(window, alias=alias) 6168 6169 return window 6170 6171 def _parse_partition_and_order( 6172 self, 6173 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6174 return self._parse_partition_by(), self._parse_order() 6175 6176 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6177 self._match(TokenType.BETWEEN) 6178 6179 return { 6180 "value": ( 6181 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6182 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6183 or self._parse_bitwise() 6184 ), 6185 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6186 } 6187 6188 def _parse_alias( 6189 self, this: t.Optional[exp.Expression], explicit: bool = False 6190 ) -> t.Optional[exp.Expression]: 6191 any_token = self._match(TokenType.ALIAS) 6192 comments = self._prev_comments or [] 6193 6194 if explicit and not any_token: 6195 return this 6196 6197 if self._match(TokenType.L_PAREN): 6198 aliases = self.expression( 6199 exp.Aliases, 6200 comments=comments, 6201 this=this, 6202 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6203 ) 6204 self._match_r_paren(aliases) 6205 return aliases 6206 6207 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6208 self.STRING_ALIASES and self._parse_string_as_identifier() 6209 ) 6210 6211 if alias: 6212 comments.extend(alias.pop_comments()) 6213 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6214 column = this.this 6215 6216 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6217 if not this.comments and column and column.comments: 6218 this.comments = 
column.pop_comments() 6219 6220 return this 6221 6222 def _parse_id_var( 6223 self, 6224 any_token: bool = True, 6225 tokens: t.Optional[t.Collection[TokenType]] = None, 6226 ) -> t.Optional[exp.Expression]: 6227 expression = self._parse_identifier() 6228 if not expression and ( 6229 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6230 ): 6231 quoted = self._prev.token_type == TokenType.STRING 6232 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6233 6234 return expression 6235 6236 def _parse_string(self) -> t.Optional[exp.Expression]: 6237 if self._match_set(self.STRING_PARSERS): 6238 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6239 return self._parse_placeholder() 6240 6241 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6242 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6243 6244 def _parse_number(self) -> t.Optional[exp.Expression]: 6245 if self._match_set(self.NUMERIC_PARSERS): 6246 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6247 return self._parse_placeholder() 6248 6249 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6250 if self._match(TokenType.IDENTIFIER): 6251 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6252 return self._parse_placeholder() 6253 6254 def _parse_var( 6255 self, 6256 any_token: bool = False, 6257 tokens: t.Optional[t.Collection[TokenType]] = None, 6258 upper: bool = False, 6259 ) -> t.Optional[exp.Expression]: 6260 if ( 6261 (any_token and self._advance_any()) 6262 or self._match(TokenType.VAR) 6263 or (self._match_set(tokens) if tokens else False) 6264 ): 6265 return self.expression( 6266 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6267 ) 6268 return self._parse_placeholder() 6269 6270 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6271 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6272 self._advance() 6273 return self._prev 6274 return None 6275 6276 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6277 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6278 6279 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6280 return self._parse_primary() or self._parse_var(any_token=True) 6281 6282 def _parse_null(self) -> t.Optional[exp.Expression]: 6283 if self._match_set(self.NULL_TOKENS): 6284 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6285 return self._parse_placeholder() 6286 6287 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6288 if self._match(TokenType.TRUE): 6289 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6290 if self._match(TokenType.FALSE): 6291 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6292 return self._parse_placeholder() 6293 6294 def _parse_star(self) -> t.Optional[exp.Expression]: 6295 if self._match(TokenType.STAR): 6296 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6297 return self._parse_placeholder() 6298 6299 def _parse_parameter(self) -> exp.Parameter: 6300 this = self._parse_identifier() or self._parse_primary_or_var() 6301 return self.expression(exp.Parameter, this=this) 6302 6303 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6304 if self._match_set(self.PLACEHOLDER_PARSERS): 6305 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6306 if placeholder: 6307 return placeholder 6308 self._advance(-1) 6309 return None 6310 6311 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6312 if not self._match_texts(keywords): 6313 return None 6314 if self._match(TokenType.L_PAREN, advance=False): 6315 return self._parse_wrapped_csv(self._parse_expression) 6316 6317 expression = self._parse_expression() 6318 return [expression] if expression else None 

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parses a `sep`-separated list using parse_method; None results are skipped
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: parses `a OP b OP c` into nested binary expressions,
        # with `expressions` mapping operator tokens to expression classes
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parses parse_method's production inside parentheses; with optional=True
        # the parentheses may be absent
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears inside DDL (e.g. CREATE TABLE ... AS <select>)
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. ISOLATION LEVEL ...
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): the AND [NO] CHAIN tokens are consumed but `chain` is only
        # attached to Commit, not Rollback — confirm this is intentional
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # Handles the ADD branch of ALTER TABLE: constraints, columns or a schema
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        # Redshift: ALTER TABLE ... ALTER DISTSTYLE ALL|EVEN|AUTO|KEY DISTKEY col
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        # Redshift: ALTER [COMPOUND] SORTKEY (cols) | AUTO | NONE
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        # RENAME COLUMN old TO new, or RENAME [TO] <table>
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        # ALTER TABLE ... SET <option>; each branch fills a different AlterSet arg
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            # Not an alterable object kind — fall back to an opaque Command
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only accept the parse if all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE [INTO] <target> USING <source> ON <condition> WHEN ...
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        # Parses the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # One item of a SET statement: <name> = <value> or <name> TO <value>
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare names on the RHS are treated as variables, not column refs
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Leftover tokens mean the SET parse didn't consume everything — bail out
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        # Parses a (possibly multi-word) option from `options`, which maps a
        # leading keyword to its allowed continuation keyword sequences
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; an empty tuple in `options` means the
            # leading keyword alone is valid
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consumes the rest of the statement verbatim into an opaque Command
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # ClickHouse dictionary properties, e.g. SOURCE(CLICKHOUSE(...))
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # ClickHouse RANGE(MIN x MAX y); MIN defaults to 0 when only MAX is given
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        # e.g. [x * 2 for x in arr if x > 0] — requires IN to commit to the parse
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        # Parses $tag$...$tag$ style heredoc strings when the tokenizer didn't
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Longest-prefix lookup of upcoming tokens against a keyword trie,
        # returning the matching sub-parser or None (position restored on failure)
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True and (optionally) advances if the current token matches;
        # comments on the consumed token are attached to `expression`
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Matches two consecutive tokens; advances past both when advance=True
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive match of the current token's text against `texts`
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Matches a sequence of token texts; restores the position on any miss
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this
6992 if typ: 6993 dot_or_id = self.expression( 6994 exp.Cast, 6995 this=dot_or_id, 6996 to=typ, 6997 ) 6998 6999 parent = column.parent 7000 7001 while isinstance(parent, exp.Dot): 7002 if not isinstance(parent.parent, exp.Dot): 7003 parent.replace(dot_or_id) 7004 break 7005 parent = parent.parent 7006 else: 7007 if column is node: 7008 node = dot_or_id 7009 else: 7010 column.replace(dot_or_id) 7011 return node 7012 7013 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7014 start = self._prev 7015 7016 # Not to be confused with TRUNCATE(number, decimals) function call 7017 if self._match(TokenType.L_PAREN): 7018 self._retreat(self._index - 2) 7019 return self._parse_function() 7020 7021 # Clickhouse supports TRUNCATE DATABASE as well 7022 is_database = self._match(TokenType.DATABASE) 7023 7024 self._match(TokenType.TABLE) 7025 7026 exists = self._parse_exists(not_=False) 7027 7028 expressions = self._parse_csv( 7029 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7030 ) 7031 7032 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7033 7034 if self._match_text_seq("RESTART", "IDENTITY"): 7035 identity = "RESTART" 7036 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7037 identity = "CONTINUE" 7038 else: 7039 identity = None 7040 7041 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7042 option = self._prev.text 7043 else: 7044 option = None 7045 7046 partition = self._parse_partition() 7047 7048 # Fallback case 7049 if self._curr: 7050 return self._parse_as_command(start) 7051 7052 return self.expression( 7053 exp.TruncateTable, 7054 expressions=expressions, 7055 is_database=is_database, 7056 exists=exists, 7057 cluster=cluster, 7058 identity=identity, 7059 option=option, 7060 partition=partition, 7061 ) 7062 7063 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7064 this = self._parse_ordered(self._parse_opclass) 7065 7066 if not 
self._match(TokenType.WITH): 7067 return this 7068 7069 op = self._parse_var(any_token=True) 7070 7071 return self.expression(exp.WithOperator, this=this, op=op) 7072 7073 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7074 self._match(TokenType.EQ) 7075 self._match(TokenType.L_PAREN) 7076 7077 opts: t.List[t.Optional[exp.Expression]] = [] 7078 while self._curr and not self._match(TokenType.R_PAREN): 7079 if self._match_text_seq("FORMAT_NAME", "="): 7080 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7081 # so we parse it separately to use _parse_field() 7082 prop = self.expression( 7083 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7084 ) 7085 opts.append(prop) 7086 else: 7087 opts.append(self._parse_property()) 7088 7089 self._match(TokenType.COMMA) 7090 7091 return opts 7092 7093 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7094 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7095 7096 options = [] 7097 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7098 option = self._parse_var(any_token=True) 7099 prev = self._prev.text.upper() 7100 7101 # Different dialects might separate options and values by white space, "=" and "AS" 7102 self._match(TokenType.EQ) 7103 self._match(TokenType.ALIAS) 7104 7105 param = self.expression(exp.CopyParameter, this=option) 7106 7107 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7108 TokenType.L_PAREN, advance=False 7109 ): 7110 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7111 param.set("expressions", self._parse_wrapped_options()) 7112 elif prev == "FILE_FORMAT": 7113 # T-SQL's external file format case 7114 param.set("expression", self._parse_field()) 7115 else: 7116 param.set("expression", self._parse_unquoted_field()) 7117 7118 options.append(param) 7119 self._match(sep) 7120 7121 return options 7122 7123 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 
7124 expr = self.expression(exp.Credentials) 7125 7126 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7127 expr.set("storage", self._parse_field()) 7128 if self._match_text_seq("CREDENTIALS"): 7129 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7130 creds = ( 7131 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7132 ) 7133 expr.set("credentials", creds) 7134 if self._match_text_seq("ENCRYPTION"): 7135 expr.set("encryption", self._parse_wrapped_options()) 7136 if self._match_text_seq("IAM_ROLE"): 7137 expr.set("iam_role", self._parse_field()) 7138 if self._match_text_seq("REGION"): 7139 expr.set("region", self._parse_field()) 7140 7141 return expr 7142 7143 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7144 return self._parse_field() 7145 7146 def _parse_copy(self) -> exp.Copy | exp.Command: 7147 start = self._prev 7148 7149 self._match(TokenType.INTO) 7150 7151 this = ( 7152 self._parse_select(nested=True, parse_subquery_alias=False) 7153 if self._match(TokenType.L_PAREN, advance=False) 7154 else self._parse_table(schema=True) 7155 ) 7156 7157 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7158 7159 files = self._parse_csv(self._parse_file_location) 7160 credentials = self._parse_credentials() 7161 7162 self._match_text_seq("WITH") 7163 7164 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7165 7166 # Fallback case 7167 if self._curr: 7168 return self._parse_as_command(start) 7169 7170 return self.expression( 7171 exp.Copy, 7172 this=this, 7173 kind=kind, 7174 credentials=credentials, 7175 files=files, 7176 params=params, 7177 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap from key/value pairs."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate: key, value, key, value, ...
    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []
    i = 0
    while i < len(args):
        keys.append(args[i])
        values.append(args[i + 1])
        i += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable building `expr_type` as a (possibly escaped) binary range."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs = this
        rhs = self._parse_bitwise()

        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG node, honoring the dialect's base/expression order and LN default."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # One-argument form: some dialects define LOG(x) as the natural logarithm
        klass = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return klass(this=this)

    # Default argument order is base, expression; some dialects flip it
    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for `expr_type` whose path argument is converted per dialect."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        result = expr_type(this=seq_get(args, 0), expression=json_path)

        # Only JSON_EXTRACT carries trailing variadic arguments
        if expr_type is exp.JSONExtract and len(args) > 2:
            result.set("expressions", args[2:])

        return result

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD expression, parenthesizing binary operands to preserve precedence."""

    def _maybe_paren(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap the operand if it is a binary node, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_maybe_paren(seq_get(args, 0)),
        expression=_maybe_paren(seq_get(args, 1)),
    )
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node, recording bracket notation where dialects distinguish it."""
    node = exp_class(expressions=args)

    if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS and exp_class == exp.Array:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; the two-argument form falls back to `default_source_tz`."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
162class Parser(metaclass=_Parser): 163 """ 164 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 165 166 Args: 167 error_level: The desired error level. 168 Default: ErrorLevel.IMMEDIATE 169 error_message_context: The amount of context to capture from a query string when displaying 170 the error message (in number of characters). 171 Default: 100 172 max_errors: Maximum number of error messages to include in a raised ParseError. 173 This is only relevant if error_level is ErrorLevel.RAISE. 174 Default: 3 175 """ 176 177 FUNCTIONS: t.Dict[str, t.Callable] = { 178 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 179 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 180 "CONCAT": lambda args, dialect: exp.Concat( 181 expressions=args, 182 safe=not dialect.STRICT_STRING_CONCAT, 183 coalesce=dialect.CONCAT_COALESCE, 184 ), 185 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 186 expressions=args, 187 safe=not dialect.STRICT_STRING_CONCAT, 188 coalesce=dialect.CONCAT_COALESCE, 189 ), 190 "CONVERT_TIMEZONE": build_convert_timezone, 191 "DATE_TO_DATE_STR": lambda args: exp.Cast( 192 this=seq_get(args, 0), 193 to=exp.DataType(this=exp.DataType.Type.TEXT), 194 ), 195 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 196 start=seq_get(args, 0), 197 end=seq_get(args, 1), 198 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 199 ), 200 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 201 "HEX": build_hex, 202 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 203 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 204 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 205 "LIKE": build_like, 206 "LOG": build_logarithm, 207 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 208 "LOG10": lambda args: 
exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 209 "LOWER": build_lower, 210 "LPAD": lambda args: build_pad(args), 211 "LEFTPAD": lambda args: build_pad(args), 212 "LTRIM": lambda args: build_trim(args), 213 "MOD": build_mod, 214 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 215 "RPAD": lambda args: build_pad(args, is_left=False), 216 "RTRIM": lambda args: build_trim(args, is_left=False), 217 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 218 if len(args) != 2 219 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 220 "TIME_TO_TIME_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "TO_HEX": build_hex, 225 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 226 this=exp.Cast( 227 this=seq_get(args, 0), 228 to=exp.DataType(this=exp.DataType.Type.TEXT), 229 ), 230 start=exp.Literal.number(1), 231 length=exp.Literal.number(10), 232 ), 233 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 234 "UPPER": build_upper, 235 "VAR_MAP": build_var_map, 236 "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]), 237 } 238 239 NO_PAREN_FUNCTIONS = { 240 TokenType.CURRENT_DATE: exp.CurrentDate, 241 TokenType.CURRENT_DATETIME: exp.CurrentDate, 242 TokenType.CURRENT_TIME: exp.CurrentTime, 243 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 244 TokenType.CURRENT_USER: exp.CurrentUser, 245 } 246 247 STRUCT_TYPE_TOKENS = { 248 TokenType.NESTED, 249 TokenType.OBJECT, 250 TokenType.STRUCT, 251 } 252 253 NESTED_TYPE_TOKENS = { 254 TokenType.ARRAY, 255 TokenType.LIST, 256 TokenType.LOWCARDINALITY, 257 TokenType.MAP, 258 TokenType.NULLABLE, 259 *STRUCT_TYPE_TOKENS, 260 } 261 262 ENUM_TYPE_TOKENS = { 263 TokenType.ENUM, 264 TokenType.ENUM8, 265 TokenType.ENUM16, 266 } 267 268 AGGREGATE_TYPE_TOKENS = { 269 TokenType.AGGREGATEFUNCTION, 270 TokenType.SIMPLEAGGREGATEFUNCTION, 271 } 
272 273 TYPE_TOKENS = { 274 TokenType.BIT, 275 TokenType.BOOLEAN, 276 TokenType.TINYINT, 277 TokenType.UTINYINT, 278 TokenType.SMALLINT, 279 TokenType.USMALLINT, 280 TokenType.INT, 281 TokenType.UINT, 282 TokenType.BIGINT, 283 TokenType.UBIGINT, 284 TokenType.INT128, 285 TokenType.UINT128, 286 TokenType.INT256, 287 TokenType.UINT256, 288 TokenType.MEDIUMINT, 289 TokenType.UMEDIUMINT, 290 TokenType.FIXEDSTRING, 291 TokenType.FLOAT, 292 TokenType.DOUBLE, 293 TokenType.CHAR, 294 TokenType.NCHAR, 295 TokenType.VARCHAR, 296 TokenType.NVARCHAR, 297 TokenType.BPCHAR, 298 TokenType.TEXT, 299 TokenType.MEDIUMTEXT, 300 TokenType.LONGTEXT, 301 TokenType.MEDIUMBLOB, 302 TokenType.LONGBLOB, 303 TokenType.BINARY, 304 TokenType.VARBINARY, 305 TokenType.JSON, 306 TokenType.JSONB, 307 TokenType.INTERVAL, 308 TokenType.TINYBLOB, 309 TokenType.TINYTEXT, 310 TokenType.TIME, 311 TokenType.TIMETZ, 312 TokenType.TIMESTAMP, 313 TokenType.TIMESTAMP_S, 314 TokenType.TIMESTAMP_MS, 315 TokenType.TIMESTAMP_NS, 316 TokenType.TIMESTAMPTZ, 317 TokenType.TIMESTAMPLTZ, 318 TokenType.TIMESTAMPNTZ, 319 TokenType.DATETIME, 320 TokenType.DATETIME64, 321 TokenType.DATE, 322 TokenType.DATE32, 323 TokenType.INT4RANGE, 324 TokenType.INT4MULTIRANGE, 325 TokenType.INT8RANGE, 326 TokenType.INT8MULTIRANGE, 327 TokenType.NUMRANGE, 328 TokenType.NUMMULTIRANGE, 329 TokenType.TSRANGE, 330 TokenType.TSMULTIRANGE, 331 TokenType.TSTZRANGE, 332 TokenType.TSTZMULTIRANGE, 333 TokenType.DATERANGE, 334 TokenType.DATEMULTIRANGE, 335 TokenType.DECIMAL, 336 TokenType.UDECIMAL, 337 TokenType.BIGDECIMAL, 338 TokenType.UUID, 339 TokenType.GEOGRAPHY, 340 TokenType.GEOMETRY, 341 TokenType.HLLSKETCH, 342 TokenType.HSTORE, 343 TokenType.PSEUDO_TYPE, 344 TokenType.SUPER, 345 TokenType.SERIAL, 346 TokenType.SMALLSERIAL, 347 TokenType.BIGSERIAL, 348 TokenType.XML, 349 TokenType.YEAR, 350 TokenType.UNIQUEIDENTIFIER, 351 TokenType.USERDEFINED, 352 TokenType.MONEY, 353 TokenType.SMALLMONEY, 354 TokenType.ROWVERSION, 355 TokenType.IMAGE, 
356 TokenType.VARIANT, 357 TokenType.VECTOR, 358 TokenType.OBJECT, 359 TokenType.OBJECT_IDENTIFIER, 360 TokenType.INET, 361 TokenType.IPADDRESS, 362 TokenType.IPPREFIX, 363 TokenType.IPV4, 364 TokenType.IPV6, 365 TokenType.UNKNOWN, 366 TokenType.NULL, 367 TokenType.NAME, 368 TokenType.TDIGEST, 369 *ENUM_TYPE_TOKENS, 370 *NESTED_TYPE_TOKENS, 371 *AGGREGATE_TYPE_TOKENS, 372 } 373 374 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 375 TokenType.BIGINT: TokenType.UBIGINT, 376 TokenType.INT: TokenType.UINT, 377 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 378 TokenType.SMALLINT: TokenType.USMALLINT, 379 TokenType.TINYINT: TokenType.UTINYINT, 380 TokenType.DECIMAL: TokenType.UDECIMAL, 381 } 382 383 SUBQUERY_PREDICATES = { 384 TokenType.ANY: exp.Any, 385 TokenType.ALL: exp.All, 386 TokenType.EXISTS: exp.Exists, 387 TokenType.SOME: exp.Any, 388 } 389 390 RESERVED_TOKENS = { 391 *Tokenizer.SINGLE_TOKENS.values(), 392 TokenType.SELECT, 393 } - {TokenType.IDENTIFIER} 394 395 DB_CREATABLES = { 396 TokenType.DATABASE, 397 TokenType.DICTIONARY, 398 TokenType.MODEL, 399 TokenType.SCHEMA, 400 TokenType.SEQUENCE, 401 TokenType.STORAGE_INTEGRATION, 402 TokenType.TABLE, 403 TokenType.TAG, 404 TokenType.VIEW, 405 TokenType.WAREHOUSE, 406 TokenType.STREAMLIT, 407 } 408 409 CREATABLES = { 410 TokenType.COLUMN, 411 TokenType.CONSTRAINT, 412 TokenType.FOREIGN_KEY, 413 TokenType.FUNCTION, 414 TokenType.INDEX, 415 TokenType.PROCEDURE, 416 *DB_CREATABLES, 417 } 418 419 ALTERABLES = { 420 TokenType.TABLE, 421 TokenType.VIEW, 422 } 423 424 # Tokens that can represent identifiers 425 ID_VAR_TOKENS = { 426 TokenType.ALL, 427 TokenType.VAR, 428 TokenType.ANTI, 429 TokenType.APPLY, 430 TokenType.ASC, 431 TokenType.ASOF, 432 TokenType.AUTO_INCREMENT, 433 TokenType.BEGIN, 434 TokenType.BPCHAR, 435 TokenType.CACHE, 436 TokenType.CASE, 437 TokenType.COLLATE, 438 TokenType.COMMAND, 439 TokenType.COMMENT, 440 TokenType.COMMIT, 441 TokenType.CONSTRAINT, 442 TokenType.COPY, 443 TokenType.CUBE, 444 TokenType.DEFAULT, 445 
TokenType.DELETE, 446 TokenType.DESC, 447 TokenType.DESCRIBE, 448 TokenType.DICTIONARY, 449 TokenType.DIV, 450 TokenType.END, 451 TokenType.EXECUTE, 452 TokenType.ESCAPE, 453 TokenType.FALSE, 454 TokenType.FIRST, 455 TokenType.FILTER, 456 TokenType.FINAL, 457 TokenType.FORMAT, 458 TokenType.FULL, 459 TokenType.IDENTIFIER, 460 TokenType.IS, 461 TokenType.ISNULL, 462 TokenType.INTERVAL, 463 TokenType.KEEP, 464 TokenType.KILL, 465 TokenType.LEFT, 466 TokenType.LOAD, 467 TokenType.MERGE, 468 TokenType.NATURAL, 469 TokenType.NEXT, 470 TokenType.OFFSET, 471 TokenType.OPERATOR, 472 TokenType.ORDINALITY, 473 TokenType.OVERLAPS, 474 TokenType.OVERWRITE, 475 TokenType.PARTITION, 476 TokenType.PERCENT, 477 TokenType.PIVOT, 478 TokenType.PRAGMA, 479 TokenType.RANGE, 480 TokenType.RECURSIVE, 481 TokenType.REFERENCES, 482 TokenType.REFRESH, 483 TokenType.RENAME, 484 TokenType.REPLACE, 485 TokenType.RIGHT, 486 TokenType.ROLLUP, 487 TokenType.ROW, 488 TokenType.ROWS, 489 TokenType.SEMI, 490 TokenType.SET, 491 TokenType.SETTINGS, 492 TokenType.SHOW, 493 TokenType.TEMPORARY, 494 TokenType.TOP, 495 TokenType.TRUE, 496 TokenType.TRUNCATE, 497 TokenType.UNIQUE, 498 TokenType.UNNEST, 499 TokenType.UNPIVOT, 500 TokenType.UPDATE, 501 TokenType.USE, 502 TokenType.VOLATILE, 503 TokenType.WINDOW, 504 *CREATABLES, 505 *SUBQUERY_PREDICATES, 506 *TYPE_TOKENS, 507 *NO_PAREN_FUNCTIONS, 508 } 509 510 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 511 512 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 513 TokenType.ANTI, 514 TokenType.APPLY, 515 TokenType.ASOF, 516 TokenType.FULL, 517 TokenType.LEFT, 518 TokenType.LOCK, 519 TokenType.NATURAL, 520 TokenType.OFFSET, 521 TokenType.RIGHT, 522 TokenType.SEMI, 523 TokenType.WINDOW, 524 } 525 526 ALIAS_TOKENS = ID_VAR_TOKENS 527 528 ARRAY_CONSTRUCTORS = { 529 "ARRAY": exp.Array, 530 "LIST": exp.List, 531 } 532 533 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 534 535 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 536 537 TRIM_TYPES 
= {"LEADING", "TRAILING", "BOTH"} 538 539 FUNC_TOKENS = { 540 TokenType.COLLATE, 541 TokenType.COMMAND, 542 TokenType.CURRENT_DATE, 543 TokenType.CURRENT_DATETIME, 544 TokenType.CURRENT_TIMESTAMP, 545 TokenType.CURRENT_TIME, 546 TokenType.CURRENT_USER, 547 TokenType.FILTER, 548 TokenType.FIRST, 549 TokenType.FORMAT, 550 TokenType.GLOB, 551 TokenType.IDENTIFIER, 552 TokenType.INDEX, 553 TokenType.ISNULL, 554 TokenType.ILIKE, 555 TokenType.INSERT, 556 TokenType.LIKE, 557 TokenType.MERGE, 558 TokenType.OFFSET, 559 TokenType.PRIMARY_KEY, 560 TokenType.RANGE, 561 TokenType.REPLACE, 562 TokenType.RLIKE, 563 TokenType.ROW, 564 TokenType.UNNEST, 565 TokenType.VAR, 566 TokenType.LEFT, 567 TokenType.RIGHT, 568 TokenType.SEQUENCE, 569 TokenType.DATE, 570 TokenType.DATETIME, 571 TokenType.TABLE, 572 TokenType.TIMESTAMP, 573 TokenType.TIMESTAMPTZ, 574 TokenType.TRUNCATE, 575 TokenType.WINDOW, 576 TokenType.XOR, 577 *TYPE_TOKENS, 578 *SUBQUERY_PREDICATES, 579 } 580 581 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 582 TokenType.AND: exp.And, 583 } 584 585 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 586 TokenType.COLON_EQ: exp.PropertyEQ, 587 } 588 589 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 590 TokenType.OR: exp.Or, 591 } 592 593 EQUALITY = { 594 TokenType.EQ: exp.EQ, 595 TokenType.NEQ: exp.NEQ, 596 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 597 } 598 599 COMPARISON = { 600 TokenType.GT: exp.GT, 601 TokenType.GTE: exp.GTE, 602 TokenType.LT: exp.LT, 603 TokenType.LTE: exp.LTE, 604 } 605 606 BITWISE = { 607 TokenType.AMP: exp.BitwiseAnd, 608 TokenType.CARET: exp.BitwiseXor, 609 TokenType.PIPE: exp.BitwiseOr, 610 } 611 612 TERM = { 613 TokenType.DASH: exp.Sub, 614 TokenType.PLUS: exp.Add, 615 TokenType.MOD: exp.Mod, 616 TokenType.COLLATE: exp.Collate, 617 } 618 619 FACTOR = { 620 TokenType.DIV: exp.IntDiv, 621 TokenType.LR_ARROW: exp.Distance, 622 TokenType.SLASH: exp.Div, 623 TokenType.STAR: exp.Mul, 624 } 625 626 EXPONENT: 
t.Dict[TokenType, t.Type[exp.Expression]] = {} 627 628 TIMES = { 629 TokenType.TIME, 630 TokenType.TIMETZ, 631 } 632 633 TIMESTAMPS = { 634 TokenType.TIMESTAMP, 635 TokenType.TIMESTAMPTZ, 636 TokenType.TIMESTAMPLTZ, 637 *TIMES, 638 } 639 640 SET_OPERATIONS = { 641 TokenType.UNION, 642 TokenType.INTERSECT, 643 TokenType.EXCEPT, 644 } 645 646 JOIN_METHODS = { 647 TokenType.ASOF, 648 TokenType.NATURAL, 649 TokenType.POSITIONAL, 650 } 651 652 JOIN_SIDES = { 653 TokenType.LEFT, 654 TokenType.RIGHT, 655 TokenType.FULL, 656 } 657 658 JOIN_KINDS = { 659 TokenType.ANTI, 660 TokenType.CROSS, 661 TokenType.INNER, 662 TokenType.OUTER, 663 TokenType.SEMI, 664 TokenType.STRAIGHT_JOIN, 665 } 666 667 JOIN_HINTS: t.Set[str] = set() 668 669 LAMBDAS = { 670 TokenType.ARROW: lambda self, expressions: self.expression( 671 exp.Lambda, 672 this=self._replace_lambda( 673 self._parse_assignment(), 674 expressions, 675 ), 676 expressions=expressions, 677 ), 678 TokenType.FARROW: lambda self, expressions: self.expression( 679 exp.Kwarg, 680 this=exp.var(expressions[0].name), 681 expression=self._parse_assignment(), 682 ), 683 } 684 685 COLUMN_OPERATORS = { 686 TokenType.DOT: None, 687 TokenType.DCOLON: lambda self, this, to: self.expression( 688 exp.Cast if self.STRICT_CAST else exp.TryCast, 689 this=this, 690 to=to, 691 ), 692 TokenType.ARROW: lambda self, this, path: self.expression( 693 exp.JSONExtract, 694 this=this, 695 expression=self.dialect.to_json_path(path), 696 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 697 ), 698 TokenType.DARROW: lambda self, this, path: self.expression( 699 exp.JSONExtractScalar, 700 this=this, 701 expression=self.dialect.to_json_path(path), 702 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 703 ), 704 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 705 exp.JSONBExtract, 706 this=this, 707 expression=path, 708 ), 709 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 710 exp.JSONBExtractScalar, 711 this=this, 712 
expression=path, 713 ), 714 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 715 exp.JSONBContains, 716 this=this, 717 expression=key, 718 ), 719 } 720 721 EXPRESSION_PARSERS = { 722 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 723 exp.Column: lambda self: self._parse_column(), 724 exp.Condition: lambda self: self._parse_assignment(), 725 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 726 exp.Expression: lambda self: self._parse_expression(), 727 exp.From: lambda self: self._parse_from(joins=True), 728 exp.Group: lambda self: self._parse_group(), 729 exp.Having: lambda self: self._parse_having(), 730 exp.Identifier: lambda self: self._parse_id_var(), 731 exp.Join: lambda self: self._parse_join(), 732 exp.Lambda: lambda self: self._parse_lambda(), 733 exp.Lateral: lambda self: self._parse_lateral(), 734 exp.Limit: lambda self: self._parse_limit(), 735 exp.Offset: lambda self: self._parse_offset(), 736 exp.Order: lambda self: self._parse_order(), 737 exp.Ordered: lambda self: self._parse_ordered(), 738 exp.Properties: lambda self: self._parse_properties(), 739 exp.Qualify: lambda self: self._parse_qualify(), 740 exp.Returning: lambda self: self._parse_returning(), 741 exp.Select: lambda self: self._parse_select(), 742 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 743 exp.Table: lambda self: self._parse_table_parts(), 744 exp.TableAlias: lambda self: self._parse_table_alias(), 745 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 746 exp.Where: lambda self: self._parse_where(), 747 exp.Window: lambda self: self._parse_named_window(), 748 exp.With: lambda self: self._parse_with(), 749 "JOIN_TYPE": lambda self: self._parse_join_parts(), 750 } 751 752 STATEMENT_PARSERS = { 753 TokenType.ALTER: lambda self: self._parse_alter(), 754 TokenType.BEGIN: lambda self: self._parse_transaction(), 755 TokenType.CACHE: lambda self: self._parse_cache(), 756 
TokenType.COMMENT: lambda self: self._parse_comment(), 757 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 758 TokenType.COPY: lambda self: self._parse_copy(), 759 TokenType.CREATE: lambda self: self._parse_create(), 760 TokenType.DELETE: lambda self: self._parse_delete(), 761 TokenType.DESC: lambda self: self._parse_describe(), 762 TokenType.DESCRIBE: lambda self: self._parse_describe(), 763 TokenType.DROP: lambda self: self._parse_drop(), 764 TokenType.INSERT: lambda self: self._parse_insert(), 765 TokenType.KILL: lambda self: self._parse_kill(), 766 TokenType.LOAD: lambda self: self._parse_load(), 767 TokenType.MERGE: lambda self: self._parse_merge(), 768 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 769 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 770 TokenType.REFRESH: lambda self: self._parse_refresh(), 771 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 772 TokenType.SET: lambda self: self._parse_set(), 773 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 774 TokenType.UNCACHE: lambda self: self._parse_uncache(), 775 TokenType.UPDATE: lambda self: self._parse_update(), 776 TokenType.USE: lambda self: self.expression( 777 exp.Use, 778 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 779 this=self._parse_table(schema=False), 780 ), 781 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 782 } 783 784 UNARY_PARSERS = { 785 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 786 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 787 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 788 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 789 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 790 TokenType.DPIPE_SLASH: lambda self: 
self.expression(exp.Cbrt, this=self._parse_unary()), 791 } 792 793 STRING_PARSERS = { 794 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 795 exp.RawString, this=token.text 796 ), 797 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 798 exp.National, this=token.text 799 ), 800 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 801 TokenType.STRING: lambda self, token: self.expression( 802 exp.Literal, this=token.text, is_string=True 803 ), 804 TokenType.UNICODE_STRING: lambda self, token: self.expression( 805 exp.UnicodeString, 806 this=token.text, 807 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 808 ), 809 } 810 811 NUMERIC_PARSERS = { 812 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 813 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 814 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 815 TokenType.NUMBER: lambda self, token: self.expression( 816 exp.Literal, this=token.text, is_string=False 817 ), 818 } 819 820 PRIMARY_PARSERS = { 821 **STRING_PARSERS, 822 **NUMERIC_PARSERS, 823 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 824 TokenType.NULL: lambda self, _: self.expression(exp.Null), 825 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 826 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 827 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 828 TokenType.STAR: lambda self, _: self.expression( 829 exp.Star, 830 **{ 831 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 832 "replace": self._parse_star_op("REPLACE"), 833 "rename": self._parse_star_op("RENAME"), 834 }, 835 ), 836 } 837 838 PLACEHOLDER_PARSERS = { 839 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 840 TokenType.PARAMETER: lambda self: 
self._parse_parameter(), 841 TokenType.COLON: lambda self: ( 842 self.expression(exp.Placeholder, this=self._prev.text) 843 if self._match_set(self.ID_VAR_TOKENS) 844 else None 845 ), 846 } 847 848 RANGE_PARSERS = { 849 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 850 TokenType.GLOB: binary_range_parser(exp.Glob), 851 TokenType.ILIKE: binary_range_parser(exp.ILike), 852 TokenType.IN: lambda self, this: self._parse_in(this), 853 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 854 TokenType.IS: lambda self, this: self._parse_is(this), 855 TokenType.LIKE: binary_range_parser(exp.Like), 856 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 857 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 858 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 859 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 860 } 861 862 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 863 "ALLOWED_VALUES": lambda self: self.expression( 864 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 865 ), 866 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 867 "AUTO": lambda self: self._parse_auto_property(), 868 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 869 "BACKUP": lambda self: self.expression( 870 exp.BackupProperty, this=self._parse_var(any_token=True) 871 ), 872 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 873 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 874 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 875 "CHECKSUM": lambda self: self._parse_checksum(), 876 "CLUSTER BY": lambda self: self._parse_cluster(), 877 "CLUSTERED": lambda self: self._parse_clustered_by(), 878 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 879 exp.CollateProperty, **kwargs 880 ), 881 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 882 "CONTAINS": lambda self: self._parse_contains_property(), 883 "COPY": lambda self: self._parse_copy_property(), 884 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 885 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 886 "DEFINER": lambda self: self._parse_definer(), 887 "DETERMINISTIC": lambda self: self.expression( 888 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 889 ), 890 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 891 "DISTKEY": lambda self: self._parse_distkey(), 892 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 893 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 894 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 895 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 896 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 897 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 898 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 899 "FREESPACE": lambda self: self._parse_freespace(), 900 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 901 "HEAP": lambda self: self.expression(exp.HeapProperty), 902 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 903 "IMMUTABLE": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "INHERITS": lambda self: self.expression( 907 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 908 ), 909 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 910 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 911 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 912 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 913 "LIFETIME": lambda self: 
self._parse_dict_range(this="LIFETIME"), 914 "LIKE": lambda self: self._parse_create_like(), 915 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 916 "LOCK": lambda self: self._parse_locking(), 917 "LOCKING": lambda self: self._parse_locking(), 918 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 919 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 920 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 921 "MODIFIES": lambda self: self._parse_modifies_property(), 922 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 923 "NO": lambda self: self._parse_no_property(), 924 "ON": lambda self: self._parse_on_property(), 925 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 926 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 927 "PARTITION": lambda self: self._parse_partitioned_of(), 928 "PARTITION BY": lambda self: self._parse_partitioned_by(), 929 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 930 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 931 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 932 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 933 "READS": lambda self: self._parse_reads_property(), 934 "REMOTE": lambda self: self._parse_remote_with_connection(), 935 "RETURNS": lambda self: self._parse_returns(), 936 "STRICT": lambda self: self.expression(exp.StrictProperty), 937 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 938 "ROW": lambda self: self._parse_row(), 939 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 940 "SAMPLE": lambda self: self.expression( 941 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 942 ), 943 "SECURE": lambda self: self.expression(exp.SecureProperty), 944 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 
945 "SETTINGS": lambda self: self._parse_settings_property(), 946 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 947 "SORTKEY": lambda self: self._parse_sortkey(), 948 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 949 "STABLE": lambda self: self.expression( 950 exp.StabilityProperty, this=exp.Literal.string("STABLE") 951 ), 952 "STORED": lambda self: self._parse_stored(), 953 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 954 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 955 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 956 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 957 "TO": lambda self: self._parse_to_table(), 958 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 959 "TRANSFORM": lambda self: self.expression( 960 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 961 ), 962 "TTL": lambda self: self._parse_ttl(), 963 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 964 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 965 "VOLATILE": lambda self: self._parse_volatile_property(), 966 "WITH": lambda self: self._parse_with_property(), 967 } 968 969 CONSTRAINT_PARSERS = { 970 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 971 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 972 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 973 "CHARACTER SET": lambda self: self.expression( 974 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 975 ), 976 "CHECK": lambda self: self.expression( 977 exp.CheckColumnConstraint, 978 this=self._parse_wrapped(self._parse_assignment), 979 enforced=self._match_text_seq("ENFORCED"), 980 ), 981 "COLLATE": lambda self: self.expression( 982 exp.CollateColumnConstraint, 983 this=self._parse_identifier() or self._parse_column(), 984 ), 
985 "COMMENT": lambda self: self.expression( 986 exp.CommentColumnConstraint, this=self._parse_string() 987 ), 988 "COMPRESS": lambda self: self._parse_compress(), 989 "CLUSTERED": lambda self: self.expression( 990 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 991 ), 992 "NONCLUSTERED": lambda self: self.expression( 993 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 994 ), 995 "DEFAULT": lambda self: self.expression( 996 exp.DefaultColumnConstraint, this=self._parse_bitwise() 997 ), 998 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 999 "EPHEMERAL": lambda self: self.expression( 1000 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1001 ), 1002 "EXCLUDE": lambda self: self.expression( 1003 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1004 ), 1005 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1006 "FORMAT": lambda self: self.expression( 1007 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1008 ), 1009 "GENERATED": lambda self: self._parse_generated_as_identity(), 1010 "IDENTITY": lambda self: self._parse_auto_increment(), 1011 "INLINE": lambda self: self._parse_inline(), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "NOT": lambda self: self._parse_not_constraint(), 1014 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1015 "ON": lambda self: ( 1016 self._match(TokenType.UPDATE) 1017 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1018 ) 1019 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1020 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1021 "PERIOD": lambda self: self._parse_period_for_system_time(), 1022 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1023 "REFERENCES": lambda self: self._parse_references(match=False), 1024 "TITLE": lambda self: 
self.expression( 1025 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1026 ), 1027 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1028 "UNIQUE": lambda self: self._parse_unique(), 1029 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1030 "WITH": lambda self: self.expression( 1031 exp.Properties, expressions=self._parse_wrapped_properties() 1032 ), 1033 } 1034 1035 ALTER_PARSERS = { 1036 "ADD": lambda self: self._parse_alter_table_add(), 1037 "ALTER": lambda self: self._parse_alter_table_alter(), 1038 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1039 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1040 "DROP": lambda self: self._parse_alter_table_drop(), 1041 "RENAME": lambda self: self._parse_alter_table_rename(), 1042 "SET": lambda self: self._parse_alter_table_set(), 1043 "AS": lambda self: self._parse_select(), 1044 } 1045 1046 ALTER_ALTER_PARSERS = { 1047 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1048 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1049 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1050 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1051 } 1052 1053 SCHEMA_UNNAMED_CONSTRAINTS = { 1054 "CHECK", 1055 "EXCLUDE", 1056 "FOREIGN KEY", 1057 "LIKE", 1058 "PERIOD", 1059 "PRIMARY KEY", 1060 "UNIQUE", 1061 } 1062 1063 NO_PAREN_FUNCTION_PARSERS = { 1064 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1065 "CASE": lambda self: self._parse_case(), 1066 "CONNECT_BY_ROOT": lambda self: self.expression( 1067 exp.ConnectByRoot, this=self._parse_column() 1068 ), 1069 "IF": lambda self: self._parse_if(), 1070 "NEXT": lambda self: self._parse_next_value_for(), 1071 } 1072 1073 INVALID_FUNC_NAME_TOKENS = { 1074 TokenType.IDENTIFIER, 1075 TokenType.STRING, 1076 } 1077 1078 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1079 1080 KEY_VALUE_DEFINITIONS = (exp.Alias, 
exp.EQ, exp.PropertyEQ, exp.Slice) 1081 1082 FUNCTION_PARSERS = { 1083 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1084 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1085 "DECODE": lambda self: self._parse_decode(), 1086 "EXTRACT": lambda self: self._parse_extract(), 1087 "GAP_FILL": lambda self: self._parse_gap_fill(), 1088 "JSON_OBJECT": lambda self: self._parse_json_object(), 1089 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1090 "JSON_TABLE": lambda self: self._parse_json_table(), 1091 "MATCH": lambda self: self._parse_match_against(), 1092 "OPENJSON": lambda self: self._parse_open_json(), 1093 "POSITION": lambda self: self._parse_position(), 1094 "PREDICT": lambda self: self._parse_predict(), 1095 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1096 "STRING_AGG": lambda self: self._parse_string_agg(), 1097 "SUBSTRING": lambda self: self._parse_substring(), 1098 "TRIM": lambda self: self._parse_trim(), 1099 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1100 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1101 } 1102 1103 QUERY_MODIFIER_PARSERS = { 1104 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1105 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1106 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1107 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1108 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1109 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1110 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1111 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1112 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1113 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1114 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1115 TokenType.FOR: lambda self: 
("locks", self._parse_locks()), 1116 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1117 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1118 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1119 TokenType.CLUSTER_BY: lambda self: ( 1120 "cluster", 1121 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1122 ), 1123 TokenType.DISTRIBUTE_BY: lambda self: ( 1124 "distribute", 1125 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1126 ), 1127 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1128 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1129 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1130 } 1131 1132 SET_PARSERS = { 1133 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1134 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1135 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1136 "TRANSACTION": lambda self: self._parse_set_transaction(), 1137 } 1138 1139 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1140 1141 TYPE_LITERAL_PARSERS = { 1142 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1143 } 1144 1145 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1146 1147 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1148 1149 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1150 1151 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1152 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1153 "ISOLATION": ( 1154 ("LEVEL", "REPEATABLE", "READ"), 1155 ("LEVEL", "READ", "COMMITTED"), 1156 ("LEVEL", "READ", "UNCOMITTED"), 1157 ("LEVEL", "SERIALIZABLE"), 1158 ), 1159 "READ": ("WRITE", "ONLY"), 1160 } 1161 1162 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1163 ("ABORT", 
"FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1164 ) 1165 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1166 1167 CREATE_SEQUENCE: OPTIONS_TYPE = { 1168 "SCALE": ("EXTEND", "NOEXTEND"), 1169 "SHARD": ("EXTEND", "NOEXTEND"), 1170 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1171 **dict.fromkeys( 1172 ( 1173 "SESSION", 1174 "GLOBAL", 1175 "KEEP", 1176 "NOKEEP", 1177 "ORDER", 1178 "NOORDER", 1179 "NOCACHE", 1180 "CYCLE", 1181 "NOCYCLE", 1182 "NOMINVALUE", 1183 "NOMAXVALUE", 1184 "NOSCALE", 1185 "NOSHARD", 1186 ), 1187 tuple(), 1188 ), 1189 } 1190 1191 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1192 1193 USABLES: OPTIONS_TYPE = dict.fromkeys( 1194 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1195 ) 1196 1197 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1198 1199 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1200 "TYPE": ("EVOLUTION",), 1201 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1202 } 1203 1204 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1205 "NOT": ("ENFORCED",), 1206 "MATCH": ( 1207 "FULL", 1208 "PARTIAL", 1209 "SIMPLE", 1210 ), 1211 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1212 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1213 } 1214 1215 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1216 1217 CLONE_KEYWORDS = {"CLONE", "COPY"} 1218 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1219 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1220 1221 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1222 1223 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1224 1225 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1226 1227 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1228 1229 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1230 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1231 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 
1232 1233 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1234 1235 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1236 1237 ADD_CONSTRAINT_TOKENS = { 1238 TokenType.CONSTRAINT, 1239 TokenType.FOREIGN_KEY, 1240 TokenType.INDEX, 1241 TokenType.KEY, 1242 TokenType.PRIMARY_KEY, 1243 TokenType.UNIQUE, 1244 } 1245 1246 DISTINCT_TOKENS = {TokenType.DISTINCT} 1247 1248 NULL_TOKENS = {TokenType.NULL} 1249 1250 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1251 1252 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1253 1254 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1255 1256 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1257 1258 STRICT_CAST = True 1259 1260 PREFIXED_PIVOT_COLUMNS = False 1261 IDENTIFY_PIVOT_STRINGS = False 1262 1263 LOG_DEFAULTS_TO_LN = False 1264 1265 # Whether ADD is present for each column added by ALTER TABLE 1266 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1267 1268 # Whether the table sample clause expects CSV syntax 1269 TABLESAMPLE_CSV = False 1270 1271 # The default method used for table sampling 1272 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1273 1274 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1275 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1276 1277 # Whether the TRIM function expects the characters to trim as its first argument 1278 TRIM_PATTERN_FIRST = False 1279 1280 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1281 STRING_ALIASES = False 1282 1283 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1284 MODIFIERS_ATTACHED_TO_SET_OP = True 1285 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1286 1287 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1288 NO_PAREN_IF_COMMANDS = True 1289 1290 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1291 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1292 1293 # Whether the `:` operator is used to extract a value from a VARIANT column 1294 COLON_IS_VARIANT_EXTRACT = False 1295 1296 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1297 # If this is True and '(' is not found, the keyword will be treated as an identifier 1298 VALUES_FOLLOWED_BY_PAREN = True 1299 1300 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1301 SUPPORTS_IMPLICIT_UNNEST = False 1302 1303 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1304 INTERVAL_SPANS = True 1305 1306 # Whether a PARTITION clause can follow a table reference 1307 SUPPORTS_PARTITION_SELECTION = False 1308 1309 __slots__ = ( 1310 "error_level", 1311 "error_message_context", 1312 "max_errors", 1313 "dialect", 1314 "sql", 1315 "errors", 1316 "_tokens", 1317 "_index", 1318 "_curr", 1319 "_next", 1320 "_prev", 1321 "_prev_comments", 1322 ) 1323 1324 # Autofilled 1325 SHOW_TRIE: t.Dict = {} 1326 SET_TRIE: t.Dict = {} 1327 1328 def __init__( 1329 self, 1330 error_level: t.Optional[ErrorLevel] = None, 1331 error_message_context: int = 100, 1332 max_errors: int = 3, 1333 dialect: DialectType = None, 1334 ): 1335 from sqlglot.dialects import Dialect 1336 1337 self.error_level = error_level or ErrorLevel.IMMEDIATE 1338 self.error_message_context = error_message_context 1339 self.max_errors = max_errors 1340 self.dialect = Dialect.get_or_raise(dialect) 1341 self.reset() 1342 1343 def reset(self): 1344 self.sql = "" 1345 self.errors = [] 1346 self._tokens = [] 1347 self._index = 0 1348 self._curr = None 1349 self._next = None 1350 self._prev = None 1351 self._prev_comments = None 1352 1353 def parse( 1354 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1355 ) -> t.List[t.Optional[exp.Expression]]: 1356 """ 1357 Parses a list of tokens and returns a list of syntax trees, one tree 1358 per parsed SQL statement. 1359 1360 Args: 1361 raw_tokens: The list of tokens. 1362 sql: The original SQL string, used to produce helpful debug messages. 1363 1364 Returns: 1365 The list of the produced syntax trees. 
1366 """ 1367 return self._parse( 1368 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1369 ) 1370 1371 def parse_into( 1372 self, 1373 expression_types: exp.IntoType, 1374 raw_tokens: t.List[Token], 1375 sql: t.Optional[str] = None, 1376 ) -> t.List[t.Optional[exp.Expression]]: 1377 """ 1378 Parses a list of tokens into a given Expression type. If a collection of Expression 1379 types is given instead, this method will try to parse the token list into each one 1380 of them, stopping at the first for which the parsing succeeds. 1381 1382 Args: 1383 expression_types: The expression type(s) to try and parse the token list into. 1384 raw_tokens: The list of tokens. 1385 sql: The original SQL string, used to produce helpful debug messages. 1386 1387 Returns: 1388 The target Expression. 1389 """ 1390 errors = [] 1391 for expression_type in ensure_list(expression_types): 1392 parser = self.EXPRESSION_PARSERS.get(expression_type) 1393 if not parser: 1394 raise TypeError(f"No parser registered for {expression_type}") 1395 1396 try: 1397 return self._parse(parser, raw_tokens, sql) 1398 except ParseError as e: 1399 e.errors[0]["into_expression"] = expression_type 1400 errors.append(e) 1401 1402 raise ParseError( 1403 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1404 errors=merge_errors(errors), 1405 ) from errors[-1] 1406 1407 def _parse( 1408 self, 1409 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1410 raw_tokens: t.List[Token], 1411 sql: t.Optional[str] = None, 1412 ) -> t.List[t.Optional[exp.Expression]]: 1413 self.reset() 1414 self.sql = sql or "" 1415 1416 total = len(raw_tokens) 1417 chunks: t.List[t.List[Token]] = [[]] 1418 1419 for i, token in enumerate(raw_tokens): 1420 if token.token_type == TokenType.SEMICOLON: 1421 if token.comments: 1422 chunks.append([token]) 1423 1424 if i < total - 1: 1425 chunks.append([]) 1426 else: 1427 chunks[-1].append(token) 1428 1429 expressions = [] 1430 1431 for 
tokens in chunks: 1432 self._index = -1 1433 self._tokens = tokens 1434 self._advance() 1435 1436 expressions.append(parse_method(self)) 1437 1438 if self._index < len(self._tokens): 1439 self.raise_error("Invalid expression / Unexpected token") 1440 1441 self.check_errors() 1442 1443 return expressions 1444 1445 def check_errors(self) -> None: 1446 """Logs or raises any found errors, depending on the chosen error level setting.""" 1447 if self.error_level == ErrorLevel.WARN: 1448 for error in self.errors: 1449 logger.error(str(error)) 1450 elif self.error_level == ErrorLevel.RAISE and self.errors: 1451 raise ParseError( 1452 concat_messages(self.errors, self.max_errors), 1453 errors=merge_errors(self.errors), 1454 ) 1455 1456 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1457 """ 1458 Appends an error in the list of recorded errors or raises it, depending on the chosen 1459 error level setting. 1460 """ 1461 token = token or self._curr or self._prev or Token.string("") 1462 start = token.start 1463 end = token.end + 1 1464 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1465 highlight = self.sql[start:end] 1466 end_context = self.sql[end : end + self.error_message_context] 1467 1468 error = ParseError.new( 1469 f"{message}. Line {token.line}, Col: {token.col}.\n" 1470 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1471 description=message, 1472 line=token.line, 1473 col=token.col, 1474 start_context=start_context, 1475 highlight=highlight, 1476 end_context=end_context, 1477 ) 1478 1479 if self.error_level == ErrorLevel.IMMEDIATE: 1480 raise error 1481 1482 self.errors.append(error) 1483 1484 def expression( 1485 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1486 ) -> E: 1487 """ 1488 Creates a new, validated Expression. 1489 1490 Args: 1491 exp_class: The expression class to instantiate. 1492 comments: An optional list of comments to attach to the expression. 
1493 kwargs: The arguments to set for the expression along with their respective values. 1494 1495 Returns: 1496 The target expression. 1497 """ 1498 instance = exp_class(**kwargs) 1499 instance.add_comments(comments) if comments else self._add_comments(instance) 1500 return self.validate_expression(instance) 1501 1502 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1503 if expression and self._prev_comments: 1504 expression.add_comments(self._prev_comments) 1505 self._prev_comments = None 1506 1507 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1508 """ 1509 Validates an Expression, making sure that all its mandatory arguments are set. 1510 1511 Args: 1512 expression: The expression to validate. 1513 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1514 1515 Returns: 1516 The validated expression. 1517 """ 1518 if self.error_level != ErrorLevel.IGNORE: 1519 for error_message in expression.error_messages(args): 1520 self.raise_error(error_message) 1521 1522 return expression 1523 1524 def _find_sql(self, start: Token, end: Token) -> str: 1525 return self.sql[start.start : end.end + 1] 1526 1527 def _is_connected(self) -> bool: 1528 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1529 1530 def _advance(self, times: int = 1) -> None: 1531 self._index += times 1532 self._curr = seq_get(self._tokens, self._index) 1533 self._next = seq_get(self._tokens, self._index + 1) 1534 1535 if self._index > 0: 1536 self._prev = self._tokens[self._index - 1] 1537 self._prev_comments = self._prev.comments 1538 else: 1539 self._prev = None 1540 self._prev_comments = None 1541 1542 def _retreat(self, index: int) -> None: 1543 if index != self._index: 1544 self._advance(index - self._index) 1545 1546 def _warn_unsupported(self) -> None: 1547 if len(self._tokens) <= 1: 1548 return 1549 1550 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1551 # interested in emitting a warning for the one being currently processed. 1552 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1553 1554 logger.warning( 1555 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1556 ) 1557 1558 def _parse_command(self) -> exp.Command: 1559 self._warn_unsupported() 1560 return self.expression( 1561 exp.Command, 1562 comments=self._prev_comments, 1563 this=self._prev.text.upper(), 1564 expression=self._parse_string(), 1565 ) 1566 1567 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1568 """ 1569 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1570 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1571 solve this by setting & resetting the parser state accordingly 1572 """ 1573 index = self._index 1574 error_level = self.error_level 1575 1576 self.error_level = ErrorLevel.IMMEDIATE 1577 try: 1578 this = parse_method() 1579 except ParseError: 1580 this = None 1581 finally: 1582 if not this or retreat: 1583 self._retreat(index) 1584 self.error_level = error_level 1585 1586 return this 1587 1588 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1589 start = self._prev 1590 exists = self._parse_exists() if allow_exists else None 1591 1592 self._match(TokenType.ON) 1593 1594 materialized = self._match_text_seq("MATERIALIZED") 1595 kind = self._match_set(self.CREATABLES) and self._prev 1596 if not kind: 1597 return self._parse_as_command(start) 1598 1599 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1600 this = self._parse_user_defined_function(kind=kind.token_type) 1601 elif kind.token_type == TokenType.TABLE: 1602 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1603 elif kind.token_type == TokenType.COLUMN: 1604 this = self._parse_column() 1605 else: 1606 this 
= self._parse_id_var() 1607 1608 self._match(TokenType.IS) 1609 1610 return self.expression( 1611 exp.Comment, 1612 this=this, 1613 kind=kind.text, 1614 expression=self._parse_string(), 1615 exists=exists, 1616 materialized=materialized, 1617 ) 1618 1619 def _parse_to_table( 1620 self, 1621 ) -> exp.ToTableProperty: 1622 table = self._parse_table_parts(schema=True) 1623 return self.expression(exp.ToTableProperty, this=table) 1624 1625 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1626 def _parse_ttl(self) -> exp.Expression: 1627 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1628 this = self._parse_bitwise() 1629 1630 if self._match_text_seq("DELETE"): 1631 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1632 if self._match_text_seq("RECOMPRESS"): 1633 return self.expression( 1634 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1635 ) 1636 if self._match_text_seq("TO", "DISK"): 1637 return self.expression( 1638 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1639 ) 1640 if self._match_text_seq("TO", "VOLUME"): 1641 return self.expression( 1642 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1643 ) 1644 1645 return this 1646 1647 expressions = self._parse_csv(_parse_ttl_action) 1648 where = self._parse_where() 1649 group = self._parse_group() 1650 1651 aggregates = None 1652 if group and self._match(TokenType.SET): 1653 aggregates = self._parse_csv(self._parse_set_item) 1654 1655 return self.expression( 1656 exp.MergeTreeTTL, 1657 expressions=expressions, 1658 where=where, 1659 group=group, 1660 aggregates=aggregates, 1661 ) 1662 1663 def _parse_statement(self) -> t.Optional[exp.Expression]: 1664 if self._curr is None: 1665 return None 1666 1667 if self._match_set(self.STATEMENT_PARSERS): 1668 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1669 1670 if self._match_set(self.dialect.tokenizer.COMMANDS): 1671 return 
self._parse_command() 1672 1673 expression = self._parse_expression() 1674 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1675 return self._parse_query_modifiers(expression) 1676 1677 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1678 start = self._prev 1679 temporary = self._match(TokenType.TEMPORARY) 1680 materialized = self._match_text_seq("MATERIALIZED") 1681 1682 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1683 if not kind: 1684 return self._parse_as_command(start) 1685 1686 if_exists = exists or self._parse_exists() 1687 table = self._parse_table_parts( 1688 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1689 ) 1690 1691 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1692 1693 if self._match(TokenType.L_PAREN, advance=False): 1694 expressions = self._parse_wrapped_csv(self._parse_types) 1695 else: 1696 expressions = None 1697 1698 return self.expression( 1699 exp.Drop, 1700 comments=start.comments, 1701 exists=if_exists, 1702 this=table, 1703 expressions=expressions, 1704 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1705 temporary=temporary, 1706 materialized=materialized, 1707 cascade=self._match_text_seq("CASCADE"), 1708 constraints=self._match_text_seq("CONSTRAINTS"), 1709 purge=self._match_text_seq("PURGE"), 1710 cluster=cluster, 1711 ) 1712 1713 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1714 return ( 1715 self._match_text_seq("IF") 1716 and (not not_ or self._match(TokenType.NOT)) 1717 and self._match(TokenType.EXISTS) 1718 ) 1719 1720 def _parse_create(self) -> exp.Create | exp.Command: 1721 # Note: this can't be None because we've matched a statement parser 1722 start = self._prev 1723 comments = self._prev_comments 1724 1725 replace = ( 1726 start.token_type == TokenType.REPLACE 1727 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1728 or 
self._match_pair(TokenType.OR, TokenType.ALTER) 1729 ) 1730 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1731 1732 unique = self._match(TokenType.UNIQUE) 1733 1734 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1735 clustered = True 1736 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1737 "COLUMNSTORE" 1738 ): 1739 clustered = False 1740 else: 1741 clustered = None 1742 1743 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1744 self._advance() 1745 1746 properties = None 1747 create_token = self._match_set(self.CREATABLES) and self._prev 1748 1749 if not create_token: 1750 # exp.Properties.Location.POST_CREATE 1751 properties = self._parse_properties() 1752 create_token = self._match_set(self.CREATABLES) and self._prev 1753 1754 if not properties or not create_token: 1755 return self._parse_as_command(start) 1756 1757 concurrently = self._match_text_seq("CONCURRENTLY") 1758 exists = self._parse_exists(not_=True) 1759 this = None 1760 expression: t.Optional[exp.Expression] = None 1761 indexes = None 1762 no_schema_binding = None 1763 begin = None 1764 end = None 1765 clone = None 1766 1767 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1768 nonlocal properties 1769 if properties and temp_props: 1770 properties.expressions.extend(temp_props.expressions) 1771 elif temp_props: 1772 properties = temp_props 1773 1774 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1775 this = self._parse_user_defined_function(kind=create_token.token_type) 1776 1777 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1778 extend_props(self._parse_properties()) 1779 1780 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1781 extend_props(self._parse_properties()) 1782 1783 if not expression: 1784 if self._match(TokenType.COMMAND): 1785 expression = self._parse_as_command(self._prev) 1786 else: 1787 begin = 
self._match(TokenType.BEGIN) 1788 return_ = self._match_text_seq("RETURN") 1789 1790 if self._match(TokenType.STRING, advance=False): 1791 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1792 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1793 expression = self._parse_string() 1794 extend_props(self._parse_properties()) 1795 else: 1796 expression = self._parse_statement() 1797 1798 end = self._match_text_seq("END") 1799 1800 if return_: 1801 expression = self.expression(exp.Return, this=expression) 1802 elif create_token.token_type == TokenType.INDEX: 1803 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1804 if not self._match(TokenType.ON): 1805 index = self._parse_id_var() 1806 anonymous = False 1807 else: 1808 index = None 1809 anonymous = True 1810 1811 this = self._parse_index(index=index, anonymous=anonymous) 1812 elif create_token.token_type in self.DB_CREATABLES: 1813 table_parts = self._parse_table_parts( 1814 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1815 ) 1816 1817 # exp.Properties.Location.POST_NAME 1818 self._match(TokenType.COMMA) 1819 extend_props(self._parse_properties(before=True)) 1820 1821 this = self._parse_schema(this=table_parts) 1822 1823 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1824 extend_props(self._parse_properties()) 1825 1826 self._match(TokenType.ALIAS) 1827 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1828 # exp.Properties.Location.POST_ALIAS 1829 extend_props(self._parse_properties()) 1830 1831 if create_token.token_type == TokenType.SEQUENCE: 1832 expression = self._parse_types() 1833 extend_props(self._parse_properties()) 1834 else: 1835 expression = self._parse_ddl_select() 1836 1837 if create_token.token_type == TokenType.TABLE: 1838 # exp.Properties.Location.POST_EXPRESSION 1839 extend_props(self._parse_properties()) 1840 1841 indexes 
= [] 1842 while True: 1843 index = self._parse_index() 1844 1845 # exp.Properties.Location.POST_INDEX 1846 extend_props(self._parse_properties()) 1847 if not index: 1848 break 1849 else: 1850 self._match(TokenType.COMMA) 1851 indexes.append(index) 1852 elif create_token.token_type == TokenType.VIEW: 1853 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1854 no_schema_binding = True 1855 1856 shallow = self._match_text_seq("SHALLOW") 1857 1858 if self._match_texts(self.CLONE_KEYWORDS): 1859 copy = self._prev.text.lower() == "copy" 1860 clone = self.expression( 1861 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1862 ) 1863 1864 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1865 return self._parse_as_command(start) 1866 1867 create_kind_text = create_token.text.upper() 1868 return self.expression( 1869 exp.Create, 1870 comments=comments, 1871 this=this, 1872 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1873 replace=replace, 1874 refresh=refresh, 1875 unique=unique, 1876 expression=expression, 1877 exists=exists, 1878 properties=properties, 1879 indexes=indexes, 1880 no_schema_binding=no_schema_binding, 1881 begin=begin, 1882 end=end, 1883 clone=clone, 1884 concurrently=concurrently, 1885 clustered=clustered, 1886 ) 1887 1888 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1889 seq = exp.SequenceProperties() 1890 1891 options = [] 1892 index = self._index 1893 1894 while self._curr: 1895 self._match(TokenType.COMMA) 1896 if self._match_text_seq("INCREMENT"): 1897 self._match_text_seq("BY") 1898 self._match_text_seq("=") 1899 seq.set("increment", self._parse_term()) 1900 elif self._match_text_seq("MINVALUE"): 1901 seq.set("minvalue", self._parse_term()) 1902 elif self._match_text_seq("MAXVALUE"): 1903 seq.set("maxvalue", self._parse_term()) 1904 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 
1905 self._match_text_seq("=") 1906 seq.set("start", self._parse_term()) 1907 elif self._match_text_seq("CACHE"): 1908 # T-SQL allows empty CACHE which is initialized dynamically 1909 seq.set("cache", self._parse_number() or True) 1910 elif self._match_text_seq("OWNED", "BY"): 1911 # "OWNED BY NONE" is the default 1912 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1913 else: 1914 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1915 if opt: 1916 options.append(opt) 1917 else: 1918 break 1919 1920 seq.set("options", options if options else None) 1921 return None if self._index == index else seq 1922 1923 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1924 # only used for teradata currently 1925 self._match(TokenType.COMMA) 1926 1927 kwargs = { 1928 "no": self._match_text_seq("NO"), 1929 "dual": self._match_text_seq("DUAL"), 1930 "before": self._match_text_seq("BEFORE"), 1931 "default": self._match_text_seq("DEFAULT"), 1932 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1933 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1934 "after": self._match_text_seq("AFTER"), 1935 "minimum": self._match_texts(("MIN", "MINIMUM")), 1936 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1937 } 1938 1939 if self._match_texts(self.PROPERTY_PARSERS): 1940 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1941 try: 1942 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1943 except TypeError: 1944 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1945 1946 return None 1947 1948 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1949 return self._parse_wrapped_csv(self._parse_property) 1950 1951 def _parse_property(self) -> t.Optional[exp.Expression]: 1952 if self._match_texts(self.PROPERTY_PARSERS): 1953 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1954 1955 if self._match(TokenType.DEFAULT) and 
self._match_texts(self.PROPERTY_PARSERS): 1956 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1957 1958 if self._match_text_seq("COMPOUND", "SORTKEY"): 1959 return self._parse_sortkey(compound=True) 1960 1961 if self._match_text_seq("SQL", "SECURITY"): 1962 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1963 1964 index = self._index 1965 key = self._parse_column() 1966 1967 if not self._match(TokenType.EQ): 1968 self._retreat(index) 1969 return self._parse_sequence_properties() 1970 1971 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1972 if isinstance(key, exp.Column): 1973 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1974 1975 value = self._parse_bitwise() or self._parse_var(any_token=True) 1976 1977 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1978 if isinstance(value, exp.Column): 1979 value = exp.var(value.name) 1980 1981 return self.expression(exp.Property, this=key, value=value) 1982 1983 def _parse_stored(self) -> exp.FileFormatProperty: 1984 self._match(TokenType.ALIAS) 1985 1986 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1987 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1988 1989 return self.expression( 1990 exp.FileFormatProperty, 1991 this=( 1992 self.expression( 1993 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1994 ) 1995 if input_format or output_format 1996 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1997 ), 1998 ) 1999 2000 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2001 field = self._parse_field() 2002 if isinstance(field, exp.Identifier) and not field.quoted: 2003 field = exp.var(field) 2004 2005 return field 2006 2007 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> 
E: 2008 self._match(TokenType.EQ) 2009 self._match(TokenType.ALIAS) 2010 2011 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2012 2013 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2014 properties = [] 2015 while True: 2016 if before: 2017 prop = self._parse_property_before() 2018 else: 2019 prop = self._parse_property() 2020 if not prop: 2021 break 2022 for p in ensure_list(prop): 2023 properties.append(p) 2024 2025 if properties: 2026 return self.expression(exp.Properties, expressions=properties) 2027 2028 return None 2029 2030 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2031 return self.expression( 2032 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2033 ) 2034 2035 def _parse_settings_property(self) -> exp.SettingsProperty: 2036 return self.expression( 2037 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2038 ) 2039 2040 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2041 if self._index >= 2: 2042 pre_volatile_token = self._tokens[self._index - 2] 2043 else: 2044 pre_volatile_token = None 2045 2046 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2047 return exp.VolatileProperty() 2048 2049 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2050 2051 def _parse_retention_period(self) -> exp.Var: 2052 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2053 number = self._parse_number() 2054 number_str = f"{number} " if number else "" 2055 unit = self._parse_var(any_token=True) 2056 return exp.var(f"{number_str}{unit}") 2057 2058 def _parse_system_versioning_property( 2059 self, with_: bool = False 2060 ) -> exp.WithSystemVersioningProperty: 2061 self._match(TokenType.EQ) 2062 prop = self.expression( 2063 exp.WithSystemVersioningProperty, 2064 **{ # type: ignore 2065 
"on": True, 2066 "with": with_, 2067 }, 2068 ) 2069 2070 if self._match_text_seq("OFF"): 2071 prop.set("on", False) 2072 return prop 2073 2074 self._match(TokenType.ON) 2075 if self._match(TokenType.L_PAREN): 2076 while self._curr and not self._match(TokenType.R_PAREN): 2077 if self._match_text_seq("HISTORY_TABLE", "="): 2078 prop.set("this", self._parse_table_parts()) 2079 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2080 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2081 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2082 prop.set("retention_period", self._parse_retention_period()) 2083 2084 self._match(TokenType.COMMA) 2085 2086 return prop 2087 2088 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2089 self._match(TokenType.EQ) 2090 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2091 prop = self.expression(exp.DataDeletionProperty, on=on) 2092 2093 if self._match(TokenType.L_PAREN): 2094 while self._curr and not self._match(TokenType.R_PAREN): 2095 if self._match_text_seq("FILTER_COLUMN", "="): 2096 prop.set("filter_column", self._parse_column()) 2097 elif self._match_text_seq("RETENTION_PERIOD", "="): 2098 prop.set("retention_period", self._parse_retention_period()) 2099 2100 self._match(TokenType.COMMA) 2101 2102 return prop 2103 2104 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2105 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2106 prop = self._parse_system_versioning_property(with_=True) 2107 self._match_r_paren() 2108 return prop 2109 2110 if self._match(TokenType.L_PAREN, advance=False): 2111 return self._parse_wrapped_properties() 2112 2113 if self._match_text_seq("JOURNAL"): 2114 return self._parse_withjournaltable() 2115 2116 if self._match_texts(self.VIEW_ATTRIBUTES): 2117 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2118 2119 if self._match_text_seq("DATA"): 2120 return 
self._parse_withdata(no=False) 2121 elif self._match_text_seq("NO", "DATA"): 2122 return self._parse_withdata(no=True) 2123 2124 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2125 return self._parse_serde_properties(with_=True) 2126 2127 if self._match(TokenType.SCHEMA): 2128 return self.expression( 2129 exp.WithSchemaBindingProperty, 2130 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2131 ) 2132 2133 if not self._next: 2134 return None 2135 2136 return self._parse_withisolatedloading() 2137 2138 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2139 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2140 self._match(TokenType.EQ) 2141 2142 user = self._parse_id_var() 2143 self._match(TokenType.PARAMETER) 2144 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2145 2146 if not user or not host: 2147 return None 2148 2149 return exp.DefinerProperty(this=f"{user}@{host}") 2150 2151 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2152 self._match(TokenType.TABLE) 2153 self._match(TokenType.EQ) 2154 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2155 2156 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2157 return self.expression(exp.LogProperty, no=no) 2158 2159 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2160 return self.expression(exp.JournalProperty, **kwargs) 2161 2162 def _parse_checksum(self) -> exp.ChecksumProperty: 2163 self._match(TokenType.EQ) 2164 2165 on = None 2166 if self._match(TokenType.ON): 2167 on = True 2168 elif self._match_text_seq("OFF"): 2169 on = False 2170 2171 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2172 2173 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2174 return self.expression( 2175 exp.Cluster, 2176 expressions=( 2177 self._parse_wrapped_csv(self._parse_ordered) 2178 if wrapped 2179 else 
self._parse_csv(self._parse_ordered) 2180 ), 2181 ) 2182 2183 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2184 self._match_text_seq("BY") 2185 2186 self._match_l_paren() 2187 expressions = self._parse_csv(self._parse_column) 2188 self._match_r_paren() 2189 2190 if self._match_text_seq("SORTED", "BY"): 2191 self._match_l_paren() 2192 sorted_by = self._parse_csv(self._parse_ordered) 2193 self._match_r_paren() 2194 else: 2195 sorted_by = None 2196 2197 self._match(TokenType.INTO) 2198 buckets = self._parse_number() 2199 self._match_text_seq("BUCKETS") 2200 2201 return self.expression( 2202 exp.ClusteredByProperty, 2203 expressions=expressions, 2204 sorted_by=sorted_by, 2205 buckets=buckets, 2206 ) 2207 2208 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2209 if not self._match_text_seq("GRANTS"): 2210 self._retreat(self._index - 1) 2211 return None 2212 2213 return self.expression(exp.CopyGrantsProperty) 2214 2215 def _parse_freespace(self) -> exp.FreespaceProperty: 2216 self._match(TokenType.EQ) 2217 return self.expression( 2218 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2219 ) 2220 2221 def _parse_mergeblockratio( 2222 self, no: bool = False, default: bool = False 2223 ) -> exp.MergeBlockRatioProperty: 2224 if self._match(TokenType.EQ): 2225 return self.expression( 2226 exp.MergeBlockRatioProperty, 2227 this=self._parse_number(), 2228 percent=self._match(TokenType.PERCENT), 2229 ) 2230 2231 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2232 2233 def _parse_datablocksize( 2234 self, 2235 default: t.Optional[bool] = None, 2236 minimum: t.Optional[bool] = None, 2237 maximum: t.Optional[bool] = None, 2238 ) -> exp.DataBlocksizeProperty: 2239 self._match(TokenType.EQ) 2240 size = self._parse_number() 2241 2242 units = None 2243 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2244 units = self._prev.text 2245 2246 return self.expression( 2247 
exp.DataBlocksizeProperty, 2248 size=size, 2249 units=units, 2250 default=default, 2251 minimum=minimum, 2252 maximum=maximum, 2253 ) 2254 2255 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2256 self._match(TokenType.EQ) 2257 always = self._match_text_seq("ALWAYS") 2258 manual = self._match_text_seq("MANUAL") 2259 never = self._match_text_seq("NEVER") 2260 default = self._match_text_seq("DEFAULT") 2261 2262 autotemp = None 2263 if self._match_text_seq("AUTOTEMP"): 2264 autotemp = self._parse_schema() 2265 2266 return self.expression( 2267 exp.BlockCompressionProperty, 2268 always=always, 2269 manual=manual, 2270 never=never, 2271 default=default, 2272 autotemp=autotemp, 2273 ) 2274 2275 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2276 index = self._index 2277 no = self._match_text_seq("NO") 2278 concurrent = self._match_text_seq("CONCURRENT") 2279 2280 if not self._match_text_seq("ISOLATED", "LOADING"): 2281 self._retreat(index) 2282 return None 2283 2284 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2285 return self.expression( 2286 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2287 ) 2288 2289 def _parse_locking(self) -> exp.LockingProperty: 2290 if self._match(TokenType.TABLE): 2291 kind = "TABLE" 2292 elif self._match(TokenType.VIEW): 2293 kind = "VIEW" 2294 elif self._match(TokenType.ROW): 2295 kind = "ROW" 2296 elif self._match_text_seq("DATABASE"): 2297 kind = "DATABASE" 2298 else: 2299 kind = None 2300 2301 if kind in ("DATABASE", "TABLE", "VIEW"): 2302 this = self._parse_table_parts() 2303 else: 2304 this = None 2305 2306 if self._match(TokenType.FOR): 2307 for_or_in = "FOR" 2308 elif self._match(TokenType.IN): 2309 for_or_in = "IN" 2310 else: 2311 for_or_in = None 2312 2313 if self._match_text_seq("ACCESS"): 2314 lock_type = "ACCESS" 2315 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2316 lock_type = "EXCLUSIVE" 2317 elif 
self._match_text_seq("SHARE"): 2318 lock_type = "SHARE" 2319 elif self._match_text_seq("READ"): 2320 lock_type = "READ" 2321 elif self._match_text_seq("WRITE"): 2322 lock_type = "WRITE" 2323 elif self._match_text_seq("CHECKSUM"): 2324 lock_type = "CHECKSUM" 2325 else: 2326 lock_type = None 2327 2328 override = self._match_text_seq("OVERRIDE") 2329 2330 return self.expression( 2331 exp.LockingProperty, 2332 this=this, 2333 kind=kind, 2334 for_or_in=for_or_in, 2335 lock_type=lock_type, 2336 override=override, 2337 ) 2338 2339 def _parse_partition_by(self) -> t.List[exp.Expression]: 2340 if self._match(TokenType.PARTITION_BY): 2341 return self._parse_csv(self._parse_assignment) 2342 return [] 2343 2344 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2345 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2346 if self._match_text_seq("MINVALUE"): 2347 return exp.var("MINVALUE") 2348 if self._match_text_seq("MAXVALUE"): 2349 return exp.var("MAXVALUE") 2350 return self._parse_bitwise() 2351 2352 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2353 expression = None 2354 from_expressions = None 2355 to_expressions = None 2356 2357 if self._match(TokenType.IN): 2358 this = self._parse_wrapped_csv(self._parse_bitwise) 2359 elif self._match(TokenType.FROM): 2360 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2361 self._match_text_seq("TO") 2362 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2363 elif self._match_text_seq("WITH", "(", "MODULUS"): 2364 this = self._parse_number() 2365 self._match_text_seq(",", "REMAINDER") 2366 expression = self._parse_number() 2367 self._match_r_paren() 2368 else: 2369 self.raise_error("Failed to parse partition bound spec.") 2370 2371 return self.expression( 2372 exp.PartitionBoundSpec, 2373 this=this, 2374 expression=expression, 2375 from_expressions=from_expressions, 2376 to_expressions=to_expressions, 2377 ) 2378 2379 # 
https://www.postgresql.org/docs/current/sql-createtable.html 2380 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2381 if not self._match_text_seq("OF"): 2382 self._retreat(self._index - 1) 2383 return None 2384 2385 this = self._parse_table(schema=True) 2386 2387 if self._match(TokenType.DEFAULT): 2388 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2389 elif self._match_text_seq("FOR", "VALUES"): 2390 expression = self._parse_partition_bound_spec() 2391 else: 2392 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2393 2394 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2395 2396 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2397 self._match(TokenType.EQ) 2398 return self.expression( 2399 exp.PartitionedByProperty, 2400 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2401 ) 2402 2403 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2404 if self._match_text_seq("AND", "STATISTICS"): 2405 statistics = True 2406 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2407 statistics = False 2408 else: 2409 statistics = None 2410 2411 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2412 2413 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2414 if self._match_text_seq("SQL"): 2415 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2416 return None 2417 2418 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2419 if self._match_text_seq("SQL", "DATA"): 2420 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2421 return None 2422 2423 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2424 if self._match_text_seq("PRIMARY", "INDEX"): 2425 return exp.NoPrimaryIndexProperty() 2426 if self._match_text_seq("SQL"): 2427 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2428 return 
None 2429 2430 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2431 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2432 return exp.OnCommitProperty() 2433 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2434 return exp.OnCommitProperty(delete=True) 2435 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2436 2437 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2438 if self._match_text_seq("SQL", "DATA"): 2439 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2440 return None 2441 2442 def _parse_distkey(self) -> exp.DistKeyProperty: 2443 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2444 2445 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2446 table = self._parse_table(schema=True) 2447 2448 options = [] 2449 while self._match_texts(("INCLUDING", "EXCLUDING")): 2450 this = self._prev.text.upper() 2451 2452 id_var = self._parse_id_var() 2453 if not id_var: 2454 return None 2455 2456 options.append( 2457 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2458 ) 2459 2460 return self.expression(exp.LikeProperty, this=table, expressions=options) 2461 2462 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2463 return self.expression( 2464 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2465 ) 2466 2467 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2468 self._match(TokenType.EQ) 2469 return self.expression( 2470 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2471 ) 2472 2473 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2474 self._match_text_seq("WITH", "CONNECTION") 2475 return self.expression( 2476 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2477 ) 2478 2479 def _parse_returns(self) -> exp.ReturnsProperty: 
2480 value: t.Optional[exp.Expression] 2481 null = None 2482 is_table = self._match(TokenType.TABLE) 2483 2484 if is_table: 2485 if self._match(TokenType.LT): 2486 value = self.expression( 2487 exp.Schema, 2488 this="TABLE", 2489 expressions=self._parse_csv(self._parse_struct_types), 2490 ) 2491 if not self._match(TokenType.GT): 2492 self.raise_error("Expecting >") 2493 else: 2494 value = self._parse_schema(exp.var("TABLE")) 2495 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2496 null = True 2497 value = None 2498 else: 2499 value = self._parse_types() 2500 2501 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2502 2503 def _parse_describe(self) -> exp.Describe: 2504 kind = self._match_set(self.CREATABLES) and self._prev.text 2505 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2506 if self._match(TokenType.DOT): 2507 style = None 2508 self._retreat(self._index - 2) 2509 this = self._parse_table(schema=True) 2510 properties = self._parse_properties() 2511 expressions = properties.expressions if properties else None 2512 partition = self._parse_partition() 2513 return self.expression( 2514 exp.Describe, 2515 this=this, 2516 style=style, 2517 kind=kind, 2518 expressions=expressions, 2519 partition=partition, 2520 ) 2521 2522 def _parse_insert(self) -> exp.Insert: 2523 comments = ensure_list(self._prev_comments) 2524 hint = self._parse_hint() 2525 overwrite = self._match(TokenType.OVERWRITE) 2526 ignore = self._match(TokenType.IGNORE) 2527 local = self._match_text_seq("LOCAL") 2528 alternative = None 2529 is_function = None 2530 2531 if self._match_text_seq("DIRECTORY"): 2532 this: t.Optional[exp.Expression] = self.expression( 2533 exp.Directory, 2534 this=self._parse_var_or_string(), 2535 local=local, 2536 row_format=self._parse_row_format(match_row=True), 2537 ) 2538 else: 2539 if self._match(TokenType.OR): 2540 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and 
self._prev.text 2541 2542 self._match(TokenType.INTO) 2543 comments += ensure_list(self._prev_comments) 2544 self._match(TokenType.TABLE) 2545 is_function = self._match(TokenType.FUNCTION) 2546 2547 this = ( 2548 self._parse_table(schema=True, parse_partition=True) 2549 if not is_function 2550 else self._parse_function() 2551 ) 2552 2553 returning = self._parse_returning() 2554 2555 return self.expression( 2556 exp.Insert, 2557 comments=comments, 2558 hint=hint, 2559 is_function=is_function, 2560 this=this, 2561 stored=self._match_text_seq("STORED") and self._parse_stored(), 2562 by_name=self._match_text_seq("BY", "NAME"), 2563 exists=self._parse_exists(), 2564 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2565 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2566 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2567 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2568 conflict=self._parse_on_conflict(), 2569 returning=returning or self._parse_returning(), 2570 overwrite=overwrite, 2571 alternative=alternative, 2572 ignore=ignore, 2573 source=self._match(TokenType.TABLE) and self._parse_table(), 2574 ) 2575 2576 def _parse_kill(self) -> exp.Kill: 2577 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2578 2579 return self.expression( 2580 exp.Kill, 2581 this=self._parse_primary(), 2582 kind=kind, 2583 ) 2584 2585 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2586 conflict = self._match_text_seq("ON", "CONFLICT") 2587 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2588 2589 if not conflict and not duplicate: 2590 return None 2591 2592 conflict_keys = None 2593 constraint = None 2594 2595 if conflict: 2596 if self._match_text_seq("ON", "CONSTRAINT"): 2597 constraint = self._parse_id_var() 2598 elif self._match(TokenType.L_PAREN): 2599 conflict_keys = 
self._parse_csv(self._parse_id_var) 2600 self._match_r_paren() 2601 2602 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2603 if self._prev.token_type == TokenType.UPDATE: 2604 self._match(TokenType.SET) 2605 expressions = self._parse_csv(self._parse_equality) 2606 else: 2607 expressions = None 2608 2609 return self.expression( 2610 exp.OnConflict, 2611 duplicate=duplicate, 2612 expressions=expressions, 2613 action=action, 2614 conflict_keys=conflict_keys, 2615 constraint=constraint, 2616 ) 2617 2618 def _parse_returning(self) -> t.Optional[exp.Returning]: 2619 if not self._match(TokenType.RETURNING): 2620 return None 2621 return self.expression( 2622 exp.Returning, 2623 expressions=self._parse_csv(self._parse_expression), 2624 into=self._match(TokenType.INTO) and self._parse_table_part(), 2625 ) 2626 2627 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2628 if not self._match(TokenType.FORMAT): 2629 return None 2630 return self._parse_row_format() 2631 2632 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2633 index = self._index 2634 with_ = with_ or self._match_text_seq("WITH") 2635 2636 if not self._match(TokenType.SERDE_PROPERTIES): 2637 self._retreat(index) 2638 return None 2639 return self.expression( 2640 exp.SerdeProperties, 2641 **{ # type: ignore 2642 "expressions": self._parse_wrapped_properties(), 2643 "with": with_, 2644 }, 2645 ) 2646 2647 def _parse_row_format( 2648 self, match_row: bool = False 2649 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2650 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2651 return None 2652 2653 if self._match_text_seq("SERDE"): 2654 this = self._parse_string() 2655 2656 serde_properties = self._parse_serde_properties() 2657 2658 return self.expression( 2659 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2660 ) 2661 2662 
        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and must appear in this order
        # (Hive row-format grammar)
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA ...; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (SET, FROM, WHERE, RETURNING, ORDER, LIMIT)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse Spark's UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark's CACHE [LAZY] TABLE <table> [OPTIONS(...)] [AS <query>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' option pair is parsed here
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<assignments>), if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row as a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Hook point: dialects override this to customize projection parsing
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query (CTEs, VALUES, wrapped subqueries, etc.).

        Args:
            nested: whether a parenthesized nested select is acceptable here.
            table: whether a bare table reference is acceptable (e.g. laterals).
            parse_subquery_alias: whether to parse an alias on a wrapped subquery.
            parse_set_operation: whether to consume trailing UNION/INTERSECT/...
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Guard: if the next token is followed by a dot, ALL/DISTINCT here is
            # actually a projection identifier, not a quantifier
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte> ...]."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> AS [NOT MATERIALIZED | MATERIALIZED] (<stmt>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parens held no columns they weren't an alias column list: back out
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery node with pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (e.g. BigQuery `FROM t, t.arr`) as UNNEST."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                # A join whose "table" name is rooted at a known source is really
                # a column reference to be unnested
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to a query."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline OFFSET; hoist it to its own node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an Oracle/Hive-style /*+ ... */ optimizer hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint lists until an empty batch
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause, if present (or unconditionally when the token was consumed)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL by scanning to the balancing paren
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] ... or CROSS/OUTER APPLY ... (APPLY maps onto Lateral)."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an unnest, a (possibly dotted) function call, or an id
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns (method, side, kind), e.g. (HASH, LEFT, OUTER)
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) join clause."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                # Unwrap Column -> Identifier
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause; None when the upcoming tokens are not a join."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            # The ON/USING may follow a chain of nested joins; try parsing them first
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with `index`/`anonymous`, the name was parsed upstream."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table reference into an exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # BigQuery-style wildcard tables, e.g. `project.dataset.table_*`
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # The trailing part names a database, not a table; shift parts accordingly
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery or table ref."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        # Postgres: ROWS FROM (func1(...), func2(...))
        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

# Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3546 self._match_text_seq("*") 3547 3548 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3549 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3550 this.set("partition", self._parse_partition()) 3551 3552 if schema: 3553 return self._parse_schema(this=this) 3554 3555 version = self._parse_version() 3556 3557 if version: 3558 this.set("version", version) 3559 3560 if self.dialect.ALIAS_POST_TABLESAMPLE: 3561 this.set("sample", self._parse_table_sample()) 3562 3563 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3564 if alias: 3565 this.set("alias", alias) 3566 3567 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3568 return self.expression( 3569 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3570 ) 3571 3572 this.set("hints", self._parse_table_hints()) 3573 3574 if not this.args.get("pivots"): 3575 this.set("pivots", self._parse_pivots()) 3576 3577 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3578 this.set("sample", self._parse_table_sample()) 3579 3580 if joins: 3581 for join in self._parse_joins(): 3582 this.append("joins", join) 3583 3584 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3585 this.set("ordinality", True) 3586 this.set("alias", self._parse_table_alias()) 3587 3588 return this 3589 3590 def _parse_version(self) -> t.Optional[exp.Version]: 3591 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3592 this = "TIMESTAMP" 3593 elif self._match(TokenType.VERSION_SNAPSHOT): 3594 this = "VERSION" 3595 else: 3596 return None 3597 3598 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3599 kind = self._prev.text.upper() 3600 start = self._parse_bitwise() 3601 self._match_texts(("TO", "AND")) 3602 end = self._parse_bitwise() 3603 expression: t.Optional[exp.Expression] = self.expression( 3604 exp.Tuple, expressions=[start, end] 3605 ) 
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE clause: undo all token consumption
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake's CHANGES (INFORMATION => ...) clause, if present."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY | WITH OFFSET] [alias]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE / USING SAMPLE clause, if present."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
self._parse_factor() 3728 if self._match(TokenType.NUMBER, advance=False) 3729 else self._parse_primary() or self._parse_placeholder() 3730 ) 3731 3732 if self._match_text_seq("BUCKET"): 3733 bucket_numerator = self._parse_number() 3734 self._match_text_seq("OUT", "OF") 3735 bucket_denominator = bucket_denominator = self._parse_number() 3736 self._match(TokenType.ON) 3737 bucket_field = self._parse_field() 3738 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3739 percent = num 3740 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3741 size = num 3742 else: 3743 percent = num 3744 3745 if matched_l_paren: 3746 self._match_r_paren() 3747 3748 if self._match(TokenType.L_PAREN): 3749 method = self._parse_var(upper=True) 3750 seed = self._match(TokenType.COMMA) and self._parse_number() 3751 self._match_r_paren() 3752 elif self._match_texts(("SEED", "REPEATABLE")): 3753 seed = self._parse_wrapped(self._parse_number) 3754 3755 if not method and self.DEFAULT_SAMPLING_METHOD: 3756 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3757 3758 return self.expression( 3759 exp.TableSample, 3760 expressions=expressions, 3761 method=method, 3762 bucket_numerator=bucket_numerator, 3763 bucket_denominator=bucket_denominator, 3764 bucket_field=bucket_field, 3765 percent=percent, 3766 size=size, 3767 seed=seed, 3768 ) 3769 3770 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3771 return list(iter(self._parse_pivot, None)) or None 3772 3773 def _parse_joins(self) -> t.Iterator[exp.Join]: 3774 return iter(self._parse_join, None) 3775 3776 # https://duckdb.org/docs/sql/statements/pivot 3777 def _parse_simplified_pivot(self) -> exp.Pivot: 3778 def _parse_on() -> t.Optional[exp.Expression]: 3779 this = self._parse_bitwise() 3780 return self._parse_in(this) if self._match(TokenType.IN) else this 3781 3782 this = self._parse_table() 3783 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3784 using = 
self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the `<column> IN (<values>)` / `IN (ANY ...)` part of a PIVOT."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # Unqualified column aliases collapse to their identifier
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order())
        else:
            aliased_expressions = self._parse_csv(_parse_aliased_expression)
            expr = self.expression(exp.In, this=value, expressions=aliased_expressions)

        self._match_r_paren()
        return expr

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a single PIVOT/UNPIVOT clause, or return None (with retreat)."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names of the pivoted relation
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Dialects may override this to derive names differently
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        # ClickHouse PREWHERE
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return 
self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause (expressions, ROLLUP/CUBE/GROUPING SETS, TOTALS)."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # Consumed at most a stray WITH: undo it and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress this iteration: stop to avoid an infinite loop
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY, in either order."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a function while parsing the CONNECT BY condition,
        # so it's registered temporarily and removed right after
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse ORDER BY ... WITH FILL INTERPOLATE (...)
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not 
skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL]."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the input doesn't order nulls explicitly, apply the dialect's
        # default null ordering so transpilation is deterministic
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP / FETCH; returns `this` unchanged if none is present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style LIMIT <offset>, <count>
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, 
expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT ... BY <expressions>
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE / ...)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the last
                # operand up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL 
and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of an IS predicate (the IS token is consumed)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                # Not actually an IS predicate; give back the IS token
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal; may return an exp.Add for interval sums."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bail out on e.g. `interval IS ...`, where "interval" is a column
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form 
INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level binary operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-like IntDiv operator (e.g. DIV) followed by no operand was
            # actually an identifier; give the token back
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if 
self._match_set(self.UNARY_PARSERS): 4493 return self.UNARY_PARSERS[self._prev.token_type](self) 4494 return self._parse_at_time_zone(self._parse_type()) 4495 4496 def _parse_type( 4497 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4498 ) -> t.Optional[exp.Expression]: 4499 interval = parse_interval and self._parse_interval() 4500 if interval: 4501 return interval 4502 4503 index = self._index 4504 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4505 4506 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4507 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4508 if isinstance(data_type, exp.Cast): 4509 # This constructor can contain ops directly after it, for instance struct unnesting: 4510 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4511 return self._parse_column_ops(data_type) 4512 4513 if data_type: 4514 index2 = self._index 4515 this = self._parse_primary() 4516 4517 if isinstance(this, exp.Literal): 4518 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4519 if parser: 4520 return parser(self, this, data_type) 4521 4522 return self.expression(exp.Cast, this=this, to=data_type) 4523 4524 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4525 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4526 # 4527 # If the index difference here is greater than 1, that means the parser itself must have 4528 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4529 # 4530 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4531 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4532 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 4533 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4534 # 4535 # In these cases, we don't really want to return the converted type, but instead retreat 4536 # and try to parse a Column or Identifier in the section below. 4537 if data_type.expressions and index2 - index > 1: 4538 self._retreat(index2) 4539 return self._parse_column_ops(data_type) 4540 4541 self._retreat(index) 4542 4543 if fallback_to_identifier: 4544 return self._parse_id_var() 4545 4546 this = self._parse_column() 4547 return this and self._parse_column_ops(this) 4548 4549 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4550 this = self._parse_type() 4551 if not this: 4552 return None 4553 4554 if isinstance(this, exp.Column) and not this.table: 4555 this = exp.var(this.name.upper()) 4556 4557 return self.expression( 4558 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4559 ) 4560 4561 def _parse_types( 4562 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4563 ) -> t.Optional[exp.Expression]: 4564 index = self._index 4565 4566 this: t.Optional[exp.Expression] = None 4567 prefix = self._match_text_seq("SYSUDTLIB", ".") 4568 4569 if not self._match_set(self.TYPE_TOKENS): 4570 identifier = allow_identifiers and self._parse_id_var( 4571 any_token=False, tokens=(TokenType.VAR,) 4572 ) 4573 if isinstance(identifier, exp.Identifier): 4574 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4575 4576 if len(tokens) != 1: 4577 self.raise_error("Unexpected identifier", self._prev) 4578 4579 if tokens[0].token_type in self.TYPE_TOKENS: 4580 self._prev = tokens[0] 4581 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4582 type_name = identifier.name 4583 4584 while self._match(TokenType.DOT): 4585 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4586 4587 this = exp.DataType.build(type_name, udt=True) 4588 else: 4589 
self._retreat(self._index - 1) 4590 return None 4591 else: 4592 return None 4593 4594 type_token = self._prev.token_type 4595 4596 if type_token == TokenType.PSEUDO_TYPE: 4597 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4598 4599 if type_token == TokenType.OBJECT_IDENTIFIER: 4600 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4601 4602 # https://materialize.com/docs/sql/types/map/ 4603 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4604 key_type = self._parse_types( 4605 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4606 ) 4607 if not self._match(TokenType.FARROW): 4608 self._retreat(index) 4609 return None 4610 4611 value_type = self._parse_types( 4612 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4613 ) 4614 if not self._match(TokenType.R_BRACKET): 4615 self._retreat(index) 4616 return None 4617 4618 return exp.DataType( 4619 this=exp.DataType.Type.MAP, 4620 expressions=[key_type, value_type], 4621 nested=True, 4622 prefix=prefix, 4623 ) 4624 4625 nested = type_token in self.NESTED_TYPE_TOKENS 4626 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4627 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4628 expressions = None 4629 maybe_func = False 4630 4631 if self._match(TokenType.L_PAREN): 4632 if is_struct: 4633 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4634 elif nested: 4635 expressions = self._parse_csv( 4636 lambda: self._parse_types( 4637 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4638 ) 4639 ) 4640 elif type_token in self.ENUM_TYPE_TOKENS: 4641 expressions = self._parse_csv(self._parse_equality) 4642 elif is_aggregate: 4643 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4644 any_token=False, tokens=(TokenType.VAR,) 4645 ) 4646 if not func_or_ident or not self._match(TokenType.COMMA): 4647 return None 4648 expressions = 
self._parse_csv( 4649 lambda: self._parse_types( 4650 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4651 ) 4652 ) 4653 expressions.insert(0, func_or_ident) 4654 else: 4655 expressions = self._parse_csv(self._parse_type_size) 4656 4657 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4658 if type_token == TokenType.VECTOR and len(expressions) == 2: 4659 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4660 4661 if not expressions or not self._match(TokenType.R_PAREN): 4662 self._retreat(index) 4663 return None 4664 4665 maybe_func = True 4666 4667 values: t.Optional[t.List[exp.Expression]] = None 4668 4669 if nested and self._match(TokenType.LT): 4670 if is_struct: 4671 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4672 else: 4673 expressions = self._parse_csv( 4674 lambda: self._parse_types( 4675 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4676 ) 4677 ) 4678 4679 if not self._match(TokenType.GT): 4680 self.raise_error("Expecting >") 4681 4682 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4683 values = self._parse_csv(self._parse_assignment) 4684 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4685 4686 if type_token in self.TIMESTAMPS: 4687 if self._match_text_seq("WITH", "TIME", "ZONE"): 4688 maybe_func = False 4689 tz_type = ( 4690 exp.DataType.Type.TIMETZ 4691 if type_token in self.TIMES 4692 else exp.DataType.Type.TIMESTAMPTZ 4693 ) 4694 this = exp.DataType(this=tz_type, expressions=expressions) 4695 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4696 maybe_func = False 4697 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4698 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4699 maybe_func = False 4700 elif type_token == TokenType.INTERVAL: 4701 unit = self._parse_var(upper=True) 4702 if unit: 4703 if self._match_text_seq("TO"): 4704 unit = 
exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4705 4706 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4707 else: 4708 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4709 4710 if maybe_func and check_func: 4711 index2 = self._index 4712 peek = self._parse_string() 4713 4714 if not peek: 4715 self._retreat(index) 4716 return None 4717 4718 self._retreat(index2) 4719 4720 if not this: 4721 if self._match_text_seq("UNSIGNED"): 4722 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4723 if not unsigned_type_token: 4724 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4725 4726 type_token = unsigned_type_token or type_token 4727 4728 this = exp.DataType( 4729 this=exp.DataType.Type[type_token.value], 4730 expressions=expressions, 4731 nested=nested, 4732 prefix=prefix, 4733 ) 4734 4735 # Empty arrays/structs are allowed 4736 if values is not None: 4737 cls = exp.Struct if is_struct else exp.Array 4738 this = exp.cast(cls(expressions=values), this, copy=False) 4739 4740 elif expressions: 4741 this.set("expressions", expressions) 4742 4743 # https://materialize.com/docs/sql/types/list/#type-name 4744 while self._match(TokenType.LIST): 4745 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4746 4747 index = self._index 4748 4749 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4750 matched_array = self._match(TokenType.ARRAY) 4751 4752 while self._curr: 4753 datatype_token = self._prev.token_type 4754 matched_l_bracket = self._match(TokenType.L_BRACKET) 4755 if not matched_l_bracket and not matched_array: 4756 break 4757 4758 matched_array = False 4759 values = self._parse_csv(self._parse_assignment) or None 4760 if ( 4761 values 4762 and not schema 4763 and ( 4764 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4765 ) 4766 ): 4767 # Retreating here means that we 
                # should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Give dialect-specific converters a chance to rewrite the resulting type node
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single STRUCT field, e.g. `name VARCHAR` (an optional `:` between name and
        type is consumed). Falls back to parsing a plain type when `type_required` is set and
        what was parsed is not a type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # A type was required but we did not parse one: backtrack and parse the whole
            # field as a type instead
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in AT TIME ZONE <zone> when the clause is present; otherwise a no-op
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference plus any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES used as a bare identifier rather than the VALUES (...) construct
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `expr:path.to.field::TYPE` VARIANT extraction,
        collecting the colon-separated path segments and any trailing casts."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but
            # advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Record the raw SQL of the path segment so it can be re-joined below
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # The target of a `::` cast is a type
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply trailing column operators (dots, `::` casts, brackets, etc.)
        to `this`, building up qualified columns / Dot chains as needed."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: the previous column parts become table/db/catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicit `.5`-style number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Order matters: anonymous functions take precedence over primaries when requested
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation, dispatching to no-paren parsers, dedicated
        FUNCTION_PARSERS, known FUNCTIONS builders, or an Anonymous fallback."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Skip past the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Hook for subclasses to convert positional args into PropertyEQ; identity by default
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the Column so the key is a bare identifier
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers like _utf8'...'; falls back to an Identifier
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Dotted form: <kind>.<name>
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a lambda (e.g. `(x, y) -> ...`); if none is found, backtrack and
        parse a DISTINCT list or a regular select/expression with trailing modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized list of columns/constraints into an exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g.
        # in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name, producing an
        exp.ColumnDef (or returning `this` unchanged when neither is present)."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's ALIAS / MATERIALIZED expressions
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT-style options; with both START and INCREMENT it becomes a
        generated-identity constraint, otherwise a plain auto-increment constraint."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # Expects AUTO REFRESH <value>; backtracks one token when REFRESH is absent
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS ... constraints, covering IDENTITY
        sequence options, ROW START/END columns, and generated expressions."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args, e.g. IDENTITY(1, 1)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Handles NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Optionally named via CONSTRAINT <name>, followed by a known constraint kind
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Without the CONSTRAINT keyword, fall back to an unnamed schema-level constraint
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Collect consecutive constraints (or function-like constraint expressions)
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings, e.g. `ON DELETE CASCADE`
        or entries from KEY_CONSTRAINT_OPTIONS."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) REFERENCES ... with optional ON DELETE / ON UPDATE
        actions, stored as `delete`/`update` kwargs on the ForeignKey node."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single-token action, e.g. CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # Expects FOR SYSTEM_TIME (<start>, <end>); backtracks when the snapshot token is absent
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` suffixes: struct literals, array constructors, or
        subscript (Bracket) access, recursing to support chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        #
        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Subscript access: normalize indices by the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Turns `a : b` inside brackets into a Slice node
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END into an exp.Case."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval END` can be mis-parsed as an Interval whose unit is END
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call `IF(cond, true[, false])` or as the
        no-paren `IF cond THEN ... [ELSE ...] END` form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # Expects NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(<part> FROM <expr>); some dialects use a comma instead of FROM
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); temporal casts with FORMAT become
        StrToDate/StrToTime instead of a plain Cast."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake-style CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, normalizing the Postgres,
        BigQuery and WITHIN GROUP variants into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); both become a Cast
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result]
... [, default]) 5769 5770 The second variant will always be parsed into a CASE expression. Note that NULL 5771 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5772 instead of relying on pattern matching. 5773 """ 5774 args = self._parse_csv(self._parse_assignment) 5775 5776 if len(args) < 3: 5777 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5778 5779 expression, *expressions = args 5780 if not expression: 5781 return None 5782 5783 ifs = [] 5784 for search, result in zip(expressions[::2], expressions[1::2]): 5785 if not search or not result: 5786 return None 5787 5788 if isinstance(search, exp.Literal): 5789 ifs.append( 5790 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5791 ) 5792 elif isinstance(search, exp.Null): 5793 ifs.append( 5794 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5795 ) 5796 else: 5797 cond = exp.or_( 5798 exp.EQ(this=expression.copy(), expression=search), 5799 exp.and_( 5800 exp.Is(this=expression.copy(), expression=exp.Null()), 5801 exp.Is(this=search.copy(), expression=exp.Null()), 5802 copy=False, 5803 ), 5804 copy=False, 5805 ) 5806 ifs.append(exp.If(this=cond, true=result)) 5807 5808 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5809 5810 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5811 self._match_text_seq("KEY") 5812 key = self._parse_column() 5813 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5814 self._match_text_seq("VALUE") 5815 value = self._parse_bitwise() 5816 5817 if not key and not value: 5818 return None 5819 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5820 5821 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5822 if not this or not self._match_text_seq("FORMAT", "JSON"): 5823 return this 5824 5825 return self.expression(exp.FormatJson, this=this) 5826 
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the interior of JSON_OBJECT(...) (or JSON_OBJECTAGG(...) when agg=True),
        including NULL/ABSENT ON NULL, [WITH|WITHOUT] UNIQUE KEYS, RETURNING and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside JSON_TABLE's COLUMNS clause,
        including the NESTED [PATH] form (which recurses into a nested schema)."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema clause for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ERROR|NULL ON ERROR] [ERROR|NULL ON EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH (cols) AGAINST ('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(doc [, path]) [WITH (col type [path] [AS JSON], ...)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column of the WITH clause: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant.

        Args:
            haystack_first: when True, comma-form arguments are (haystack, needle).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL t, TABLE t2 [, params]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node named `func_name`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                if len(args) == 1:
                    # FOR without FROM: default the start position to 1.
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects), the chars-to-trim come first.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause into its list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN <column> modifier into HavingMax (max=False means MIN)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"

            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes of a function call: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...). With alias=True, parses a named window
        definition (`name AS (spec)`) instead of requiring OVER."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the nested IGNORE/RESPECT NULLS so it wraps the whole aggregate.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window, no inline spec.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (UNBOUNDED / CURRENT ROW / expr, plus PRECEDING/FOLLOWING side)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) for `this`.

        Args:
            explicit: when True, only aliases introduced by the AS token are accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, falling back to treating (almost) any token as one.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token types accepted as identifiers (defaults to ID_VAR_TOKENS).
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal (or a placeholder standing in for one)."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and wrap it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal (or a placeholder standing in for one)."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token (or a placeholder)."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when allowed) into exp.Var.

        Args:
            any_token: accept any non-reserved token.
            tokens: additional token types to accept.
            upper: uppercase the resulting variable text.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved; None otherwise."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, or failing that, any token as a variable."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, or failing that, any token as a variable."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal (or a placeholder)."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal (or a placeholder)."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection (or a placeholder)."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the body of a parameter reference (e.g. after @ or $)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a bind placeholder; backs up one token if the sub-parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse star modifiers like EXCEPT/REPLACE, with or without parentheses."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None
6320 6321 def _parse_csv( 6322 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6323 ) -> t.List[exp.Expression]: 6324 parse_result = parse_method() 6325 items = [parse_result] if parse_result is not None else [] 6326 6327 while self._match(sep): 6328 self._add_comments(parse_result) 6329 parse_result = parse_method() 6330 if parse_result is not None: 6331 items.append(parse_result) 6332 6333 return items 6334 6335 def _parse_tokens( 6336 self, parse_method: t.Callable, expressions: t.Dict 6337 ) -> t.Optional[exp.Expression]: 6338 this = parse_method() 6339 6340 while self._match_set(expressions): 6341 this = self.expression( 6342 expressions[self._prev.token_type], 6343 this=this, 6344 comments=self._prev_comments, 6345 expression=parse_method(), 6346 ) 6347 6348 return this 6349 6350 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6351 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6352 6353 def _parse_wrapped_csv( 6354 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6355 ) -> t.List[exp.Expression]: 6356 return self._parse_wrapped( 6357 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6358 ) 6359 6360 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6361 wrapped = self._match(TokenType.L_PAREN) 6362 if not wrapped and not optional: 6363 self.raise_error("Expecting (") 6364 parse_result = parse_method() 6365 if wrapped: 6366 self._match_r_paren() 6367 return parse_result 6368 6369 def _parse_expressions(self) -> t.List[exp.Expression]: 6370 return self._parse_csv(self._parse_expression) 6371 6372 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6373 return self._parse_select() or self._parse_set_operations( 6374 self._parse_expression() if alias else self._parse_assignment() 6375 ) 6376 6377 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6378 
return self._parse_query_modifiers( 6379 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6380 ) 6381 6382 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6383 this = None 6384 if self._match_texts(self.TRANSACTION_KIND): 6385 this = self._prev.text 6386 6387 self._match_texts(("TRANSACTION", "WORK")) 6388 6389 modes = [] 6390 while True: 6391 mode = [] 6392 while self._match(TokenType.VAR): 6393 mode.append(self._prev.text) 6394 6395 if mode: 6396 modes.append(" ".join(mode)) 6397 if not self._match(TokenType.COMMA): 6398 break 6399 6400 return self.expression(exp.Transaction, this=this, modes=modes) 6401 6402 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6403 chain = None 6404 savepoint = None 6405 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6406 6407 self._match_texts(("TRANSACTION", "WORK")) 6408 6409 if self._match_text_seq("TO"): 6410 self._match_text_seq("SAVEPOINT") 6411 savepoint = self._parse_id_var() 6412 6413 if self._match(TokenType.AND): 6414 chain = not self._match_text_seq("NO") 6415 self._match_text_seq("CHAIN") 6416 6417 if is_rollback: 6418 return self.expression(exp.Rollback, savepoint=savepoint) 6419 6420 return self.expression(exp.Commit, chain=chain) 6421 6422 def _parse_refresh(self) -> exp.Refresh: 6423 self._match(TokenType.TABLE) 6424 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6425 6426 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6427 if not self._match_text_seq("ADD"): 6428 return None 6429 6430 self._match(TokenType.COLUMN) 6431 exists_column = self._parse_exists(not_=True) 6432 expression = self._parse_field_def() 6433 6434 if expression: 6435 expression.set("exists", exists_column) 6436 6437 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6438 if self._match_texts(("FIRST", "AFTER")): 6439 position = self._prev.text 6440 column_position = 
self.expression( 6441 exp.ColumnPosition, this=self._parse_column(), position=position 6442 ) 6443 expression.set("position", column_position) 6444 6445 return expression 6446 6447 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6448 drop = self._match(TokenType.DROP) and self._parse_drop() 6449 if drop and not isinstance(drop, exp.Command): 6450 drop.set("kind", drop.args.get("kind", "COLUMN")) 6451 return drop 6452 6453 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6454 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6455 return self.expression( 6456 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6457 ) 6458 6459 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6460 index = self._index - 1 6461 6462 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6463 return self._parse_csv( 6464 lambda: self.expression( 6465 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6466 ) 6467 ) 6468 6469 self._retreat(index) 6470 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6471 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6472 6473 if self._match_text_seq("ADD", "COLUMNS"): 6474 schema = self._parse_schema() 6475 if schema: 6476 return [schema] 6477 return [] 6478 6479 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6480 6481 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6482 if self._match_texts(self.ALTER_ALTER_PARSERS): 6483 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6484 6485 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6486 # keyword after ALTER we default to parsing this statement 6487 self._match(TokenType.COLUMN) 6488 column = self._parse_field(any_token=True) 6489 6490 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6491 return 
self.expression(exp.AlterColumn, this=column, drop=True) 6492 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6493 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6494 if self._match(TokenType.COMMENT): 6495 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6496 if self._match_text_seq("DROP", "NOT", "NULL"): 6497 return self.expression( 6498 exp.AlterColumn, 6499 this=column, 6500 drop=True, 6501 allow_null=True, 6502 ) 6503 if self._match_text_seq("SET", "NOT", "NULL"): 6504 return self.expression( 6505 exp.AlterColumn, 6506 this=column, 6507 allow_null=False, 6508 ) 6509 self._match_text_seq("SET", "DATA") 6510 self._match_text_seq("TYPE") 6511 return self.expression( 6512 exp.AlterColumn, 6513 this=column, 6514 dtype=self._parse_types(), 6515 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6516 using=self._match(TokenType.USING) and self._parse_assignment(), 6517 ) 6518 6519 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6520 if self._match_texts(("ALL", "EVEN", "AUTO")): 6521 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6522 6523 self._match_text_seq("KEY", "DISTKEY") 6524 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6525 6526 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6527 if compound: 6528 self._match_text_seq("SORTKEY") 6529 6530 if self._match(TokenType.L_PAREN, advance=False): 6531 return self.expression( 6532 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6533 ) 6534 6535 self._match_texts(("AUTO", "NONE")) 6536 return self.expression( 6537 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6538 ) 6539 6540 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6541 index = self._index - 1 6542 6543 partition_exists = self._parse_exists() 6544 if self._match(TokenType.PARTITION, 
advance=False): 6545 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6546 6547 self._retreat(index) 6548 return self._parse_csv(self._parse_drop_column) 6549 6550 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6551 if self._match(TokenType.COLUMN): 6552 exists = self._parse_exists() 6553 old_column = self._parse_column() 6554 to = self._match_text_seq("TO") 6555 new_column = self._parse_column() 6556 6557 if old_column is None or to is None or new_column is None: 6558 return None 6559 6560 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6561 6562 self._match_text_seq("TO") 6563 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6564 6565 def _parse_alter_table_set(self) -> exp.AlterSet: 6566 alter_set = self.expression(exp.AlterSet) 6567 6568 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6569 "TABLE", "PROPERTIES" 6570 ): 6571 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6572 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6573 alter_set.set("expressions", [self._parse_assignment()]) 6574 elif self._match_texts(("LOGGED", "UNLOGGED")): 6575 alter_set.set("option", exp.var(self._prev.text.upper())) 6576 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6577 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6578 elif self._match_text_seq("LOCATION"): 6579 alter_set.set("location", self._parse_field()) 6580 elif self._match_text_seq("ACCESS", "METHOD"): 6581 alter_set.set("access_method", self._parse_field()) 6582 elif self._match_text_seq("TABLESPACE"): 6583 alter_set.set("tablespace", self._parse_field()) 6584 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6585 alter_set.set("file_format", [self._parse_field()]) 6586 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6587 
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement into exp.Alter, or fall back to exp.Command.

        The fallback is used when the object kind after ALTER is not recognized,
        when no action sub-parser matches, or when tokens remain unconsumed.
        """
        start = self._prev

        # The token right after ALTER must name an alterable object kind
        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        # The previous token (the first action keyword) selects the sub-parser
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only produce an Alter node if the whole statement was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)
    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED ... THEN ... clauses of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # False for BY TARGET, truthy for BY SOURCE, None when neither appears
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * form
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (...) form
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * form
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET col = expr, ... form
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
self._parse_primary() or self._parse_column() 6721 assignment_delimiter = self._match_texts(("=", "TO")) 6722 6723 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6724 self._retreat(index) 6725 return None 6726 6727 right = self._parse_statement() or self._parse_id_var() 6728 if isinstance(right, (exp.Column, exp.Identifier)): 6729 right = exp.var(right.name) 6730 6731 this = self.expression(exp.EQ, this=left, expression=right) 6732 return self.expression(exp.SetItem, this=this, kind=kind) 6733 6734 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6735 self._match_text_seq("TRANSACTION") 6736 characteristics = self._parse_csv( 6737 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6738 ) 6739 return self.expression( 6740 exp.SetItem, 6741 expressions=characteristics, 6742 kind="TRANSACTION", 6743 **{"global": global_}, # type: ignore 6744 ) 6745 6746 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6747 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6748 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6749 6750 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6751 index = self._index 6752 set_ = self.expression( 6753 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6754 ) 6755 6756 if self._curr: 6757 self._retreat(index) 6758 return self._parse_as_command(self._prev) 6759 6760 return set_ 6761 6762 def _parse_var_from_options( 6763 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6764 ) -> t.Optional[exp.Var]: 6765 start = self._curr 6766 if not start: 6767 return None 6768 6769 option = start.text.upper() 6770 continuations = options.get(option) 6771 6772 index = self._index 6773 self._advance() 6774 for keywords in continuations or []: 6775 if isinstance(keywords, str): 6776 keywords = (keywords,) 6777 6778 if self._match_text_seq(*keywords): 6779 
option = f"{option} {' '.join(keywords)}" 6780 break 6781 else: 6782 if continuations or continuations is None: 6783 if raise_unmatched: 6784 self.raise_error(f"Unknown option {option}") 6785 6786 self._retreat(index) 6787 return None 6788 6789 return exp.var(option) 6790 6791 def _parse_as_command(self, start: Token) -> exp.Command: 6792 while self._curr: 6793 self._advance() 6794 text = self._find_sql(start, self._prev) 6795 size = len(start.text) 6796 self._warn_unsupported() 6797 return exp.Command(this=text[:size], expression=text[size:]) 6798 6799 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6800 settings = [] 6801 6802 self._match_l_paren() 6803 kind = self._parse_id_var() 6804 6805 if self._match(TokenType.L_PAREN): 6806 while True: 6807 key = self._parse_id_var() 6808 value = self._parse_primary() 6809 6810 if not key and value is None: 6811 break 6812 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6813 self._match(TokenType.R_PAREN) 6814 6815 self._match_r_paren() 6816 6817 return self.expression( 6818 exp.DictProperty, 6819 this=this, 6820 kind=kind.this if kind else None, 6821 settings=settings, 6822 ) 6823 6824 def _parse_dict_range(self, this: str) -> exp.DictRange: 6825 self._match_l_paren() 6826 has_min = self._match_text_seq("MIN") 6827 if has_min: 6828 min = self._parse_var() or self._parse_primary() 6829 self._match_text_seq("MAX") 6830 max = self._parse_var() or self._parse_primary() 6831 else: 6832 max = self._parse_var() or self._parse_primary() 6833 min = exp.Literal.number(0) 6834 self._match_r_paren() 6835 return self.expression(exp.DictRange, this=this, min=min, max=max) 6836 6837 def _parse_comprehension( 6838 self, this: t.Optional[exp.Expression] 6839 ) -> t.Optional[exp.Comprehension]: 6840 index = self._index 6841 expression = self._parse_column() 6842 if not self._match(TokenType.IN): 6843 self._retreat(index - 1) 6844 return None 6845 iterator = self._parse_column() 6846 condition = 
self._parse_assignment() if self._match_text_seq("IF") else None 6847 return self.expression( 6848 exp.Comprehension, 6849 this=this, 6850 expression=expression, 6851 iterator=iterator, 6852 condition=condition, 6853 ) 6854 6855 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6856 if self._match(TokenType.HEREDOC_STRING): 6857 return self.expression(exp.Heredoc, this=self._prev.text) 6858 6859 if not self._match_text_seq("$"): 6860 return None 6861 6862 tags = ["$"] 6863 tag_text = None 6864 6865 if self._is_connected(): 6866 self._advance() 6867 tags.append(self._prev.text.upper()) 6868 else: 6869 self.raise_error("No closing $ found") 6870 6871 if tags[-1] != "$": 6872 if self._is_connected() and self._match_text_seq("$"): 6873 tag_text = tags[-1] 6874 tags.append("$") 6875 else: 6876 self.raise_error("No closing $ found") 6877 6878 heredoc_start = self._curr 6879 6880 while self._curr: 6881 if self._match_text_seq(*tags, advance=False): 6882 this = self._find_sql(heredoc_start, self._prev) 6883 self._advance(len(tags)) 6884 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6885 6886 self._advance() 6887 6888 self.raise_error(f"No closing {''.join(tags)} found") 6889 return None 6890 6891 def _find_parser( 6892 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6893 ) -> t.Optional[t.Callable]: 6894 if not self._curr: 6895 return None 6896 6897 index = self._index 6898 this = [] 6899 while True: 6900 # The current token might be multiple words 6901 curr = self._curr.text.upper() 6902 key = curr.split(" ") 6903 this.append(curr) 6904 6905 self._advance() 6906 result, trie = in_trie(trie, key) 6907 if result == TrieResult.FAILED: 6908 break 6909 6910 if result == TrieResult.EXISTS: 6911 subparser = parsers[" ".join(this)] 6912 return subparser 6913 6914 self._retreat(index) 6915 return None 6916 6917 def _match(self, token_type, advance=True, expression=None): 6918 if not self._curr: 6919 return None 6920 6921 if self._curr.token_type == 
token_type: 6922 if advance: 6923 self._advance() 6924 self._add_comments(expression) 6925 return True 6926 6927 return None 6928 6929 def _match_set(self, types, advance=True): 6930 if not self._curr: 6931 return None 6932 6933 if self._curr.token_type in types: 6934 if advance: 6935 self._advance() 6936 return True 6937 6938 return None 6939 6940 def _match_pair(self, token_type_a, token_type_b, advance=True): 6941 if not self._curr or not self._next: 6942 return None 6943 6944 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6945 if advance: 6946 self._advance(2) 6947 return True 6948 6949 return None 6950 6951 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6952 if not self._match(TokenType.L_PAREN, expression=expression): 6953 self.raise_error("Expecting (") 6954 6955 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6956 if not self._match(TokenType.R_PAREN, expression=expression): 6957 self.raise_error("Expecting )") 6958 6959 def _match_texts(self, texts, advance=True): 6960 if self._curr and self._curr.text.upper() in texts: 6961 if advance: 6962 self._advance() 6963 return True 6964 return None 6965 6966 def _match_text_seq(self, *texts, advance=True): 6967 index = self._index 6968 for text in texts: 6969 if self._curr and self._curr.text.upper() == text: 6970 self._advance() 6971 else: 6972 self._retreat(index) 6973 return None 6974 6975 if not advance: 6976 self._retreat(index) 6977 6978 return True 6979 6980 def _replace_lambda( 6981 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6982 ) -> t.Optional[exp.Expression]: 6983 if not node: 6984 return node 6985 6986 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6987 6988 for column in node.find_all(exp.Column): 6989 typ = lambda_types.get(column.parts[0].name) 6990 if typ is not None: 6991 dot_or_id = column.to_dot() if column.table else column.this 6992 
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ..., or the TRUNCATE(x, y) function call."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Optional identity-sequence handling (RESTART/CONTINUE IDENTITY)
        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into CopyParameter nodes."""
        # Some dialects separate the parameters with commas, others with whitespace
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options
    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY statement; fall back to exp.Command on unconsumed tokens."""
        start = self._prev

        self._match(TokenType.INTO)

        # Either COPY INTO (SELECT ...) or COPY INTO <table>
        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level. Default: ErrorLevel.IMMEDIATE.
            error_message_context: The amount of context to capture from a query string
                when displaying the error message (in number of characters). Default: 100.
            max_errors: Maximum number of error messages to include in a raised ParseError.
                This is only relevant if error_level is ErrorLevel.RAISE. Default: 3.
            dialect: The dialect to use; resolved through Dialect.get_or_raise.
        """
        # NOTE(review): local import — presumably avoids a circular module
        # dependency with sqlglot.dialects; confirm before moving to top level
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1353 def parse( 1354 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1355 ) -> t.List[t.Optional[exp.Expression]]: 1356 """ 1357 Parses a list of tokens and returns a list of syntax trees, one tree 1358 per parsed SQL statement. 1359 1360 Args: 1361 raw_tokens: The list of tokens. 1362 sql: The original SQL string, used to produce helpful debug messages. 1363 1364 Returns: 1365 The list of the produced syntax trees. 1366 """ 1367 return self._parse( 1368 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1369 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1371 def parse_into( 1372 self, 1373 expression_types: exp.IntoType, 1374 raw_tokens: t.List[Token], 1375 sql: t.Optional[str] = None, 1376 ) -> t.List[t.Optional[exp.Expression]]: 1377 """ 1378 Parses a list of tokens into a given Expression type. If a collection of Expression 1379 types is given instead, this method will try to parse the token list into each one 1380 of them, stopping at the first for which the parsing succeeds. 1381 1382 Args: 1383 expression_types: The expression type(s) to try and parse the token list into. 1384 raw_tokens: The list of tokens. 1385 sql: The original SQL string, used to produce helpful debug messages. 1386 1387 Returns: 1388 The target Expression. 1389 """ 1390 errors = [] 1391 for expression_type in ensure_list(expression_types): 1392 parser = self.EXPRESSION_PARSERS.get(expression_type) 1393 if not parser: 1394 raise TypeError(f"No parser registered for {expression_type}") 1395 1396 try: 1397 return self._parse(parser, raw_tokens, sql) 1398 except ParseError as e: 1399 e.errors[0]["into_expression"] = expression_type 1400 errors.append(e) 1401 1402 raise ParseError( 1403 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1404 errors=merge_errors(errors), 1405 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1445 def check_errors(self) -> None: 1446 """Logs or raises any found errors, depending on the chosen error level setting.""" 1447 if self.error_level == ErrorLevel.WARN: 1448 for error in self.errors: 1449 logger.error(str(error)) 1450 elif self.error_level == ErrorLevel.RAISE and self.errors: 1451 raise ParseError( 1452 concat_messages(self.errors, self.max_errors), 1453 errors=merge_errors(self.errors), 1454 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token used to anchor the error position; defaults to the current
                (or previous) token.

        Raises:
            ParseError: If the error level is ErrorLevel.IMMEDIATE.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending snippet is underlined with ANSI escape codes
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1484 def expression( 1485 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1486 ) -> E: 1487 """ 1488 Creates a new, validated Expression. 1489 1490 Args: 1491 exp_class: The expression class to instantiate. 1492 comments: An optional list of comments to attach to the expression. 1493 kwargs: The arguments to set for the expression along with their respective values. 1494 1495 Returns: 1496 The target expression. 1497 """ 1498 instance = exp_class(**kwargs) 1499 instance.add_comments(comments) if comments else self._add_comments(instance) 1500 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1507 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1508 """ 1509 Validates an Expression, making sure that all its mandatory arguments are set. 1510 1511 Args: 1512 expression: The expression to validate. 1513 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1514 1515 Returns: 1516 The validated expression. 1517 """ 1518 if self.error_level != ErrorLevel.IGNORE: 1519 for error_message in expression.error_messages(args): 1520 self.raise_error(error_message) 1521 1522 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.