sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
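

# -- Editor's illustration (not part of the sqlglot source) -------------------
# An end-to-end sketch of build_mod's operand wrapping, assuming the sqlglot
# package is importable; the expected output mirrors the comment above:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
#   '(a + 1) % 7'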


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
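

# -- Editor's illustration (not part of the sqlglot source) -------------------
# The metaclass precomputes word tries over the multi-word SHOW/SET parser keys
# so the parser can greedily match keyword sequences token by token. A rough
# sketch of the trie helpers (shapes are indicative, not authoritative):
#
#   >>> from sqlglot.trie import TrieResult, in_trie, new_trie
#   >>> trie = new_trie([("SHOW", "TABLES"), ("SHOW", "COLUMNS")])
#   >>> in_trie(trie, ("SHOW",))[0] == TrieResult.PREFIX
#   True
#   >>> in_trie(trie, ("SHOW", "TABLES"))[0] == TrieResult.EXISTS
#   True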


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
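
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # NO_PAREN_FUNCTIONS lets bare keywords parse as function nodes; a hedged
    # sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SELECT CURRENT_DATE").selects[0], exp.CurrentDate)
    #   True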

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }
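
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # TYPE_TOKENS feeds _parse_types, which builds exp.DataType nodes; a hedged
    # sketch:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))").to.sql()
    #   'DECIMAL(10, 2)'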

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
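
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # Because tokens such as FIRST appear in ID_VAR_TOKENS, keywords can still
    # serve as plain identifiers; a hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT first FROM t").sql()
    #   'SELECT first FROM t'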

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
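
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # CONJUNCTION through EXPONENT form the binary-operator precedence ladder,
    # so FACTOR operators bind tighter than TERM operators; a hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> tree = sqlglot.parse_one("a + b * c")
    #   >>> type(tree).__name__, type(tree.expression).__name__
    #   ('Add', 'Mul')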

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
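
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # COLUMN_OPERATORS handles postfix column syntax such as :: casts and the
    # JSON arrows; a hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("x::INT"), exp.Cast)
    #   True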

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
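
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # RANGE_PARSERS routes postfix predicates like BETWEEN, IN and LIKE; a
    # hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("x BETWEEN 1 AND 10"), exp.Between)
    #   True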

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
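
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # PROPERTY_PARSERS is keyed by the property keyword seen in DDL; a hedged
    # sketch using Spark-style TBLPROPERTIES:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ddl = sqlglot.parse_one(
    #   ...     "CREATE TABLE t (x INT) TBLPROPERTIES ('k'='v')", read="spark"
    #   ... )
    #   >>> ddl.find(exp.Properties) is not None
    #   True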

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
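
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # CONSTRAINT_PARSERS builds the column-constraint nodes attached to column
    # definitions; a hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY)")
    #   >>> ddl.find(exp.PrimaryKeyColumnConstraint) is not None
    #   True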
1010 "CHARACTER SET": lambda self: self.expression( 1011 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1012 ), 1013 "CHECK": lambda self: self.expression( 1014 exp.CheckColumnConstraint, 1015 this=self._parse_wrapped(self._parse_assignment), 1016 enforced=self._match_text_seq("ENFORCED"), 1017 ), 1018 "COLLATE": lambda self: self.expression( 1019 exp.CollateColumnConstraint, 1020 this=self._parse_identifier() or self._parse_column(), 1021 ), 1022 "COMMENT": lambda self: self.expression( 1023 exp.CommentColumnConstraint, this=self._parse_string() 1024 ), 1025 "COMPRESS": lambda self: self._parse_compress(), 1026 "CLUSTERED": lambda self: self.expression( 1027 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1028 ), 1029 "NONCLUSTERED": lambda self: self.expression( 1030 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1031 ), 1032 "DEFAULT": lambda self: self.expression( 1033 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1034 ), 1035 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1036 "EPHEMERAL": lambda self: self.expression( 1037 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1038 ), 1039 "EXCLUDE": lambda self: self.expression( 1040 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1041 ), 1042 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1043 "FORMAT": lambda self: self.expression( 1044 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1045 ), 1046 "GENERATED": lambda self: self._parse_generated_as_identity(), 1047 "IDENTITY": lambda self: self._parse_auto_increment(), 1048 "INLINE": lambda self: self._parse_inline(), 1049 "LIKE": lambda self: self._parse_create_like(), 1050 "NOT": lambda self: self._parse_not_constraint(), 1051 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1052 "ON": lambda self: ( 1053 self._match(TokenType.UPDATE) 1054 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1055 ) 1056 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1057 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1058 "PERIOD": lambda self: self._parse_period_for_system_time(), 1059 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1060 "REFERENCES": lambda self: self._parse_references(match=False), 1061 "TITLE": lambda self: self.expression( 1062 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1063 ), 1064 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1065 "UNIQUE": lambda self: self._parse_unique(), 1066 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1067 "WATERMARK": lambda self: self.expression( 1068 exp.WatermarkColumnConstraint, 1069 this=self._match(TokenType.FOR) and self._parse_column(), 1070 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1071 ), 1072 "WITH": lambda self: self.expression( 1073 exp.Properties, expressions=self._parse_wrapped_properties() 1074 ), 1075 } 1076 1077 ALTER_PARSERS = { 1078 "ADD": lambda self: self._parse_alter_table_add(), 1079 "AS": lambda self: self._parse_select(), 1080 "ALTER": lambda self: self._parse_alter_table_alter(), 1081 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1082 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1083 "DROP": lambda self: self._parse_alter_table_drop(), 1084 
"RENAME": lambda self: self._parse_alter_table_rename(), 1085 "SET": lambda self: self._parse_alter_table_set(), 1086 "SWAP": lambda self: self.expression( 1087 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1088 ), 1089 } 1090 1091 ALTER_ALTER_PARSERS = { 1092 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1093 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1094 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1095 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1096 } 1097 1098 SCHEMA_UNNAMED_CONSTRAINTS = { 1099 "CHECK", 1100 "EXCLUDE", 1101 "FOREIGN KEY", 1102 "LIKE", 1103 "PERIOD", 1104 "PRIMARY KEY", 1105 "UNIQUE", 1106 "WATERMARK", 1107 } 1108 1109 NO_PAREN_FUNCTION_PARSERS = { 1110 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1111 "CASE": lambda self: self._parse_case(), 1112 "CONNECT_BY_ROOT": lambda self: self.expression( 1113 exp.ConnectByRoot, this=self._parse_column() 1114 ), 1115 "IF": lambda self: self._parse_if(), 1116 } 1117 1118 INVALID_FUNC_NAME_TOKENS = { 1119 TokenType.IDENTIFIER, 1120 TokenType.STRING, 1121 } 1122 1123 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1124 1125 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1126 1127 FUNCTION_PARSERS = { 1128 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1129 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1130 "DECODE": lambda self: self._parse_decode(), 1131 "EXTRACT": lambda self: self._parse_extract(), 1132 "GAP_FILL": lambda self: self._parse_gap_fill(), 1133 "JSON_OBJECT": lambda self: self._parse_json_object(), 1134 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1135 "JSON_TABLE": lambda self: self._parse_json_table(), 1136 "MATCH": lambda self: self._parse_match_against(), 1137 "NORMALIZE": lambda self: self._parse_normalize(), 1138 "OPENJSON": lambda self: self._parse_open_json(), 1139 "OVERLAY": lambda self: self._parse_overlay(), 1140 "POSITION": lambda self: self._parse_position(), 1141 "PREDICT": lambda self: self._parse_predict(), 1142 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1143 "STRING_AGG": lambda self: self._parse_string_agg(), 1144 "SUBSTRING": lambda self: self._parse_substring(), 1145 "TRIM": lambda self: self._parse_trim(), 1146 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1147 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1148 "XMLELEMENT": lambda self: self.expression( 1149 exp.XMLElement, 1150 this=self._match_text_seq("NAME") and self._parse_id_var(), 1151 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1152 ), 1153 "XMLTABLE": lambda self: self._parse_xml_table(), 1154 } 1155 1156 QUERY_MODIFIER_PARSERS = { 1157 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1158 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1159 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1160 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1161 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1162 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1163 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1164 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1165 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1166 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1167 

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
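
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # SET_PARSERS and the OPTIONS_TYPE tables above drive statements such as
    # SET; a hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SET x = 1"), exp.Set)
    #   True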
{"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1273 1274 CLONE_KEYWORDS = {"CLONE", "COPY"} 1275 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1276 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1277 1278 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1279 1280 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1281 1282 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1283 1284 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1285 1286 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1287 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1288 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1289 1290 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1291 1292 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1293 1294 ADD_CONSTRAINT_TOKENS = { 1295 TokenType.CONSTRAINT, 1296 TokenType.FOREIGN_KEY, 1297 TokenType.INDEX, 1298 TokenType.KEY, 1299 TokenType.PRIMARY_KEY, 1300 TokenType.UNIQUE, 1301 } 1302 1303 DISTINCT_TOKENS = {TokenType.DISTINCT} 1304 1305 NULL_TOKENS = {TokenType.NULL} 1306 1307 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1308 1309 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1310 1311 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1312 1313 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1314 1315 ODBC_DATETIME_LITERALS = { 1316 "d": exp.Date, 1317 "t": exp.Time, 1318 "ts": exp.Timestamp, 1319 } 1320 1321 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1322 1323 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1324 1325 # The style options for the DESCRIBE statement 1326 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1327 1328 OPERATION_MODIFIERS: t.Set[str] = set() 1329 1330 STRICT_CAST = True 1331 1332 PREFIXED_PIVOT_COLUMNS = False 1333 IDENTIFY_PIVOT_STRINGS = False 1334 1335 LOG_DEFAULTS_TO_LN = False 1336 1337 # Whether ADD is present for each column added by ALTER TABLE 1338 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1339 1340 # Whether the table sample clause expects CSV syntax 1341 TABLESAMPLE_CSV = False 1342 1343 # The default method used for table sampling 1344 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1345 1346 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1347 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1348 1349 # Whether the TRIM function expects the characters to trim as its first argument 1350 TRIM_PATTERN_FIRST = False 1351 1352 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1353 STRING_ALIASES = False 1354 1355 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1356 MODIFIERS_ATTACHED_TO_SET_OP = True 1357 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1358 1359 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1360 NO_PAREN_IF_COMMANDS = True 1361 1362 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1363 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1364 1365 # Whether the `:` operator is used to extract a value from a VARIANT column 1366 COLON_IS_VARIANT_EXTRACT = False 1367 1368 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
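
    # -- Editor's illustration (not part of the sqlglot source) ---------------
    # A hedged sketch of driving parse() and parse_into() directly with tokens
    # produced by sqlglot.tokenize:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> from sqlglot.parser import Parser
    #   >>> parser = Parser()
    #   >>> [tree] = parser.parse(sqlglot.tokenize("SELECT 1"))
    #   >>> tree.sql()
    #   'SELECT 1'
    #   >>> [where] = parser.parse_into(exp.Where, sqlglot.tokenize("WHERE x > 1"))
    #   >>> where.sql()
    #   'WHERE x > 1'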
1467 """ 1468 errors = [] 1469 for expression_type in ensure_list(expression_types): 1470 parser = self.EXPRESSION_PARSERS.get(expression_type) 1471 if not parser: 1472 raise TypeError(f"No parser registered for {expression_type}") 1473 1474 try: 1475 return self._parse(parser, raw_tokens, sql) 1476 except ParseError as e: 1477 e.errors[0]["into_expression"] = expression_type 1478 errors.append(e) 1479 1480 raise ParseError( 1481 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1482 errors=merge_errors(errors), 1483 ) from errors[-1] 1484 1485 def _parse( 1486 self, 1487 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1488 raw_tokens: t.List[Token], 1489 sql: t.Optional[str] = None, 1490 ) -> t.List[t.Optional[exp.Expression]]: 1491 self.reset() 1492 self.sql = sql or "" 1493 1494 total = len(raw_tokens) 1495 chunks: t.List[t.List[Token]] = [[]] 1496 1497 for i, token in enumerate(raw_tokens): 1498 if token.token_type == TokenType.SEMICOLON: 1499 if token.comments: 1500 chunks.append([token]) 1501 1502 if i < total - 1: 1503 chunks.append([]) 1504 else: 1505 chunks[-1].append(token) 1506 1507 expressions = [] 1508 1509 for tokens in chunks: 1510 self._index = -1 1511 self._tokens = tokens 1512 self._advance() 1513 1514 expressions.append(parse_method(self)) 1515 1516 if self._index < len(self._tokens): 1517 self.raise_error("Invalid expression / Unexpected token") 1518 1519 self.check_errors() 1520 1521 return expressions 1522 1523 def check_errors(self) -> None: 1524 """Logs or raises any found errors, depending on the chosen error level setting.""" 1525 if self.error_level == ErrorLevel.WARN: 1526 for error in self.errors: 1527 logger.error(str(error)) 1528 elif self.error_level == ErrorLevel.RAISE and self.errors: 1529 raise ParseError( 1530 concat_messages(self.errors, self.max_errors), 1531 errors=merge_errors(self.errors), 1532 ) 1533 1534 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1535 """ 1536 Appends an error in the list of recorded errors or raises it, depending on the chosen 1537 error level setting. 1538 """ 1539 token = token or self._curr or self._prev or Token.string("") 1540 start = token.start 1541 end = token.end + 1 1542 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1543 highlight = self.sql[start:end] 1544 end_context = self.sql[end : end + self.error_message_context] 1545 1546 error = ParseError.new( 1547 f"{message}. Line {token.line}, Col: {token.col}.\n" 1548 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1549 description=message, 1550 line=token.line, 1551 col=token.col, 1552 start_context=start_context, 1553 highlight=highlight, 1554 end_context=end_context, 1555 ) 1556 1557 if self.error_level == ErrorLevel.IMMEDIATE: 1558 raise error 1559 1560 self.errors.append(error) 1561 1562 def expression( 1563 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1564 ) -> E: 1565 """ 1566 Creates a new, validated Expression. 1567 1568 Args: 1569 exp_class: The expression class to instantiate. 1570 comments: An optional list of comments to attach to the expression. 1571 kwargs: The arguments to set for the expression along with their respective values. 1572 1573 Returns: 1574 The target expression. 
1575 """ 1576 instance = exp_class(**kwargs) 1577 instance.add_comments(comments) if comments else self._add_comments(instance) 1578 return self.validate_expression(instance) 1579 1580 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1581 if expression and self._prev_comments: 1582 expression.add_comments(self._prev_comments) 1583 self._prev_comments = None 1584 1585 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1586 """ 1587 Validates an Expression, making sure that all its mandatory arguments are set. 1588 1589 Args: 1590 expression: The expression to validate. 1591 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1592 1593 Returns: 1594 The validated expression. 1595 """ 1596 if self.error_level != ErrorLevel.IGNORE: 1597 for error_message in expression.error_messages(args): 1598 self.raise_error(error_message) 1599 1600 return expression 1601 1602 def _find_sql(self, start: Token, end: Token) -> str: 1603 return self.sql[start.start : end.end + 1] 1604 1605 def _is_connected(self) -> bool: 1606 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1607 1608 def _advance(self, times: int = 1) -> None: 1609 self._index += times 1610 self._curr = seq_get(self._tokens, self._index) 1611 self._next = seq_get(self._tokens, self._index + 1) 1612 1613 if self._index > 0: 1614 self._prev = self._tokens[self._index - 1] 1615 self._prev_comments = self._prev.comments 1616 else: 1617 self._prev = None 1618 self._prev_comments = None 1619 1620 def _retreat(self, index: int) -> None: 1621 if index != self._index: 1622 self._advance(index - self._index) 1623 1624 def _warn_unsupported(self) -> None: 1625 if len(self._tokens) <= 1: 1626 return 1627 1628 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1629 # interested in emitting a warning for the one being currently processed. 1630 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1631 1632 logger.warning( 1633 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1634 ) 1635 1636 def _parse_command(self) -> exp.Command: 1637 self._warn_unsupported() 1638 return self.expression( 1639 exp.Command, 1640 comments=self._prev_comments, 1641 this=self._prev.text.upper(), 1642 expression=self._parse_string(), 1643 ) 1644 1645 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1646 """ 1647 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
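
    # A sketch of the statement dispatch above (illustrative; uses only the public API):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("DROP TABLE IF EXISTS t")
    #     assert isinstance(ast, exp.Drop)
    #
    # The DROP token is matched in STATEMENT_PARSERS and routed to _parse_drop below;
    # tokens registered in the dialect tokenizer's COMMANDS are handed to _parse_command.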

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
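            # e.g. BigQuery's CREATE FUNCTION f(x INT64) RETURNS INT64 LANGUAGE js AS '...':
            # RETURNS and LANGUAGE would be picked up here as POST_SCHEMA properties
            # (illustrative example, not tied to a specific dialect)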
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions,
                expressions=self._parse_csv(self._parse_procedure_option),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(
            exp.MergeBlockRatioProperty,
            no=no,
            default=default,
        )

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
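        # Illustrative Postgres-style bound specs handled below:
        #   FOR VALUES IN (1, 2)
        #   FOR VALUES FROM (MINVALUE) TO (10)
        #   FOR VALUES WITH (MODULUS 4, REMAINDER 0)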
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(
            exp.DistKeyProperty,
            this=self._parse_wrapped(self._parse_id_var),
        )

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
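        # e.g. VALUES 1, 2 yields two single-column rows, equivalent to VALUES (1), (2)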
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
                this = self._parse_simplified_pivot(
                    is_unpivot=self._prev.token_type == TokenType.UNPIVOT
                )
            elif self._match(TokenType.FROM):
                from_ = self._parse_from(skip_from_token=True)
                # Support parentheses for duckdb FROM-first syntax
                select = self._parse_select()
                if select:
                    select.set("from", from_)
                    this = select
                else:
                    this = exp.select("*").from_(t.cast(exp.From, from_))
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)
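        # e.g. "t AS a(x, y)" would produce a TableAlias with this=a and the two
        # column names as its columns (an illustrative sketch)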

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
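            # A malformed hint shouldn't fail the whole statement; flag it so we retreat
            # and keep the hint text verbatim via _parse_hint_fallback_to_string below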
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
self._parse_csv(self._parse_name_as_expression) 3415 if self._match_text_seq("DEFINE") 3416 else None 3417 ) 3418 3419 self._match_r_paren() 3420 3421 return self.expression( 3422 exp.MatchRecognize, 3423 partition_by=partition, 3424 order=order, 3425 measures=measures, 3426 rows=rows, 3427 after=after, 3428 pattern=pattern, 3429 define=define, 3430 alias=self._parse_table_alias(), 3431 ) 3432 3433 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3434 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3435 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3436 cross_apply = False 3437 3438 if cross_apply is not None: 3439 this = self._parse_select(table=True) 3440 view = None 3441 outer = None 3442 elif self._match(TokenType.LATERAL): 3443 this = self._parse_select(table=True) 3444 view = self._match(TokenType.VIEW) 3445 outer = self._match(TokenType.OUTER) 3446 else: 3447 return None 3448 3449 if not this: 3450 this = ( 3451 self._parse_unnest() 3452 or self._parse_function() 3453 or self._parse_id_var(any_token=False) 3454 ) 3455 3456 while self._match(TokenType.DOT): 3457 this = exp.Dot( 3458 this=this, 3459 expression=self._parse_function() or self._parse_id_var(any_token=False), 3460 ) 3461 3462 if view: 3463 table = self._parse_id_var(any_token=False) 3464 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3465 table_alias: t.Optional[exp.TableAlias] = self.expression( 3466 exp.TableAlias, this=table, columns=columns 3467 ) 3468 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3469 # We move the alias from the lateral's child node to the lateral itself 3470 table_alias = this.args["alias"].pop() 3471 else: 3472 table_alias = self._parse_table_alias() 3473 3474 return self.expression( 3475 exp.Lateral, 3476 this=this, 3477 view=view, 3478 outer=outer, 3479 alias=table_alias, 3480 cross_apply=cross_apply, 3481 ) 3482 3483 def _parse_join_parts( 3484 self, 3485 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3486 return ( 3487 self._match_set(self.JOIN_METHODS) and self._prev, 3488 self._match_set(self.JOIN_SIDES) and self._prev, 3489 self._match_set(self.JOIN_KINDS) and self._prev, 3490 ) 3491 3492 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3493 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3494 this = self._parse_column() 3495 if isinstance(this, exp.Column): 3496 return this.this 3497 return this 3498 3499 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3500 3501 def _parse_join( 3502 self, skip_join_token: bool = False, parse_bracket: bool = False 3503 ) -> t.Optional[exp.Join]: 3504 if self._match(TokenType.COMMA): 3505 return self.expression(exp.Join, this=self._parse_table()) 3506 3507 index = self._index 3508 method, side, kind = self._parse_join_parts() 3509 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3510 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3511 3512 if not skip_join_token and not join: 3513 self._retreat(index) 3514 kind = None 3515 method = None 3516 side = None 3517 3518 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3519 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3520 3521 if not skip_join_token and not join and not outer_apply and not cross_apply: 3522 return None 3523 3524 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 
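# An ARRAY join (e.g. ClickHouse's ARRAY JOIN arr1, arr2) may list several comma-separated
# expressions; the extra tables are collected into the "expressions" arg below.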
3525 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3526 kwargs["expressions"] = self._parse_csv( 3527 lambda: self._parse_table(parse_bracket=parse_bracket) 3528 ) 3529 3530 if method: 3531 kwargs["method"] = method.text 3532 if side: 3533 kwargs["side"] = side.text 3534 if kind: 3535 kwargs["kind"] = kind.text 3536 if hint: 3537 kwargs["hint"] = hint 3538 3539 if self._match(TokenType.MATCH_CONDITION): 3540 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3541 3542 if self._match(TokenType.ON): 3543 kwargs["on"] = self._parse_assignment() 3544 elif self._match(TokenType.USING): 3545 kwargs["using"] = self._parse_using_identifiers() 3546 elif ( 3547 not (outer_apply or cross_apply) 3548 and not isinstance(kwargs["this"], exp.Unnest) 3549 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3550 ): 3551 index = self._index 3552 joins: t.Optional[list] = list(self._parse_joins()) 3553 3554 if joins and self._match(TokenType.ON): 3555 kwargs["on"] = self._parse_assignment() 3556 elif joins and self._match(TokenType.USING): 3557 kwargs["using"] = self._parse_using_identifiers() 3558 else: 3559 joins = None 3560 self._retreat(index) 3561 3562 kwargs["this"].set("joins", joins if joins else None) 3563 3564 comments = [c for token in (method, side, kind) if token for c in token.comments] 3565 return self.expression(exp.Join, comments=comments, **kwargs) 3566 3567 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3568 this = self._parse_assignment() 3569 3570 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3571 return this 3572 3573 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3574 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3575 3576 return this 3577 3578 def _parse_index_params(self) -> exp.IndexParameters: 3579 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3580 3581 if self._match(TokenType.L_PAREN, advance=False): 3582 columns = self._parse_wrapped_csv(self._parse_with_operator) 3583 else: 3584 columns = None 3585 3586 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3587 partition_by = self._parse_partition_by() 3588 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3589 tablespace = ( 3590 self._parse_var(any_token=True) 3591 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3592 else None 3593 ) 3594 where = self._parse_where() 3595 3596 on = self._parse_field() if self._match(TokenType.ON) else None 3597 3598 return self.expression( 3599 exp.IndexParameters, 3600 using=using, 3601 columns=columns, 3602 include=include, 3603 partition_by=partition_by, 3604 where=where, 3605 with_storage=with_storage, 3606 tablespace=tablespace, 3607 on=on, 3608 ) 3609 3610 def _parse_index( 3611 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3612 ) -> t.Optional[exp.Index]: 3613 if index or anonymous: 3614 unique = None 3615 primary = None 3616 amp = None 3617 3618 self._match(TokenType.ON) 3619 self._match(TokenType.TABLE) # hive 3620 table = self._parse_table_parts(schema=True) 3621 else: 3622 unique = self._match(TokenType.UNIQUE) 3623 primary = self._match_text_seq("PRIMARY") 3624 amp = self._match_text_seq("AMP") 3625 3626 if not self._match(TokenType.INDEX): 3627 return None 3628 3629 index = self._parse_id_var() 3630 table = None 3631 3632 params = self._parse_index_params() 3633 3634 return self.expression( 
3635 exp.Index, 3636 this=index, 3637 table=table, 3638 unique=unique, 3639 primary=primary, 3640 amp=amp, 3641 params=params, 3642 ) 3643 3644 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3645 hints: t.List[exp.Expression] = [] 3646 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3647 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3648 hints.append( 3649 self.expression( 3650 exp.WithTableHint, 3651 expressions=self._parse_csv( 3652 lambda: self._parse_function() or self._parse_var(any_token=True) 3653 ), 3654 ) 3655 ) 3656 self._match_r_paren() 3657 else: 3658 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3659 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3660 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3661 3662 self._match_set((TokenType.INDEX, TokenType.KEY)) 3663 if self._match(TokenType.FOR): 3664 hint.set("target", self._advance_any() and self._prev.text.upper()) 3665 3666 hint.set("expressions", self._parse_wrapped_id_vars()) 3667 hints.append(hint) 3668 3669 return hints or None 3670 3671 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3672 return ( 3673 (not schema and self._parse_function(optional_parens=False)) 3674 or self._parse_id_var(any_token=False) 3675 or self._parse_string_as_identifier() 3676 or self._parse_placeholder() 3677 ) 3678 3679 def _parse_table_parts( 3680 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3681 ) -> exp.Table: 3682 catalog = None 3683 db = None 3684 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3685 3686 while self._match(TokenType.DOT): 3687 if catalog: 3688 # This allows nesting the table in arbitrarily many dot expressions if needed 3689 table = self.expression( 3690 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3691 ) 3692 else: 3693 catalog = db 3694 db = table 3695 # "" used for tsql FROM a..b case 3696 table = self._parse_table_part(schema=schema) or "" 3697 3698 if ( 3699 wildcard 3700 and self._is_connected() 3701 and (isinstance(table, exp.Identifier) or not table) 3702 and self._match(TokenType.STAR) 3703 ): 3704 if isinstance(table, exp.Identifier): 3705 table.args["this"] += "*" 3706 else: 3707 table = exp.Identifier(this="*") 3708 3709 # We bubble up comments from the Identifier to the Table 3710 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3711 3712 if is_db_reference: 3713 catalog = db 3714 db = table 3715 table = None 3716 3717 if not table and not is_db_reference: 3718 self.raise_error(f"Expected table name but got {self._curr}") 3719 if not db and is_db_reference: 3720 self.raise_error(f"Expected database name but got {self._curr}") 3721 3722 table = self.expression( 3723 exp.Table, 3724 comments=comments, 3725 this=table, 3726 db=db, 3727 catalog=catalog, 3728 ) 3729 3730 changes = self._parse_changes() 3731 if changes: 3732 table.set("changes", changes) 3733 3734 at_before = self._parse_historical_data() 3735 if at_before: 3736 table.set("when", at_before) 3737 3738 pivots = self._parse_pivots() 3739 if pivots: 3740 table.set("pivots", pivots) 3741 3742 return table 3743 3744 def _parse_table( 3745 self, 3746 schema: bool = False, 3747 joins: bool = False, 3748 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3749 parse_bracket: bool = False, 3750 is_db_reference: bool = False, 3751 parse_partition: bool = False, 3752 ) -> 
t.Optional[exp.Expression]: 3753 lateral = self._parse_lateral() 3754 if lateral: 3755 return lateral 3756 3757 unnest = self._parse_unnest() 3758 if unnest: 3759 return unnest 3760 3761 values = self._parse_derived_table_values() 3762 if values: 3763 return values 3764 3765 subquery = self._parse_select(table=True) 3766 if subquery: 3767 if not subquery.args.get("pivots"): 3768 subquery.set("pivots", self._parse_pivots()) 3769 return subquery 3770 3771 bracket = parse_bracket and self._parse_bracket(None) 3772 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3773 3774 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3775 self._parse_table 3776 ) 3777 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3778 3779 only = self._match(TokenType.ONLY) 3780 3781 this = t.cast( 3782 exp.Expression, 3783 bracket 3784 or rows_from 3785 or self._parse_bracket( 3786 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3787 ), 3788 ) 3789 3790 if only: 3791 this.set("only", only) 3792 3793 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3794 self._match_text_seq("*") 3795 3796 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3797 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3798 this.set("partition", self._parse_partition()) 3799 3800 if schema: 3801 return self._parse_schema(this=this) 3802 3803 version = self._parse_version() 3804 3805 if version: 3806 this.set("version", version) 3807 3808 if self.dialect.ALIAS_POST_TABLESAMPLE: 3809 this.set("sample", self._parse_table_sample()) 3810 3811 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3812 if alias: 3813 this.set("alias", alias) 3814 3815 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3816 return self.expression( 3817 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3818 ) 3819 3820 this.set("hints", self._parse_table_hints()) 3821 3822 if not this.args.get("pivots"): 3823 this.set("pivots", self._parse_pivots()) 3824 3825 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3826 this.set("sample", self._parse_table_sample()) 3827 3828 if joins: 3829 for join in self._parse_joins(): 3830 this.append("joins", join) 3831 3832 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3833 this.set("ordinality", True) 3834 this.set("alias", self._parse_table_alias()) 3835 3836 return this 3837 3838 def _parse_version(self) -> t.Optional[exp.Version]: 3839 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3840 this = "TIMESTAMP" 3841 elif self._match(TokenType.VERSION_SNAPSHOT): 3842 this = "VERSION" 3843 else: 3844 return None 3845 3846 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3847 kind = self._prev.text.upper() 3848 start = self._parse_bitwise() 3849 self._match_texts(("TO", "AND")) 3850 end = self._parse_bitwise() 3851 expression: t.Optional[exp.Expression] = self.expression( 3852 exp.Tuple, expressions=[start, end] 3853 ) 3854 elif self._match_text_seq("CONTAINED", "IN"): 3855 kind = "CONTAINED IN" 3856 expression = self.expression( 3857 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3858 ) 3859 elif self._match(TokenType.ALL): 3860 kind = "ALL" 3861 expression = None 3862 else: 3863 self._match_text_seq("AS", "OF") 3864 kind = "AS OF" 3865 expression = self._parse_type() 3866 3867 return self.expression(exp.Version, this=this, expression=expression, 
kind=kind) 3868 3869 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3870 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3871 index = self._index 3872 historical_data = None 3873 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3874 this = self._prev.text.upper() 3875 kind = ( 3876 self._match(TokenType.L_PAREN) 3877 and self._match_texts(self.HISTORICAL_DATA_KIND) 3878 and self._prev.text.upper() 3879 ) 3880 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3881 3882 if expression: 3883 self._match_r_paren() 3884 historical_data = self.expression( 3885 exp.HistoricalData, this=this, kind=kind, expression=expression 3886 ) 3887 else: 3888 self._retreat(index) 3889 3890 return historical_data 3891 3892 def _parse_changes(self) -> t.Optional[exp.Changes]: 3893 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3894 return None 3895 3896 information = self._parse_var(any_token=True) 3897 self._match_r_paren() 3898 3899 return self.expression( 3900 exp.Changes, 3901 information=information, 3902 at_before=self._parse_historical_data(), 3903 end=self._parse_historical_data(), 3904 ) 3905 3906 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3907 if not self._match(TokenType.UNNEST): 3908 return None 3909 3910 expressions = self._parse_wrapped_csv(self._parse_equality) 3911 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3912 3913 alias = self._parse_table_alias() if with_alias else None 3914 3915 if alias: 3916 if self.dialect.UNNEST_COLUMN_ONLY: 3917 if alias.args.get("columns"): 3918 self.raise_error("Unexpected extra column alias in unnest.") 3919 3920 alias.set("columns", [alias.this]) 3921 alias.set("this", None) 3922 3923 columns = alias.args.get("columns") or [] 3924 if offset and len(expressions) < len(columns): 3925 offset = columns.pop() 3926 3927 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3928 self._match(TokenType.ALIAS) 3929 offset = self._parse_id_var( 3930 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3931 ) or exp.to_identifier("offset") 3932 3933 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3934 3935 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3936 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3937 if not is_derived and not ( 3938 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3939 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3940 ): 3941 return None 3942 3943 expressions = self._parse_csv(self._parse_value) 3944 alias = self._parse_table_alias() 3945 3946 if is_derived: 3947 self._match_r_paren() 3948 3949 return self.expression( 3950 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3951 ) 3952 3953 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3954 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3955 as_modifier and self._match_text_seq("USING", "SAMPLE") 3956 ): 3957 return None 3958 3959 bucket_numerator = None 3960 bucket_denominator = None 3961 bucket_field = None 3962 percent = None 3963 size = None 3964 seed = None 3965 3966 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3967 matched_l_paren = self._match(TokenType.L_PAREN) 3968 3969 if self.TABLESAMPLE_CSV: 3970 num = None 3971 expressions = self._parse_csv(self._parse_primary) 3972 else: 3973 expressions = None 3974 num = ( 3975 self._parse_factor() 
3976 if self._match(TokenType.NUMBER, advance=False)
3977 else self._parse_primary() or self._parse_placeholder()
3978 )
3979
3980 if self._match_text_seq("BUCKET"):
3981 bucket_numerator = self._parse_number()
3982 self._match_text_seq("OUT", "OF")
3983 bucket_denominator = self._parse_number()
3984 self._match(TokenType.ON)
3985 bucket_field = self._parse_field()
3986 elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3987 percent = num
3988 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3989 size = num
3990 else:
3991 percent = num
3992
3993 if matched_l_paren:
3994 self._match_r_paren()
3995
3996 if self._match(TokenType.L_PAREN):
3997 method = self._parse_var(upper=True)
3998 seed = self._match(TokenType.COMMA) and self._parse_number()
3999 self._match_r_paren()
4000 elif self._match_texts(("SEED", "REPEATABLE")):
4001 seed = self._parse_wrapped(self._parse_number)
4002
4003 if not method and self.DEFAULT_SAMPLING_METHOD:
4004 method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4005
4006 return self.expression(
4007 exp.TableSample,
4008 expressions=expressions,
4009 method=method,
4010 bucket_numerator=bucket_numerator,
4011 bucket_denominator=bucket_denominator,
4012 bucket_field=bucket_field,
4013 percent=percent,
4014 size=size,
4015 seed=seed,
4016 )
4017
4018 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4019 return list(iter(self._parse_pivot, None)) or None
4020
4021 def _parse_joins(self) -> t.Iterator[exp.Join]:
4022 return iter(self._parse_join, None)
4023
4024 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4025 if not self._match(TokenType.INTO):
4026 return None
4027
4028 return self.expression(
4029 exp.UnpivotColumns,
4030 this=self._match_text_seq("NAME") and self._parse_column(),
4031 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4032 )
4033
4034 # https://duckdb.org/docs/sql/statements/pivot
4035 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4036 def _parse_on() -> t.Optional[exp.Expression]:
4037 this = self._parse_bitwise()
4038
4039 if self._match(TokenType.IN):
4040 # PIVOT ... ON col IN (row_val1, row_val2)
4041 return self._parse_in(this)
4042 if self._match(TokenType.ALIAS, advance=False):
4043 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4044 return self._parse_alias(this) 4045 4046 return this 4047 4048 this = self._parse_table() 4049 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4050 into = self._parse_unpivot_columns() 4051 using = self._match(TokenType.USING) and self._parse_csv( 4052 lambda: self._parse_alias(self._parse_function()) 4053 ) 4054 group = self._parse_group() 4055 4056 return self.expression( 4057 exp.Pivot, 4058 this=this, 4059 expressions=expressions, 4060 using=using, 4061 group=group, 4062 unpivot=is_unpivot, 4063 into=into, 4064 ) 4065 4066 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4067 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4068 this = self._parse_select_or_expression() 4069 4070 self._match(TokenType.ALIAS) 4071 alias = self._parse_bitwise() 4072 if alias: 4073 if isinstance(alias, exp.Column) and not alias.db: 4074 alias = alias.this 4075 return self.expression(exp.PivotAlias, this=this, alias=alias) 4076 4077 return this 4078 4079 value = self._parse_column() 4080 4081 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4082 self.raise_error("Expecting IN (") 4083 4084 if self._match(TokenType.ANY): 4085 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4086 else: 4087 exprs = self._parse_csv(_parse_aliased_expression) 4088 4089 self._match_r_paren() 4090 return self.expression(exp.In, this=value, expressions=exprs) 4091 4092 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4093 index = self._index 4094 include_nulls = None 4095 4096 if self._match(TokenType.PIVOT): 4097 unpivot = False 4098 elif self._match(TokenType.UNPIVOT): 4099 unpivot = True 4100 4101 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4102 if self._match_text_seq("INCLUDE", "NULLS"): 4103 include_nulls = True 4104 elif self._match_text_seq("EXCLUDE", "NULLS"): 4105 include_nulls = False 4106 else: 4107 return None 4108 4109 expressions = [] 4110 4111 if not self._match(TokenType.L_PAREN): 4112 self._retreat(index) 4113 return None 4114 4115 if unpivot: 4116 expressions = self._parse_csv(self._parse_column) 4117 else: 4118 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4119 4120 if not expressions: 4121 self.raise_error("Failed to parse PIVOT's aggregation list") 4122 4123 if not self._match(TokenType.FOR): 4124 self.raise_error("Expecting FOR") 4125 4126 field = self._parse_pivot_in() 4127 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4128 self._parse_bitwise 4129 ) 4130 4131 self._match_r_paren() 4132 4133 pivot = self.expression( 4134 exp.Pivot, 4135 expressions=expressions, 4136 field=field, 4137 unpivot=unpivot, 4138 include_nulls=include_nulls, 4139 default_on_null=default_on_null, 4140 ) 4141 4142 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4143 pivot.set("alias", self._parse_table_alias()) 4144 4145 if not unpivot: 4146 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4147 4148 columns: t.List[exp.Expression] = [] 4149 for fld in pivot.args["field"].expressions: 4150 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4151 for name in names: 4152 if self.PREFIXED_PIVOT_COLUMNS: 4153 name = f"{name}_{field_name}" if name else field_name 4154 else: 4155 name = f"{field_name}_{name}" if name else field_name 4156 4157 columns.append(exp.to_identifier(name)) 4158 4159 
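# Attach the synthesized output column names (one per aggregation/IN-value combination,
# ordered according to PREFIXED_PIVOT_COLUMNS) so downstream generators can reference them.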
pivot.set("columns", columns) 4160 4161 return pivot 4162 4163 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4164 return [agg.alias for agg in aggregations] 4165 4166 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4167 if not skip_where_token and not self._match(TokenType.PREWHERE): 4168 return None 4169 4170 return self.expression( 4171 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4172 ) 4173 4174 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4175 if not skip_where_token and not self._match(TokenType.WHERE): 4176 return None 4177 4178 return self.expression( 4179 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4180 ) 4181 4182 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4183 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4184 return None 4185 4186 elements: t.Dict[str, t.Any] = defaultdict(list) 4187 4188 if self._match(TokenType.ALL): 4189 elements["all"] = True 4190 elif self._match(TokenType.DISTINCT): 4191 elements["all"] = False 4192 4193 while True: 4194 index = self._index 4195 4196 elements["expressions"].extend( 4197 self._parse_csv( 4198 lambda: None 4199 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4200 else self._parse_assignment() 4201 ) 4202 ) 4203 4204 before_with_index = self._index 4205 with_prefix = self._match(TokenType.WITH) 4206 4207 if self._match(TokenType.ROLLUP): 4208 elements["rollup"].append( 4209 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4210 ) 4211 elif self._match(TokenType.CUBE): 4212 elements["cube"].append( 4213 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4214 ) 4215 elif self._match(TokenType.GROUPING_SETS): 4216 elements["grouping_sets"].append( 4217 self.expression( 4218 exp.GroupingSets, 4219 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4220 ) 4221 ) 4222 elif self._match_text_seq("TOTALS"): 4223 elements["totals"] = True # type: ignore 4224 4225 if before_with_index <= self._index <= before_with_index + 1: 4226 self._retreat(before_with_index) 4227 break 4228 4229 if index == self._index: 4230 break 4231 4232 return self.expression(exp.Group, **elements) # type: ignore 4233 4234 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4235 return self.expression( 4236 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4237 ) 4238 4239 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4240 if self._match(TokenType.L_PAREN): 4241 grouping_set = self._parse_csv(self._parse_column) 4242 self._match_r_paren() 4243 return self.expression(exp.Tuple, expressions=grouping_set) 4244 4245 return self._parse_column() 4246 4247 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4248 if not skip_having_token and not self._match(TokenType.HAVING): 4249 return None 4250 return self.expression(exp.Having, this=self._parse_assignment()) 4251 4252 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4253 if not self._match(TokenType.QUALIFY): 4254 return None 4255 return self.expression(exp.Qualify, this=self._parse_assignment()) 4256 4257 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4258 if skip_start_token: 4259 start = None 4260 elif self._match(TokenType.START_WITH): 4261 start = self._parse_assignment() 4262 else: 4263 
return None 4264 4265 self._match(TokenType.CONNECT_BY) 4266 nocycle = self._match_text_seq("NOCYCLE") 4267 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4268 exp.Prior, this=self._parse_bitwise() 4269 ) 4270 connect = self._parse_assignment() 4271 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4272 4273 if not start and self._match(TokenType.START_WITH): 4274 start = self._parse_assignment() 4275 4276 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4277 4278 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4279 this = self._parse_id_var(any_token=True) 4280 if self._match(TokenType.ALIAS): 4281 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4282 return this 4283 4284 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4285 if self._match_text_seq("INTERPOLATE"): 4286 return self._parse_wrapped_csv(self._parse_name_as_expression) 4287 return None 4288 4289 def _parse_order( 4290 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4291 ) -> t.Optional[exp.Expression]: 4292 siblings = None 4293 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4294 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4295 return this 4296 4297 siblings = True 4298 4299 return self.expression( 4300 exp.Order, 4301 this=this, 4302 expressions=self._parse_csv(self._parse_ordered), 4303 siblings=siblings, 4304 ) 4305 4306 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4307 if not self._match(token): 4308 return None 4309 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4310 4311 def _parse_ordered( 4312 self, parse_method: t.Optional[t.Callable] = None 4313 ) -> t.Optional[exp.Ordered]: 4314 this = parse_method() if parse_method else self._parse_assignment() 4315 if not this: 4316 return None 4317 4318 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4319 this = exp.var("ALL") 4320 4321 asc = self._match(TokenType.ASC) 4322 desc = self._match(TokenType.DESC) or (asc and False) 4323 4324 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4325 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4326 4327 nulls_first = is_nulls_first or False 4328 explicitly_null_ordered = is_nulls_first or is_nulls_last 4329 4330 if ( 4331 not explicitly_null_ordered 4332 and ( 4333 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4334 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4335 ) 4336 and self.dialect.NULL_ORDERING != "nulls_are_last" 4337 ): 4338 nulls_first = True 4339 4340 if self._match_text_seq("WITH", "FILL"): 4341 with_fill = self.expression( 4342 exp.WithFill, 4343 **{ # type: ignore 4344 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4345 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4346 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4347 "interpolate": self._parse_interpolate(), 4348 }, 4349 ) 4350 else: 4351 with_fill = None 4352 4353 return self.expression( 4354 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4355 ) 4356 4357 def _parse_limit( 4358 self, 4359 this: t.Optional[exp.Expression] = None, 4360 top: bool = False, 4361 skip_limit_token: bool = False, 4362 ) -> t.Optional[exp.Expression]: 4363 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4364 comments = self._prev_comments 4365 if top: 4366 limit_paren = 
self._match(TokenType.L_PAREN) 4367 expression = self._parse_term() if limit_paren else self._parse_number() 4368 4369 if limit_paren: 4370 self._match_r_paren() 4371 else: 4372 expression = self._parse_term() 4373 4374 if self._match(TokenType.COMMA): 4375 offset = expression 4376 expression = self._parse_term() 4377 else: 4378 offset = None 4379 4380 limit_exp = self.expression( 4381 exp.Limit, 4382 this=this, 4383 expression=expression, 4384 offset=offset, 4385 comments=comments, 4386 expressions=self._parse_limit_by(), 4387 ) 4388 4389 return limit_exp 4390 4391 if self._match(TokenType.FETCH): 4392 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4393 direction = self._prev.text.upper() if direction else "FIRST" 4394 4395 count = self._parse_field(tokens=self.FETCH_TOKENS) 4396 percent = self._match(TokenType.PERCENT) 4397 4398 self._match_set((TokenType.ROW, TokenType.ROWS)) 4399 4400 only = self._match_text_seq("ONLY") 4401 with_ties = self._match_text_seq("WITH", "TIES") 4402 4403 if only and with_ties: 4404 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4405 4406 return self.expression( 4407 exp.Fetch, 4408 direction=direction, 4409 count=count, 4410 percent=percent, 4411 with_ties=with_ties, 4412 ) 4413 4414 return this 4415 4416 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4417 if not self._match(TokenType.OFFSET): 4418 return this 4419 4420 count = self._parse_term() 4421 self._match_set((TokenType.ROW, TokenType.ROWS)) 4422 4423 return self.expression( 4424 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4425 ) 4426 4427 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4428 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4429 4430 def _parse_locks(self) -> t.List[exp.Lock]: 4431 locks = [] 4432 while True: 4433 if self._match_text_seq("FOR", "UPDATE"): 4434 update = True 4435 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4436 "LOCK", "IN", "SHARE", "MODE" 4437 ): 4438 update = False 4439 else: 4440 break 4441 4442 expressions = None 4443 if self._match_text_seq("OF"): 4444 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4445 4446 wait: t.Optional[bool | exp.Expression] = None 4447 if self._match_text_seq("NOWAIT"): 4448 wait = True 4449 elif self._match_text_seq("WAIT"): 4450 wait = self._parse_primary() 4451 elif self._match_text_seq("SKIP", "LOCKED"): 4452 wait = False 4453 4454 locks.append( 4455 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4456 ) 4457 4458 return locks 4459 4460 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4461 while this and self._match_set(self.SET_OPERATIONS): 4462 token_type = self._prev.token_type 4463 4464 if token_type == TokenType.UNION: 4465 operation: t.Type[exp.SetOperation] = exp.Union 4466 elif token_type == TokenType.EXCEPT: 4467 operation = exp.Except 4468 else: 4469 operation = exp.Intersect 4470 4471 comments = self._prev.comments 4472 4473 if self._match(TokenType.DISTINCT): 4474 distinct: t.Optional[bool] = True 4475 elif self._match(TokenType.ALL): 4476 distinct = False 4477 else: 4478 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4479 if distinct is None: 4480 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4481 4482 by_name = self._match_text_seq("BY", "NAME") 4483 expression = self._parse_select(nested=True, 
parse_set_operation=False) 4484 4485 this = self.expression( 4486 operation, 4487 comments=comments, 4488 this=this, 4489 distinct=distinct, 4490 by_name=by_name, 4491 expression=expression, 4492 ) 4493 4494 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4495 expression = this.expression 4496 4497 if expression: 4498 for arg in self.SET_OP_MODIFIERS: 4499 expr = expression.args.get(arg) 4500 if expr: 4501 this.set(arg, expr.pop()) 4502 4503 return this 4504 4505 def _parse_expression(self) -> t.Optional[exp.Expression]: 4506 return self._parse_alias(self._parse_assignment()) 4507 4508 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4509 this = self._parse_disjunction() 4510 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4511 # This allows us to parse <non-identifier token> := <expr> 4512 this = exp.column( 4513 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4514 ) 4515 4516 while self._match_set(self.ASSIGNMENT): 4517 if isinstance(this, exp.Column) and len(this.parts) == 1: 4518 this = this.this 4519 4520 this = self.expression( 4521 self.ASSIGNMENT[self._prev.token_type], 4522 this=this, 4523 comments=self._prev_comments, 4524 expression=self._parse_assignment(), 4525 ) 4526 4527 return this 4528 4529 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4530 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4531 4532 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4533 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4534 4535 def _parse_equality(self) -> t.Optional[exp.Expression]: 4536 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4537 4538 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4539 return self._parse_tokens(self._parse_range, self.COMPARISON) 4540 4541 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4542 this = this or self._parse_bitwise() 4543 negate = self._match(TokenType.NOT) 4544 4545 if self._match_set(self.RANGE_PARSERS): 4546 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4547 if not expression: 4548 return this 4549 4550 this = expression 4551 elif self._match(TokenType.ISNULL): 4552 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4553 4554 # Postgres supports ISNULL and NOTNULL for conditions. 
4555 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4556 if self._match(TokenType.NOTNULL): 4557 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4558 this = self.expression(exp.Not, this=this) 4559 4560 if negate: 4561 this = self._negate_range(this) 4562 4563 if self._match(TokenType.IS): 4564 this = self._parse_is(this) 4565 4566 return this 4567 4568 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4569 if not this: 4570 return this 4571 4572 return self.expression(exp.Not, this=this) 4573 4574 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4575 index = self._index - 1 4576 negate = self._match(TokenType.NOT) 4577 4578 if self._match_text_seq("DISTINCT", "FROM"): 4579 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4580 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4581 4582 if self._match(TokenType.JSON): 4583 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4584 4585 if self._match_text_seq("WITH"): 4586 _with = True 4587 elif self._match_text_seq("WITHOUT"): 4588 _with = False 4589 else: 4590 _with = None 4591 4592 unique = self._match(TokenType.UNIQUE) 4593 self._match_text_seq("KEYS") 4594 expression: t.Optional[exp.Expression] = self.expression( 4595 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4596 ) 4597 else: 4598 expression = self._parse_primary() or self._parse_null() 4599 if not expression: 4600 self._retreat(index) 4601 return None 4602 4603 this = self.expression(exp.Is, this=this, expression=expression) 4604 return self.expression(exp.Not, this=this) if negate else this 4605 4606 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4607 unnest = self._parse_unnest(with_alias=False) 4608 if unnest: 4609 this = self.expression(exp.In, this=this, unnest=unnest) 4610 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4611 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4612 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4613 4614 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4615 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4616 else: 4617 this = self.expression(exp.In, this=this, expressions=expressions) 4618 4619 if matched_l_paren: 4620 self._match_r_paren(this) 4621 elif not self._match(TokenType.R_BRACKET, expression=this): 4622 self.raise_error("Expecting ]") 4623 else: 4624 this = self.expression(exp.In, this=this, field=self._parse_column()) 4625 4626 return this 4627 4628 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4629 low = self._parse_bitwise() 4630 self._match(TokenType.AND) 4631 high = self._parse_bitwise() 4632 return self.expression(exp.Between, this=this, low=low, high=high) 4633 4634 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4635 if not self._match(TokenType.ESCAPE): 4636 return this 4637 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4638 4639 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4640 index = self._index 4641 4642 if not self._match(TokenType.INTERVAL) and match_interval: 4643 return None 4644 4645 if self._match(TokenType.STRING, advance=False): 4646 this = self._parse_primary() 4647 else: 4648 this = self._parse_term() 4649 4650 if not 
this or ( 4651 isinstance(this, exp.Column) 4652 and not this.table 4653 and not this.this.quoted 4654 and this.name.upper() == "IS" 4655 ): 4656 self._retreat(index) 4657 return None 4658 4659 unit = self._parse_function() or ( 4660 not self._match(TokenType.ALIAS, advance=False) 4661 and self._parse_var(any_token=True, upper=True) 4662 ) 4663 4664 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4665 # each INTERVAL expression into this canonical form so it's easy to transpile 4666 if this and this.is_number: 4667 this = exp.Literal.string(this.to_py()) 4668 elif this and this.is_string: 4669 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4670 if parts and unit: 4671 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4672 unit = None 4673 self._retreat(self._index - 1) 4674 4675 if len(parts) == 1: 4676 this = exp.Literal.string(parts[0][0]) 4677 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4678 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4679 unit = self.expression( 4680 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4681 ) 4682 4683 interval = self.expression(exp.Interval, this=this, unit=unit) 4684 4685 index = self._index 4686 self._match(TokenType.PLUS) 4687 4688 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4689 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4690 return self.expression( 4691 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4692 ) 4693 4694 self._retreat(index) 4695 return interval 4696 4697 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4698 this = self._parse_term() 4699 4700 while True: 4701 if self._match_set(self.BITWISE): 4702 this = self.expression( 4703 self.BITWISE[self._prev.token_type], 4704 this=this, 4705 expression=self._parse_term(), 4706 ) 4707 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4708 this = self.expression( 4709 exp.DPipe, 4710 this=this, 4711 expression=self._parse_term(), 4712 safe=not self.dialect.STRICT_STRING_CONCAT, 4713 ) 4714 elif self._match(TokenType.DQMARK): 4715 this = self.expression( 4716 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4717 ) 4718 elif self._match_pair(TokenType.LT, TokenType.LT): 4719 this = self.expression( 4720 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4721 ) 4722 elif self._match_pair(TokenType.GT, TokenType.GT): 4723 this = self.expression( 4724 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4725 ) 4726 else: 4727 break 4728 4729 return this 4730 4731 def _parse_term(self) -> t.Optional[exp.Expression]: 4732 this = self._parse_factor() 4733 4734 while self._match_set(self.TERM): 4735 klass = self.TERM[self._prev.token_type] 4736 comments = self._prev_comments 4737 expression = self._parse_factor() 4738 4739 this = self.expression(klass, this=this, comments=comments, expression=expression) 4740 4741 if isinstance(this, exp.Collate): 4742 expr = this.expression 4743 4744 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4745 # fallback to Identifier / Var 4746 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4747 ident = expr.this 4748 if isinstance(ident, exp.Identifier): 4749 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4750 4751 return this 4752 4753 def _parse_factor(self) -> t.Optional[exp.Expression]: 4754 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary
4755 this = parse_method()
4756
4757 while self._match_set(self.FACTOR):
4758 klass = self.FACTOR[self._prev.token_type]
4759 comments = self._prev_comments
4760 expression = parse_method()
4761
4762 if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4763 self._retreat(self._index - 1)
4764 return this
4765
4766 this = self.expression(klass, this=this, comments=comments, expression=expression)
4767
4768 if isinstance(this, exp.Div):
4769 this.args["typed"] = self.dialect.TYPED_DIVISION
4770 this.args["safe"] = self.dialect.SAFE_DIVISION
4771
4772 return this
4773
4774 def _parse_exponent(self) -> t.Optional[exp.Expression]:
4775 return self._parse_tokens(self._parse_unary, self.EXPONENT)
4776
4777 def _parse_unary(self) -> t.Optional[exp.Expression]:
4778 if self._match_set(self.UNARY_PARSERS):
4779 return self.UNARY_PARSERS[self._prev.token_type](self)
4780 return self._parse_at_time_zone(self._parse_type())
4781
4782 def _parse_type(
4783 self, parse_interval: bool = True, fallback_to_identifier: bool = False
4784 ) -> t.Optional[exp.Expression]:
4785 interval = parse_interval and self._parse_interval()
4786 if interval:
4787 return interval
4788
4789 index = self._index
4790 data_type = self._parse_types(check_func=True, allow_identifiers=False)
4791
4792 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4793 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4794 if isinstance(data_type, exp.Cast):
4795 # This constructor can contain ops directly after it, for instance struct unnesting:
4796 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4797 return self._parse_column_ops(data_type)
4798
4799 if data_type:
4800 index2 = self._index
4801 this = self._parse_primary()
4802
4803 if isinstance(this, exp.Literal):
4804 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4805 if parser:
4806 return parser(self, this, data_type)
4807
4808 return self.expression(exp.Cast, this=this, to=data_type)
4809
4810 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4811 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4812 #
4813 # If the index difference here is greater than 1, that means the parser itself must have
4814 # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4815 #
4816 # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4817 # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4818 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4819 # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4820 #
4821 # In these cases, we don't really want to return the converted type, but instead retreat
4822 # and try to parse a Column or Identifier in the section below. 
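# Concretely: for DECIMAL(38, 0) the scan above also consumed "( 38 , 0 )", so the index
# difference exceeds 1 and the parsed type is kept (resuming at index2); for a bare DECIMAL
# expanded by a TYPE_CONVERTERS callable the difference is exactly 1, and we retreat to
# index to re-parse the token as a column or identifier instead.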
4823 if data_type.expressions and index2 - index > 1: 4824 self._retreat(index2) 4825 return self._parse_column_ops(data_type) 4826 4827 self._retreat(index) 4828 4829 if fallback_to_identifier: 4830 return self._parse_id_var() 4831 4832 this = self._parse_column() 4833 return this and self._parse_column_ops(this) 4834 4835 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4836 this = self._parse_type() 4837 if not this: 4838 return None 4839 4840 if isinstance(this, exp.Column) and not this.table: 4841 this = exp.var(this.name.upper()) 4842 4843 return self.expression( 4844 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4845 ) 4846 4847 def _parse_types( 4848 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4849 ) -> t.Optional[exp.Expression]: 4850 index = self._index 4851 4852 this: t.Optional[exp.Expression] = None 4853 prefix = self._match_text_seq("SYSUDTLIB", ".") 4854 4855 if not self._match_set(self.TYPE_TOKENS): 4856 identifier = allow_identifiers and self._parse_id_var( 4857 any_token=False, tokens=(TokenType.VAR,) 4858 ) 4859 if isinstance(identifier, exp.Identifier): 4860 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4861 4862 if len(tokens) != 1: 4863 self.raise_error("Unexpected identifier", self._prev) 4864 4865 if tokens[0].token_type in self.TYPE_TOKENS: 4866 self._prev = tokens[0] 4867 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4868 type_name = identifier.name 4869 4870 while self._match(TokenType.DOT): 4871 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4872 4873 this = exp.DataType.build(type_name, udt=True) 4874 else: 4875 self._retreat(self._index - 1) 4876 return None 4877 else: 4878 return None 4879 4880 type_token = self._prev.token_type 4881 4882 if type_token == TokenType.PSEUDO_TYPE: 4883 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4884 4885 if type_token == TokenType.OBJECT_IDENTIFIER: 4886 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4887 4888 # https://materialize.com/docs/sql/types/map/ 4889 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4890 key_type = self._parse_types( 4891 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4892 ) 4893 if not self._match(TokenType.FARROW): 4894 self._retreat(index) 4895 return None 4896 4897 value_type = self._parse_types( 4898 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4899 ) 4900 if not self._match(TokenType.R_BRACKET): 4901 self._retreat(index) 4902 return None 4903 4904 return exp.DataType( 4905 this=exp.DataType.Type.MAP, 4906 expressions=[key_type, value_type], 4907 nested=True, 4908 prefix=prefix, 4909 ) 4910 4911 nested = type_token in self.NESTED_TYPE_TOKENS 4912 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4913 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4914 expressions = None 4915 maybe_func = False 4916 4917 if self._match(TokenType.L_PAREN): 4918 if is_struct: 4919 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4920 elif nested: 4921 expressions = self._parse_csv( 4922 lambda: self._parse_types( 4923 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4924 ) 4925 ) 4926 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4927 this = expressions[0] 4928 this.set("nullable", True) 4929 self._match_r_paren() 4930 return this 4931 elif type_token in self.ENUM_TYPE_TOKENS: 4932 
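# Enum members may carry explicit values, e.g. ClickHouse's Enum8('a' = 1, 'b' = 2),
# which is why each member is parsed with _parse_equality.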
expressions = self._parse_csv(self._parse_equality) 4933 elif is_aggregate: 4934 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4935 any_token=False, tokens=(TokenType.VAR,) 4936 ) 4937 if not func_or_ident or not self._match(TokenType.COMMA): 4938 return None 4939 expressions = self._parse_csv( 4940 lambda: self._parse_types( 4941 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4942 ) 4943 ) 4944 expressions.insert(0, func_or_ident) 4945 else: 4946 expressions = self._parse_csv(self._parse_type_size) 4947 4948 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4949 if type_token == TokenType.VECTOR and len(expressions) == 2: 4950 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4951 4952 if not expressions or not self._match(TokenType.R_PAREN): 4953 self._retreat(index) 4954 return None 4955 4956 maybe_func = True 4957 4958 values: t.Optional[t.List[exp.Expression]] = None 4959 4960 if nested and self._match(TokenType.LT): 4961 if is_struct: 4962 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4963 else: 4964 expressions = self._parse_csv( 4965 lambda: self._parse_types( 4966 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4967 ) 4968 ) 4969 4970 if not self._match(TokenType.GT): 4971 self.raise_error("Expecting >") 4972 4973 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4974 values = self._parse_csv(self._parse_assignment) 4975 if not values and is_struct: 4976 values = None 4977 self._retreat(self._index - 1) 4978 else: 4979 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4980 4981 if type_token in self.TIMESTAMPS: 4982 if self._match_text_seq("WITH", "TIME", "ZONE"): 4983 maybe_func = False 4984 tz_type = ( 4985 exp.DataType.Type.TIMETZ 4986 if type_token in self.TIMES 4987 else exp.DataType.Type.TIMESTAMPTZ 4988 ) 4989 this = exp.DataType(this=tz_type, expressions=expressions) 4990 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4991 maybe_func = False 4992 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4993 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4994 maybe_func = False 4995 elif type_token == TokenType.INTERVAL: 4996 unit = self._parse_var(upper=True) 4997 if unit: 4998 if self._match_text_seq("TO"): 4999 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5000 5001 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5002 else: 5003 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5004 5005 if maybe_func and check_func: 5006 index2 = self._index 5007 peek = self._parse_string() 5008 5009 if not peek: 5010 self._retreat(index) 5011 return None 5012 5013 self._retreat(index2) 5014 5015 if not this: 5016 if self._match_text_seq("UNSIGNED"): 5017 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5018 if not unsigned_type_token: 5019 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5020 5021 type_token = unsigned_type_token or type_token 5022 5023 this = exp.DataType( 5024 this=exp.DataType.Type[type_token.value], 5025 expressions=expressions, 5026 nested=nested, 5027 prefix=prefix, 5028 ) 5029 5030 # Empty arrays/structs are allowed 5031 if values is not None: 5032 cls = exp.Struct if is_struct else exp.Array 5033 this = exp.cast(cls(expressions=values), this, copy=False) 5034 5035 elif expressions: 5036 this.set("expressions", 
expressions) 5037 5038 # https://materialize.com/docs/sql/types/list/#type-name 5039 while self._match(TokenType.LIST): 5040 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5041 5042 index = self._index 5043 5044 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5045 matched_array = self._match(TokenType.ARRAY) 5046 5047 while self._curr: 5048 datatype_token = self._prev.token_type 5049 matched_l_bracket = self._match(TokenType.L_BRACKET) 5050 if not matched_l_bracket and not matched_array: 5051 break 5052 5053 matched_array = False 5054 values = self._parse_csv(self._parse_assignment) or None 5055 if ( 5056 values 5057 and not schema 5058 and ( 5059 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5060 ) 5061 ): 5062 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5063 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5064 self._retreat(index) 5065 break 5066 5067 this = exp.DataType( 5068 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5069 ) 5070 self._match(TokenType.R_BRACKET) 5071 5072 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5073 converter = self.TYPE_CONVERTERS.get(this.this) 5074 if converter: 5075 this = converter(t.cast(exp.DataType, this)) 5076 5077 return this 5078 5079 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5080 index = self._index 5081 5082 if ( 5083 self._curr 5084 and self._next 5085 and self._curr.token_type in self.TYPE_TOKENS 5086 and self._next.token_type in self.TYPE_TOKENS 5087 ): 5088 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5089 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5090 this = self._parse_id_var() 5091 else: 5092 this = ( 5093 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5094 or self._parse_id_var() 5095 ) 5096 5097 self._match(TokenType.COLON) 5098 5099 if ( 5100 type_required 5101 and not isinstance(this, exp.DataType) 5102 and not self._match_set(self.TYPE_TOKENS, advance=False) 5103 ): 5104 self._retreat(index) 5105 return self._parse_types() 5106 5107 return self._parse_column_def(this) 5108 5109 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5110 if not self._match_text_seq("AT", "TIME", "ZONE"): 5111 return this 5112 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5113 5114 def _parse_column(self) -> t.Optional[exp.Expression]: 5115 this = self._parse_column_reference() 5116 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5117 5118 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5119 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5120 5121 return column 5122 5123 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5124 this = self._parse_field() 5125 if ( 5126 not this 5127 and self._match(TokenType.VALUES, advance=False) 5128 and self.VALUES_FOLLOWED_BY_PAREN 5129 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5130 ): 5131 this = self._parse_id_var() 5132 5133 if isinstance(this, exp.Identifier): 5134 # We bubble up comments from the Identifier to the Column 5135 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5136 5137 return this 5138 5139 def _parse_colon_as_variant_extract( 5140 self, this: t.Optional[exp.Expression] 5141 ) -> t.Optional[exp.Expression]: 5142 casts = [] 5143 json_path = [] 5144 escape = None 5145 5146 while self._match(TokenType.COLON): 5147 start_index = self._index 5148 5149 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5150 path = self._parse_column_ops( 5151 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5152 ) 5153 5154 # The cast :: operator has a lower precedence than the extraction operator :, so 5155 # we rearrange the AST appropriately to avoid casting the JSON path 5156 while isinstance(path, exp.Cast): 5157 casts.append(path.to) 5158 path = path.this 5159 5160 if casts: 5161 dcolon_offset = next( 5162 i 5163 for i, t in enumerate(self._tokens[start_index:]) 5164 if t.token_type == TokenType.DCOLON 5165 ) 5166 end_token = self._tokens[start_index + dcolon_offset - 1] 5167 else: 5168 end_token = self._prev 5169 5170 if path: 5171 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5172 # it'll roundtrip to a string literal in GET_PATH 5173 if isinstance(path, exp.Identifier) and path.quoted: 5174 escape = True 5175 5176 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5177 5178 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5179 # Databricks transforms it back to the colon/dot notation 5180 if json_path: 5181 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5182 5183 if json_path_expr: 5184 json_path_expr.set("escape", escape) 5185 5186 this = self.expression( 5187 exp.JSONExtract, 5188 this=this, 5189 expression=json_path_expr, 5190 variant_extract=True, 5191 ) 5192 5193 while casts: 5194 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5195 5196 return this 5197 5198 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5199 return self._parse_types() 5200 5201 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5202 this = self._parse_bracket(this) 5203 5204 while self._match_set(self.COLUMN_OPERATORS): 5205 op_token = self._prev.token_type 5206 op = self.COLUMN_OPERATORS.get(op_token) 5207 5208 if op_token == TokenType.DCOLON: 5209 field = self._parse_dcolon() 5210 if not field: 5211 self.raise_error("Expected type") 5212 elif op and self._curr: 5213 field = self._parse_column_reference() or self._parse_bracket() 5214 else: 5215 field = self._parse_field(any_token=True, anonymous_func=True) 5216 5217 if isinstance(field, (exp.Func, exp.Window)) and this: 5218 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5219 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5220 this = exp.replace_tree( 5221 this, 5222 lambda n: ( 5223 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5224 if n.table 5225 else n.this 5226 ) 5227 if isinstance(n, exp.Column) 5228 else n, 5229 ) 5230 5231 if op: 5232 this = op(self, this, field) 5233 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5234 this = self.expression( 5235 exp.Column, 5236 comments=this.comments, 5237 this=field, 5238 table=this.this, 5239 db=this.args.get("table"), 5240 catalog=this.args.get("db"), 5241 ) 5242 elif isinstance(field, exp.Window): 5243 # Move the exp.Dot's to the window's function 5244 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5245 field.set("this", window_func) 5246 this = field 5247 else: 5248 this = self.expression(exp.Dot, this=this, expression=field) 5249 5250 if field and field.comments: 5251 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5252 5253 this = self._parse_bracket(this) 5254 5255 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5256 5257 def _parse_primary(self) -> t.Optional[exp.Expression]: 5258 if self._match_set(self.PRIMARY_PARSERS): 5259 token_type = self._prev.token_type 5260 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5261 5262 if token_type == TokenType.STRING: 5263 expressions = [primary] 5264 while self._match(TokenType.STRING): 5265 expressions.append(exp.Literal.string(self._prev.text)) 5266 5267 if len(expressions) > 1: 5268 return self.expression(exp.Concat, expressions=expressions) 5269 5270 return primary 5271 5272 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5273 return exp.Literal.number(f"0.{self._prev.text}") 5274 5275 if 
self._match(TokenType.L_PAREN): 5276 comments = self._prev_comments 5277 query = self._parse_select() 5278 5279 if query: 5280 expressions = [query] 5281 else: 5282 expressions = self._parse_expressions() 5283 5284 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5285 5286 if not this and self._match(TokenType.R_PAREN, advance=False): 5287 this = self.expression(exp.Tuple) 5288 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5289 this = self._parse_subquery(this=this, parse_alias=False) 5290 elif isinstance(this, exp.Subquery): 5291 this = self._parse_subquery( 5292 this=self._parse_set_operations(this), parse_alias=False 5293 ) 5294 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5295 this = self.expression(exp.Tuple, expressions=expressions) 5296 else: 5297 this = self.expression(exp.Paren, this=this) 5298 5299 if this: 5300 this.add_comments(comments) 5301 5302 self._match_r_paren(expression=this) 5303 return this 5304 5305 return None 5306 5307 def _parse_field( 5308 self, 5309 any_token: bool = False, 5310 tokens: t.Optional[t.Collection[TokenType]] = None, 5311 anonymous_func: bool = False, 5312 ) -> t.Optional[exp.Expression]: 5313 if anonymous_func: 5314 field = ( 5315 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5316 or self._parse_primary() 5317 ) 5318 else: 5319 field = self._parse_primary() or self._parse_function( 5320 anonymous=anonymous_func, any_token=any_token 5321 ) 5322 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5323 5324 def _parse_function( 5325 self, 5326 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5327 anonymous: bool = False, 5328 optional_parens: bool = True, 5329 any_token: bool = False, 5330 ) -> t.Optional[exp.Expression]: 5331 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5332 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5333 fn_syntax = False 5334 if ( 5335 self._match(TokenType.L_BRACE, advance=False) 5336 and self._next 5337 and self._next.text.upper() == "FN" 5338 ): 5339 self._advance(2) 5340 fn_syntax = True 5341 5342 func = self._parse_function_call( 5343 functions=functions, 5344 anonymous=anonymous, 5345 optional_parens=optional_parens, 5346 any_token=any_token, 5347 ) 5348 5349 if fn_syntax: 5350 self._match(TokenType.R_BRACE) 5351 5352 return func 5353 5354 def _parse_function_call( 5355 self, 5356 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5357 anonymous: bool = False, 5358 optional_parens: bool = True, 5359 any_token: bool = False, 5360 ) -> t.Optional[exp.Expression]: 5361 if not self._curr: 5362 return None 5363 5364 comments = self._curr.comments 5365 token_type = self._curr.token_type 5366 this = self._curr.text 5367 upper = this.upper() 5368 5369 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5370 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5371 self._advance() 5372 return self._parse_window(parser(self)) 5373 5374 if not self._next or self._next.token_type != TokenType.L_PAREN: 5375 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5376 self._advance() 5377 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5378 5379 return None 5380 5381 if any_token: 5382 if token_type in self.RESERVED_TOKENS: 5383 return None 5384 elif token_type not in self.FUNC_TOKENS: 5385 return None 5386 5387 self._advance(2) 5388 5389 parser = self.FUNCTION_PARSERS.get(upper) 5390 if parser and not anonymous: 5391 this = parser(self) 
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-provided comment "/* sqlglot.anonymous */" follows the function
                # call, we construct it as exp.Anonymous, even if the function is "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
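    # Example (illustrative, not part of the original source; the exact AST shape may vary
    # by sqlglot version): higher-order function arguments like `x -> x + 1` are parsed by
    # _parse_lambda into exp.Lambda nodes, e.g. via the public API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2], x -> x + 1)", read="duckdb")
    #
    # yields a SELECT whose projection contains a function call with an exp.Lambda argument.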
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
| exp.GeneratedAsRowColumnConstraint 5653 ): 5654 if self._match_text_seq("BY", "DEFAULT"): 5655 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5656 this = self.expression( 5657 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5658 ) 5659 else: 5660 self._match_text_seq("ALWAYS") 5661 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5662 5663 self._match(TokenType.ALIAS) 5664 5665 if self._match_text_seq("ROW"): 5666 start = self._match_text_seq("START") 5667 if not start: 5668 self._match(TokenType.END) 5669 hidden = self._match_text_seq("HIDDEN") 5670 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5671 5672 identity = self._match_text_seq("IDENTITY") 5673 5674 if self._match(TokenType.L_PAREN): 5675 if self._match(TokenType.START_WITH): 5676 this.set("start", self._parse_bitwise()) 5677 if self._match_text_seq("INCREMENT", "BY"): 5678 this.set("increment", self._parse_bitwise()) 5679 if self._match_text_seq("MINVALUE"): 5680 this.set("minvalue", self._parse_bitwise()) 5681 if self._match_text_seq("MAXVALUE"): 5682 this.set("maxvalue", self._parse_bitwise()) 5683 5684 if self._match_text_seq("CYCLE"): 5685 this.set("cycle", True) 5686 elif self._match_text_seq("NO", "CYCLE"): 5687 this.set("cycle", False) 5688 5689 if not identity: 5690 this.set("expression", self._parse_range()) 5691 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5692 args = self._parse_csv(self._parse_bitwise) 5693 this.set("start", seq_get(args, 0)) 5694 this.set("increment", seq_get(args, 1)) 5695 5696 self._match_r_paren() 5697 5698 return this 5699 5700 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5701 self._match_text_seq("LENGTH") 5702 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5703 5704 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5705 if self._match_text_seq("NULL"): 5706 return self.expression(exp.NotNullColumnConstraint) 5707 if self._match_text_seq("CASESPECIFIC"): 5708 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5709 if self._match_text_seq("FOR", "REPLICATION"): 5710 return self.expression(exp.NotForReplicationColumnConstraint) 5711 5712 # Unconsume the `NOT` token 5713 self._retreat(self._index - 1) 5714 return None 5715 5716 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5717 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5718 5719 procedure_option_follows = ( 5720 self._match(TokenType.WITH, advance=False) 5721 and self._next 5722 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5723 ) 5724 5725 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5726 return self.expression( 5727 exp.ColumnConstraint, 5728 this=this, 5729 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5730 ) 5731 5732 return this 5733 5734 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5735 if not self._match(TokenType.CONSTRAINT): 5736 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5737 5738 return self.expression( 5739 exp.Constraint, 5740 this=self._parse_id_var(), 5741 expressions=self._parse_unnamed_constraints(), 5742 ) 5743 5744 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5745 constraints = [] 5746 while True: 5747 constraint = self._parse_unnamed_constraint() or self._parse_function() 5748 if not constraint: 5749 break 5750 
constraints.append(constraint) 5751 5752 return constraints 5753 5754 def _parse_unnamed_constraint( 5755 self, constraints: t.Optional[t.Collection[str]] = None 5756 ) -> t.Optional[exp.Expression]: 5757 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5758 constraints or self.CONSTRAINT_PARSERS 5759 ): 5760 return None 5761 5762 constraint = self._prev.text.upper() 5763 if constraint not in self.CONSTRAINT_PARSERS: 5764 self.raise_error(f"No parser found for schema constraint {constraint}.") 5765 5766 return self.CONSTRAINT_PARSERS[constraint](self) 5767 5768 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5769 return self._parse_id_var(any_token=False) 5770 5771 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5772 self._match_text_seq("KEY") 5773 return self.expression( 5774 exp.UniqueColumnConstraint, 5775 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5776 this=self._parse_schema(self._parse_unique_key()), 5777 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5778 on_conflict=self._parse_on_conflict(), 5779 ) 5780 5781 def _parse_key_constraint_options(self) -> t.List[str]: 5782 options = [] 5783 while True: 5784 if not self._curr: 5785 break 5786 5787 if self._match(TokenType.ON): 5788 action = None 5789 on = self._advance_any() and self._prev.text 5790 5791 if self._match_text_seq("NO", "ACTION"): 5792 action = "NO ACTION" 5793 elif self._match_text_seq("CASCADE"): 5794 action = "CASCADE" 5795 elif self._match_text_seq("RESTRICT"): 5796 action = "RESTRICT" 5797 elif self._match_pair(TokenType.SET, TokenType.NULL): 5798 action = "SET NULL" 5799 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5800 action = "SET DEFAULT" 5801 else: 5802 self.raise_error("Invalid key constraint") 5803 5804 options.append(f"ON {on} {action}") 5805 else: 5806 var = self._parse_var_from_options( 5807 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5808 ) 5809 if not var: 5810 break 5811 options.append(var.name) 5812 5813 return options 5814 5815 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5816 if match and not self._match(TokenType.REFERENCES): 5817 return None 5818 5819 expressions = None 5820 this = self._parse_table(schema=True) 5821 options = self._parse_key_constraint_options() 5822 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5823 5824 def _parse_foreign_key(self) -> exp.ForeignKey: 5825 expressions = self._parse_wrapped_id_vars() 5826 reference = self._parse_references() 5827 options = {} 5828 5829 while self._match(TokenType.ON): 5830 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5831 self.raise_error("Expected DELETE or UPDATE") 5832 5833 kind = self._prev.text.lower() 5834 5835 if self._match_text_seq("NO", "ACTION"): 5836 action = "NO ACTION" 5837 elif self._match(TokenType.SET): 5838 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5839 action = "SET " + self._prev.text.upper() 5840 else: 5841 self._advance() 5842 action = self._prev.text.upper() 5843 5844 options[kind] = action 5845 5846 return self.expression( 5847 exp.ForeignKey, 5848 expressions=expressions, 5849 reference=reference, 5850 **options, # type: ignore 5851 ) 5852 5853 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5854 return self._parse_field() 5855 5856 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5857 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5858 
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. We parse the literal into the corresponding
        expression type, for example `{d'yyyy-mm-dd'}` is parsed as a `Date`, exactly the same
        as `DATE('yyyy-mm-dd')` would be.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
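    # Example (illustrative, not part of the original source): the brace branch above turns
    # ODBC literals into typed expressions and DuckDB-style struct literals into exp.Struct,
    # e.g. via the public API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {d '2024-01-31'} FROM t")   # -> exp.Date literal
    #     >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")   # -> exp.Struct literal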
5948 5949 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5950 if self._match(TokenType.COLON): 5951 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5952 return this 5953 5954 def _parse_case(self) -> t.Optional[exp.Expression]: 5955 ifs = [] 5956 default = None 5957 5958 comments = self._prev_comments 5959 expression = self._parse_assignment() 5960 5961 while self._match(TokenType.WHEN): 5962 this = self._parse_assignment() 5963 self._match(TokenType.THEN) 5964 then = self._parse_assignment() 5965 ifs.append(self.expression(exp.If, this=this, true=then)) 5966 5967 if self._match(TokenType.ELSE): 5968 default = self._parse_assignment() 5969 5970 if not self._match(TokenType.END): 5971 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5972 default = exp.column("interval") 5973 else: 5974 self.raise_error("Expected END after CASE", self._prev) 5975 5976 return self.expression( 5977 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5978 ) 5979 5980 def _parse_if(self) -> t.Optional[exp.Expression]: 5981 if self._match(TokenType.L_PAREN): 5982 args = self._parse_csv(self._parse_assignment) 5983 this = self.validate_expression(exp.If.from_arg_list(args), args) 5984 self._match_r_paren() 5985 else: 5986 index = self._index - 1 5987 5988 if self.NO_PAREN_IF_COMMANDS and index == 0: 5989 return self._parse_as_command(self._prev) 5990 5991 condition = self._parse_assignment() 5992 5993 if not condition: 5994 self._retreat(index) 5995 return None 5996 5997 self._match(TokenType.THEN) 5998 true = self._parse_assignment() 5999 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6000 self._match(TokenType.END) 6001 this = self.expression(exp.If, this=condition, true=true, false=false) 6002 6003 return this 6004 6005 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6006 if not self._match_text_seq("VALUE", "FOR"): 6007 self._retreat(self._index - 1) 6008 return None 6009 6010 return self.expression( 6011 exp.NextValueFor, 6012 this=self._parse_column(), 6013 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6014 ) 6015 6016 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6017 this = self._parse_function() or self._parse_var_or_string(upper=True) 6018 6019 if self._match(TokenType.FROM): 6020 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6021 6022 if not self._match(TokenType.COMMA): 6023 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6024 6025 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6026 6027 def _parse_gap_fill(self) -> exp.GapFill: 6028 self._match(TokenType.TABLE) 6029 this = self._parse_table() 6030 6031 self._match(TokenType.COMMA) 6032 args = [this, *self._parse_csv(self._parse_lambda)] 6033 6034 gap_fill = exp.GapFill.from_arg_list(args) 6035 return self.validate_expression(gap_fill, args) 6036 6037 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6038 this = self._parse_assignment() 6039 6040 if not self._match(TokenType.ALIAS): 6041 if self._match(TokenType.COMMA): 6042 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6043 6044 self.raise_error("Expected AS after CAST") 6045 6046 fmt = None 6047 to = self._parse_types() 6048 6049 if self._match(TokenType.FORMAT): 6050 fmt_string = self._parse_string() 6051 fmt = 
self._parse_at_time_zone(fmt_string) 6052 6053 if not to: 6054 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6055 if to.this in exp.DataType.TEMPORAL_TYPES: 6056 this = self.expression( 6057 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6058 this=this, 6059 format=exp.Literal.string( 6060 format_time( 6061 fmt_string.this if fmt_string else "", 6062 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6063 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6064 ) 6065 ), 6066 safe=safe, 6067 ) 6068 6069 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6070 this.set("zone", fmt.args["zone"]) 6071 return this 6072 elif not to: 6073 self.raise_error("Expected TYPE after CAST") 6074 elif isinstance(to, exp.Identifier): 6075 to = exp.DataType.build(to.name, udt=True) 6076 elif to.this == exp.DataType.Type.CHAR: 6077 if self._match(TokenType.CHARACTER_SET): 6078 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6079 6080 return self.expression( 6081 exp.Cast if strict else exp.TryCast, 6082 this=this, 6083 to=to, 6084 format=fmt, 6085 safe=safe, 6086 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6087 ) 6088 6089 def _parse_string_agg(self) -> exp.GroupConcat: 6090 if self._match(TokenType.DISTINCT): 6091 args: t.List[t.Optional[exp.Expression]] = [ 6092 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6093 ] 6094 if self._match(TokenType.COMMA): 6095 args.extend(self._parse_csv(self._parse_assignment)) 6096 else: 6097 args = self._parse_csv(self._parse_assignment) # type: ignore 6098 6099 if self._match_text_seq("ON", "OVERFLOW"): 6100 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6101 if self._match_text_seq("ERROR"): 6102 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6103 else: 6104 self._match_text_seq("TRUNCATE") 6105 on_overflow = self.expression( 6106 exp.OverflowTruncateBehavior, 6107 this=self._parse_string(), 6108 with_count=( 6109 self._match_text_seq("WITH", "COUNT") 6110 or not self._match_text_seq("WITHOUT", "COUNT") 6111 ), 6112 ) 6113 else: 6114 on_overflow = None 6115 6116 index = self._index 6117 if not self._match(TokenType.R_PAREN) and args: 6118 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6119 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6120 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6121 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6122 6123 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6124 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6125 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
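        # Example (illustrative, not part of the original source; the exact output may vary
        # by sqlglot version):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile(
        #     ...     "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t",
        #     ...     read="trino",
        #     ...     write="mysql",
        #     ... )[0]
        #     "SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t"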
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        this = self._parse_string()

        passing = None
        columns = None

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable, this=this, passing=passing, columns=columns, by_ref=by_ref
        )

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
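
        Example (illustrative, not part of the original docstring; the exact output may
        vary by sqlglot version):
            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle").sql()
            "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"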
6186 """ 6187 args = self._parse_csv(self._parse_assignment) 6188 6189 if len(args) < 3: 6190 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6191 6192 expression, *expressions = args 6193 if not expression: 6194 return None 6195 6196 ifs = [] 6197 for search, result in zip(expressions[::2], expressions[1::2]): 6198 if not search or not result: 6199 return None 6200 6201 if isinstance(search, exp.Literal): 6202 ifs.append( 6203 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6204 ) 6205 elif isinstance(search, exp.Null): 6206 ifs.append( 6207 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6208 ) 6209 else: 6210 cond = exp.or_( 6211 exp.EQ(this=expression.copy(), expression=search), 6212 exp.and_( 6213 exp.Is(this=expression.copy(), expression=exp.Null()), 6214 exp.Is(this=search.copy(), expression=exp.Null()), 6215 copy=False, 6216 ), 6217 copy=False, 6218 ) 6219 ifs.append(exp.If(this=cond, true=result)) 6220 6221 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6222 6223 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6224 self._match_text_seq("KEY") 6225 key = self._parse_column() 6226 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6227 self._match_text_seq("VALUE") 6228 value = self._parse_bitwise() 6229 6230 if not key and not value: 6231 return None 6232 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6233 6234 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6235 if not this or not self._match_text_seq("FORMAT", "JSON"): 6236 return this 6237 6238 return self.expression(exp.FormatJson, this=this) 6239 6240 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6241 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6242 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6243 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6244 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6245 else: 6246 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6247 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6248 6249 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6250 6251 if not empty and not error and not null: 6252 return None 6253 6254 return self.expression( 6255 exp.OnCondition, 6256 empty=empty, 6257 error=error, 6258 null=null, 6259 ) 6260 6261 def _parse_on_handling( 6262 self, on: str, *values: str 6263 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6264 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6265 for value in values: 6266 if self._match_text_seq(value, "ON", on): 6267 return f"{value} ON {on}" 6268 6269 index = self._index 6270 if self._match(TokenType.DEFAULT): 6271 default_value = self._parse_bitwise() 6272 if self._match_text_seq("ON", on): 6273 return default_value 6274 6275 self._retreat(index) 6276 6277 return None 6278 6279 @t.overload 6280 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6281 6282 @t.overload 6283 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6284 6285 def _parse_json_object(self, agg=False): 6286 star = self._parse_star() 6287 expressions = ( 6288 [star] 6289 if star 6290 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6291 ) 6292 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6293 6294 unique_keys = None 6295 if self._match_text_seq("WITH", "UNIQUE"): 6296 unique_keys = True 6297 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6298 unique_keys = False 6299 6300 self._match_text_seq("KEYS") 6301 6302 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6303 self._parse_type() 6304 ) 6305 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6306 6307 return self.expression( 6308 exp.JSONObjectAgg if agg else exp.JSONObject, 6309 expressions=expressions, 6310 null_handling=null_handling, 6311 unique_keys=unique_keys, 6312 return_type=return_type, 6313 encoding=encoding, 6314 ) 6315 6316 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6317 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6318 if not self._match_text_seq("NESTED"): 6319 this = self._parse_id_var() 6320 kind = self._parse_types(allow_identifiers=False) 6321 nested = None 6322 else: 6323 this = None 6324 kind = None 6325 nested = True 6326 6327 path = self._match_text_seq("PATH") and self._parse_string() 6328 nested_schema = nested and self._parse_json_schema() 6329 6330 return self.expression( 6331 exp.JSONColumnDef, 6332 this=this, 6333 kind=kind, 6334 path=path, 6335 nested_schema=nested_schema, 6336 ) 6337 6338 def _parse_json_schema(self) -> exp.JSONSchema: 6339 self._match_text_seq("COLUMNS") 6340 return self.expression( 6341 exp.JSONSchema, 6342 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6343 ) 6344 6345 def _parse_json_table(self) -> exp.JSONTable: 6346 this = self._parse_format_json(self._parse_bitwise()) 6347 path = self._match(TokenType.COMMA) and self._parse_string() 6348 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6349 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6350 schema = self._parse_json_schema() 6351 6352 return exp.JSONTable( 6353 this=this, 6354 schema=schema, 6355 path=path, 6356 error_handling=error_handling, 6357 empty_handling=empty_handling, 6358 ) 6359 6360 def _parse_match_against(self) -> exp.MatchAgainst: 6361 expressions = self._parse_csv(self._parse_column) 6362 6363 self._match_text_seq(")", "AGAINST", "(") 6364 6365 this = self._parse_string() 6366 6367 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6368 modifier = "IN NATURAL LANGUAGE MODE" 6369 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6370 modifier = f"{modifier} WITH QUERY EXPANSION" 6371 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6372 modifier = "IN BOOLEAN MODE" 6373 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6374 modifier = "WITH QUERY EXPANSION" 6375 else: 6376 modifier = None 6377 6378 return self.expression( 6379 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6380 ) 6381 6382 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6383 def _parse_open_json(self) -> exp.OpenJSON: 6384 this = self._parse_bitwise() 6385 path = self._match(TokenType.COMMA) and self._parse_string() 6386 6387 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6388 this = self._parse_field(any_token=True) 6389 kind = self._parse_types() 6390 path = 
self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
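    # Example (illustrative, not part of the original source; the exact output may vary
    # by sqlglot version): the FROM/FOR form normalizes to plain argument syntax, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT SUBSTRING(x FROM 2 FOR 3)", read="postgres", write="mysql")[0]
    #     'SELECT SUBSTRING(x, 2, 3)'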
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] clause before OVER.
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
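        # Example (illustrative, not part of the original source): both IGNORE NULLS
        # placements parse to the same shape, with the exp.IgnoreNulls node wrapping the
        # function inside the window, e.g.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t"
        #     ... ).find(sqlglot.exp.Window).this
        #
        # is an exp.IgnoreNulls wrapping the FIRST_VALUE call.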
6550 if alias: 6551 over = None 6552 self._match(TokenType.ALIAS) 6553 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6554 return this 6555 else: 6556 over = self._prev.text.upper() 6557 6558 if comments and isinstance(func, exp.Expression): 6559 func.pop_comments() 6560 6561 if not self._match(TokenType.L_PAREN): 6562 return self.expression( 6563 exp.Window, 6564 comments=comments, 6565 this=this, 6566 alias=self._parse_id_var(False), 6567 over=over, 6568 ) 6569 6570 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6571 6572 first = self._match(TokenType.FIRST) 6573 if self._match_text_seq("LAST"): 6574 first = False 6575 6576 partition, order = self._parse_partition_and_order() 6577 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6578 6579 if kind: 6580 self._match(TokenType.BETWEEN) 6581 start = self._parse_window_spec() 6582 self._match(TokenType.AND) 6583 end = self._parse_window_spec() 6584 6585 spec = self.expression( 6586 exp.WindowSpec, 6587 kind=kind, 6588 start=start["value"], 6589 start_side=start["side"], 6590 end=end["value"], 6591 end_side=end["side"], 6592 ) 6593 else: 6594 spec = None 6595 6596 self._match_r_paren() 6597 6598 window = self.expression( 6599 exp.Window, 6600 comments=comments, 6601 this=this, 6602 partition_by=partition, 6603 order=order, 6604 spec=spec, 6605 alias=window_alias, 6606 over=over, 6607 first=first, 6608 ) 6609 6610 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6611 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6612 return self._parse_window(window, alias=alias) 6613 6614 return window 6615 6616 def _parse_partition_and_order( 6617 self, 6618 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6619 return self._parse_partition_by(), self._parse_order() 6620 6621 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6622 self._match(TokenType.BETWEEN) 6623 6624 return { 6625 "value": ( 6626 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6627 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6628 or self._parse_bitwise() 6629 ), 6630 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6631 } 6632 6633 def _parse_alias( 6634 self, this: t.Optional[exp.Expression], explicit: bool = False 6635 ) -> t.Optional[exp.Expression]: 6636 any_token = self._match(TokenType.ALIAS) 6637 comments = self._prev_comments or [] 6638 6639 if explicit and not any_token: 6640 return this 6641 6642 if self._match(TokenType.L_PAREN): 6643 aliases = self.expression( 6644 exp.Aliases, 6645 comments=comments, 6646 this=this, 6647 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6648 ) 6649 self._match_r_paren(aliases) 6650 return aliases 6651 6652 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6653 self.STRING_ALIASES and self._parse_string_as_identifier() 6654 ) 6655 6656 if alias: 6657 comments.extend(alias.pop_comments()) 6658 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6659 column = this.this 6660 6661 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6662 if not this.comments and column and column.comments: 6663 this.comments = column.pop_comments() 6664 6665 return this 6666 6667 def _parse_id_var( 6668 self, 6669 any_token: bool = True, 6670 tokens: t.Optional[t.Collection[TokenType]] = None, 6671 ) -> t.Optional[exp.Expression]: 6672 expression = self._parse_identifier() 6673 if 
not expression and ( 6674 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6675 ): 6676 quoted = self._prev.token_type == TokenType.STRING 6677 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6678 6679 return expression 6680 6681 def _parse_string(self) -> t.Optional[exp.Expression]: 6682 if self._match_set(self.STRING_PARSERS): 6683 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6684 return self._parse_placeholder() 6685 6686 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6687 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6688 6689 def _parse_number(self) -> t.Optional[exp.Expression]: 6690 if self._match_set(self.NUMERIC_PARSERS): 6691 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6692 return self._parse_placeholder() 6693 6694 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6695 if self._match(TokenType.IDENTIFIER): 6696 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6697 return self._parse_placeholder() 6698 6699 def _parse_var( 6700 self, 6701 any_token: bool = False, 6702 tokens: t.Optional[t.Collection[TokenType]] = None, 6703 upper: bool = False, 6704 ) -> t.Optional[exp.Expression]: 6705 if ( 6706 (any_token and self._advance_any()) 6707 or self._match(TokenType.VAR) 6708 or (self._match_set(tokens) if tokens else False) 6709 ): 6710 return self.expression( 6711 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6712 ) 6713 return self._parse_placeholder() 6714 6715 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6716 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6717 self._advance() 6718 return self._prev 6719 return None 6720 6721 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6722 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6723 6724 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6725 return self._parse_primary() or self._parse_var(any_token=True) 6726 6727 def _parse_null(self) -> t.Optional[exp.Expression]: 6728 if self._match_set(self.NULL_TOKENS): 6729 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6730 return self._parse_placeholder() 6731 6732 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6733 if self._match(TokenType.TRUE): 6734 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6735 if self._match(TokenType.FALSE): 6736 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6737 return self._parse_placeholder() 6738 6739 def _parse_star(self) -> t.Optional[exp.Expression]: 6740 if self._match(TokenType.STAR): 6741 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6742 return self._parse_placeholder() 6743 6744 def _parse_parameter(self) -> exp.Parameter: 6745 this = self._parse_identifier() or self._parse_primary_or_var() 6746 return self.expression(exp.Parameter, this=this) 6747 6748 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6749 if self._match_set(self.PLACEHOLDER_PARSERS): 6750 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6751 if placeholder: 6752 return placeholder 6753 self._advance(-1) 6754 return None 6755 6756 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6757 if not self._match_texts(keywords): 6758 return None 6759 if self._match(TokenType.L_PAREN, 
advance=False): 6760 return self._parse_wrapped_csv(self._parse_expression) 6761 6762 expression = self._parse_expression() 6763 return [expression] if expression else None 6764 6765 def _parse_csv( 6766 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6767 ) -> t.List[exp.Expression]: 6768 parse_result = parse_method() 6769 items = [parse_result] if parse_result is not None else [] 6770 6771 while self._match(sep): 6772 self._add_comments(parse_result) 6773 parse_result = parse_method() 6774 if parse_result is not None: 6775 items.append(parse_result) 6776 6777 return items 6778 6779 def _parse_tokens( 6780 self, parse_method: t.Callable, expressions: t.Dict 6781 ) -> t.Optional[exp.Expression]: 6782 this = parse_method() 6783 6784 while self._match_set(expressions): 6785 this = self.expression( 6786 expressions[self._prev.token_type], 6787 this=this, 6788 comments=self._prev_comments, 6789 expression=parse_method(), 6790 ) 6791 6792 return this 6793 6794 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6795 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6796 6797 def _parse_wrapped_csv( 6798 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6799 ) -> t.List[exp.Expression]: 6800 return self._parse_wrapped( 6801 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6802 ) 6803 6804 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6805 wrapped = self._match(TokenType.L_PAREN) 6806 if not wrapped and not optional: 6807 self.raise_error("Expecting (") 6808 parse_result = parse_method() 6809 if wrapped: 6810 self._match_r_paren() 6811 return parse_result 6812 6813 def _parse_expressions(self) -> t.List[exp.Expression]: 6814 return self._parse_csv(self._parse_expression) 6815 6816 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6817 return self._parse_select() or self._parse_set_operations( 6818 self._parse_alias(self._parse_assignment(), explicit=True) 6819 if alias 6820 else self._parse_assignment() 6821 ) 6822 6823 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6824 return self._parse_query_modifiers( 6825 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6826 ) 6827 6828 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6829 this = None 6830 if self._match_texts(self.TRANSACTION_KIND): 6831 this = self._prev.text 6832 6833 self._match_texts(("TRANSACTION", "WORK")) 6834 6835 modes = [] 6836 while True: 6837 mode = [] 6838 while self._match(TokenType.VAR): 6839 mode.append(self._prev.text) 6840 6841 if mode: 6842 modes.append(" ".join(mode)) 6843 if not self._match(TokenType.COMMA): 6844 break 6845 6846 return self.expression(exp.Transaction, this=this, modes=modes) 6847 6848 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6849 chain = None 6850 savepoint = None 6851 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6852 6853 self._match_texts(("TRANSACTION", "WORK")) 6854 6855 if self._match_text_seq("TO"): 6856 self._match_text_seq("SAVEPOINT") 6857 savepoint = self._parse_id_var() 6858 6859 if self._match(TokenType.AND): 6860 chain = not self._match_text_seq("NO") 6861 self._match_text_seq("CHAIN") 6862 6863 if is_rollback: 6864 return self.expression(exp.Rollback, savepoint=savepoint) 6865 6866 return self.expression(exp.Commit, chain=chain) 6867 6868 def _parse_refresh(self) -> exp.Refresh: 6869 
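        # Example (illustrative, not part of the original source): statements like Spark's
        # REFRESH TABLE parse through this method into exp.Refresh, e.g.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("REFRESH TABLE db.t", read="spark")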
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
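    # Illustrative example, not part of the source: _parse_add_column should
    # turn an ADD COLUMN action into a ColumnDef carried on the Alter node.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #     assert isinstance(alter, exp.Alter)
    #     assert isinstance(alter.args["actions"][0], exp.ColumnDef)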
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
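    # Illustrative example, not part of the source: the SET DATA TYPE branch
    # of _parse_alter_table_alter should populate AlterColumn's "dtype" arg.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     alter = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
    #     action = alter.args["actions"][0]
    #     assert isinstance(action, exp.AlterColumn)
    #     assert action.args["dtype"] is not None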
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
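    # Illustrative example, not part of the source: _parse_when_matched should
    # collect each WHEN branch into a When inside the Whens node on the Merge.
    #
    #     import sqlglot
    #
    #     merge = sqlglot.parse_one(
    #         "MERGE INTO t USING s ON t.id = s.id "
    #         "WHEN MATCHED THEN UPDATE SET t.v = s.v"
    #     )
    #     assert len(merge.args["whens"].expressions) == 1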
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
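    # Illustrative example, not part of the source: _parse_set_item_assignment
    # should wrap both "SET x = 1" and "SET x TO 1" into an EQ inside a SetItem.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     set_stmt = sqlglot.parse_one("SET x = 1")
    #     assert isinstance(set_stmt.expressions[0].this, exp.EQ)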
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
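    # Illustrative example, not part of the source: _find_parser walks the same
    # kind of keyword trie that the _Parser metaclass builds from SHOW_PARSERS
    # and SET_PARSERS.
    #
    #     from sqlglot.trie import TrieResult, in_trie, new_trie
    #
    #     trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    #     result, _ = in_trie(trie, ["SHOW"])
    #     assert result == TrieResult.PREFIX  # "SHOW" alone is only a prefix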
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
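    # Illustrative example, not part of the source: _parse_truncate_table
    # should produce a TruncateTable node for the statement form (and fall
    # back to a function parse for TRUNCATE(number, decimals)).
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("TRUNCATE TABLE t"), exp.TruncateTable)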
    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )
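    # Illustrative example, not part of the source: _parse_copy should record
    # the direction of a COPY in "kind" (True for FROM, False for TO); the
    # Postgres dialect is assumed here.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     copy = sqlglot.parse_one("COPY t FROM 'data.csv'", read="postgres")
    #     assert isinstance(copy, exp.Copy)
    #     assert copy.args["kind"] is True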
    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
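    # Illustrative example, not part of the source: _parse_star_ops should
    # attach BigQuery's SELECT * EXCEPT (...) column list to the Star node.
    #
    #     import sqlglot
    #
    #     star = sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery").expressions[0]
    #     assert len(star.args["except"]) == 2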
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
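    # Illustrative example, not part of the source: FUNCTIONS routes LOG to
    # build_logarithm, which should keep the base first for dialects where
    # LOG_BASE_FIRST holds (the default).
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     log = sqlglot.parse_one("SELECT LOG(2, 8)").expressions[0]
    #     assert isinstance(log, exp.Log)
    #     assert log.this.this == "2" and log.expression.this == "8"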
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
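    # Illustrative example, not part of the source: because ID_VAR_TOKENS lets
    # many keywords double as identifiers, a column named "offset" should
    # still parse as a plain Column.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT offset FROM t").expressions[0]
    #     assert isinstance(node, exp.Column)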
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()
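    # Illustrative example, not part of the source: the TERM and FACTOR tables
    # drive operator precedence, so * should bind tighter than +.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT 1 + 2 * 3").expressions[0]
    #     assert isinstance(node, exp.Add)
    #     assert isinstance(node.expression, exp.Mul)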
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
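    # Illustrative example, not part of the source: COLUMN_OPERATORS wires the
    # -> operator to JSONExtract; the Postgres dialect is assumed here.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT data -> 'a' FROM t", read="postgres").expressions[0]
    #     assert isinstance(node, exp.JSONExtract)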
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
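    # Illustrative example, not part of the source: PLACEHOLDER_PARSERS should
    # turn ":name" into a named Placeholder via the COLON entry below.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     stmt = sqlglot.parse_one("SELECT * FROM t WHERE id = :id")
    #     assert stmt.find(exp.Placeholder).this == "id"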
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
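    # Illustrative example, not part of the source: RANGE_PARSERS parses
    # BETWEEN into a dedicated node rather than two comparisons.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     between = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10").find(exp.Between)
    #     assert between.args["low"].this == "1"
    #     assert between.args["high"].this == "10"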
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1072 ), 1073 "WITH": lambda self: self.expression( 1074 exp.Properties, expressions=self._parse_wrapped_properties() 1075 ), 1076 } 1077 1078 ALTER_PARSERS = { 1079 "ADD": lambda self: self._parse_alter_table_add(), 1080 "AS": lambda self: self._parse_select(), 1081 "ALTER": lambda self: self._parse_alter_table_alter(), 1082 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1083 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1084 "DROP": lambda self: self._parse_alter_table_drop(), 1085 "RENAME": lambda self: self._parse_alter_table_rename(), 1086 "SET": lambda self: self._parse_alter_table_set(), 1087 "SWAP": lambda self: self.expression( 1088 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1089 ), 1090 } 1091 1092 ALTER_ALTER_PARSERS = { 1093 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1094 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1095 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1096 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1097 } 1098 1099 SCHEMA_UNNAMED_CONSTRAINTS = { 1100 "CHECK", 1101 "EXCLUDE", 1102 "FOREIGN KEY", 1103 "LIKE", 1104 "PERIOD", 1105 "PRIMARY KEY", 1106 "UNIQUE", 1107 "WATERMARK", 1108 } 1109 1110 NO_PAREN_FUNCTION_PARSERS = { 1111 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1112 "CASE": lambda self: self._parse_case(), 1113 "CONNECT_BY_ROOT": lambda self: self.expression( 1114 exp.ConnectByRoot, this=self._parse_column() 1115 ), 1116 "IF": lambda self: self._parse_if(), 1117 } 1118 1119 INVALID_FUNC_NAME_TOKENS = { 1120 TokenType.IDENTIFIER, 1121 TokenType.STRING, 1122 } 1123 1124 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1125 1126 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1127 1128 FUNCTION_PARSERS = { 1129 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1130 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1131 "DECODE": lambda self: self._parse_decode(), 1132 "EXTRACT": lambda self: self._parse_extract(), 1133 "GAP_FILL": lambda self: self._parse_gap_fill(), 1134 "JSON_OBJECT": lambda self: self._parse_json_object(), 1135 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1136 "JSON_TABLE": lambda self: self._parse_json_table(), 1137 "MATCH": lambda self: self._parse_match_against(), 1138 "NORMALIZE": lambda self: self._parse_normalize(), 1139 "OPENJSON": lambda self: self._parse_open_json(), 1140 "OVERLAY": lambda self: self._parse_overlay(), 1141 "POSITION": lambda self: self._parse_position(), 1142 "PREDICT": lambda self: self._parse_predict(), 1143 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1144 "STRING_AGG": lambda self: self._parse_string_agg(), 1145 "SUBSTRING": lambda self: self._parse_substring(), 1146 "TRIM": lambda self: self._parse_trim(), 1147 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1148 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1149 "XMLELEMENT": lambda self: self.expression( 1150 exp.XMLElement, 1151 this=self._match_text_seq("NAME") and self._parse_id_var(), 1152 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1153 ), 1154 "XMLTABLE": lambda self: self._parse_xml_table(), 1155 } 1156 1157 QUERY_MODIFIER_PARSERS = { 1158 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1159 TokenType.PREWHERE: lambda self: 
("prewhere", self._parse_prewhere()), 1160 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1161 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1162 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1163 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1164 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1165 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1166 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1167 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1168 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1169 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1170 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1171 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1172 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1173 TokenType.CLUSTER_BY: lambda self: ( 1174 "cluster", 1175 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1176 ), 1177 TokenType.DISTRIBUTE_BY: lambda self: ( 1178 "distribute", 1179 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1180 ), 1181 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1182 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1183 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1184 } 1185 1186 SET_PARSERS = { 1187 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1188 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1189 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1190 "TRANSACTION": lambda self: self._parse_set_transaction(), 1191 } 1192 1193 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1194 1195 TYPE_LITERAL_PARSERS = { 1196 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1197 } 1198 1199 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1200 1201 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1202 1203 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1204 1205 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1206 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1207 "ISOLATION": ( 1208 ("LEVEL", "REPEATABLE", "READ"), 1209 ("LEVEL", "READ", "COMMITTED"), 1210 ("LEVEL", "READ", "UNCOMITTED"), 1211 ("LEVEL", "SERIALIZABLE"), 1212 ), 1213 "READ": ("WRITE", "ONLY"), 1214 } 1215 1216 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1217 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1218 ) 1219 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1220 1221 CREATE_SEQUENCE: OPTIONS_TYPE = { 1222 "SCALE": ("EXTEND", "NOEXTEND"), 1223 "SHARD": ("EXTEND", "NOEXTEND"), 1224 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1225 **dict.fromkeys( 1226 ( 1227 "SESSION", 1228 "GLOBAL", 1229 "KEEP", 1230 "NOKEEP", 1231 "ORDER", 1232 "NOORDER", 1233 "NOCACHE", 1234 "CYCLE", 1235 "NOCYCLE", 1236 "NOMINVALUE", 1237 "NOMAXVALUE", 1238 "NOSCALE", 1239 "NOSHARD", 1240 ), 1241 tuple(), 1242 ), 1243 } 1244 1245 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1246 1247 USABLES: OPTIONS_TYPE = dict.fromkeys( 1248 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1249 ) 1250 1251 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), 
("FIELDS",)) 1252 1253 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1254 "TYPE": ("EVOLUTION",), 1255 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1256 } 1257 1258 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1259 1260 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1261 1262 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1263 "NOT": ("ENFORCED",), 1264 "MATCH": ( 1265 "FULL", 1266 "PARTIAL", 1267 "SIMPLE", 1268 ), 1269 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1270 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1271 } 1272 1273 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1274 1275 CLONE_KEYWORDS = {"CLONE", "COPY"} 1276 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1277 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1278 1279 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1280 1281 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1282 1283 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1284 1285 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1286 1287 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1288 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1289 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1290 1291 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1292 1293 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1294 1295 ADD_CONSTRAINT_TOKENS = { 1296 TokenType.CONSTRAINT, 1297 TokenType.FOREIGN_KEY, 1298 TokenType.INDEX, 1299 TokenType.KEY, 1300 TokenType.PRIMARY_KEY, 1301 TokenType.UNIQUE, 1302 } 1303 1304 DISTINCT_TOKENS = {TokenType.DISTINCT} 1305 1306 NULL_TOKENS = {TokenType.NULL} 1307 1308 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1309 1310 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1311 1312 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1313 1314 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1315 1316 ODBC_DATETIME_LITERALS = { 1317 "d": exp.Date, 1318 "t": exp.Time, 1319 "ts": exp.Timestamp, 1320 } 1321 1322 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1323 1324 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1325 1326 # The style options for the DESCRIBE statement 1327 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1328 1329 OPERATION_MODIFIERS: t.Set[str] = set() 1330 1331 STRICT_CAST = True 1332 1333 PREFIXED_PIVOT_COLUMNS = False 1334 IDENTIFY_PIVOT_STRINGS = False 1335 1336 LOG_DEFAULTS_TO_LN = False 1337 1338 # Whether ADD is present for each column added by ALTER TABLE 1339 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1340 1341 # Whether the table sample clause expects CSV syntax 1342 TABLESAMPLE_CSV = False 1343 1344 # The default method used for table sampling 1345 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1346 1347 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1348 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1349 1350 # Whether the TRIM function expects the characters to trim as its first argument 1351 TRIM_PATTERN_FIRST = False 1352 1353 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1354 STRING_ALIASES = False 1355 1356 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1357 MODIFIERS_ATTACHED_TO_SET_OP = True 1358 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1359 1360 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1361 NO_PAREN_IF_COMMANDS = True 1362 1363 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1364 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1365 1366 # Whether the `:` operator is used to extract a value from a VARIANT column 1367 COLON_IS_VARIANT_EXTRACT = False 1368 1369 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1370 # If this is True and '(' is not found, the keyword will be treated as an identifier 1371 VALUES_FOLLOWED_BY_PAREN = True 1372 1373 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1374 SUPPORTS_IMPLICIT_UNNEST = False 1375 1376 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1377 INTERVAL_SPANS = True 1378 1379 # Whether a PARTITION clause can follow a table reference 1380 SUPPORTS_PARTITION_SELECTION = False 1381 1382 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1383 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1384 1385 # Whether the 'AS' keyword is optional in the CTE definition syntax 1386 OPTIONAL_ALIAS_TOKEN_CTE = False 1387 1388 __slots__ = ( 1389 "error_level", 1390 "error_message_context", 1391 "max_errors", 1392 "dialect", 1393 "sql", 1394 "errors", 1395 "_tokens", 1396 "_index", 1397 "_curr", 1398 "_next", 1399 "_prev", 1400 "_prev_comments", 1401 ) 1402 1403 # Autofilled 1404 SHOW_TRIE: t.Dict = {} 1405 SET_TRIE: t.Dict = {} 1406 1407 def __init__( 1408 self, 1409 error_level: t.Optional[ErrorLevel] = None, 1410 error_message_context: int = 100, 1411 max_errors: int = 3, 1412 dialect: DialectType = None, 1413 ): 1414 from sqlglot.dialects import Dialect 1415 1416 self.error_level = error_level or ErrorLevel.IMMEDIATE 1417 self.error_message_context = error_message_context 1418 self.max_errors = max_errors 1419 self.dialect = Dialect.get_or_raise(dialect) 1420 self.reset() 1421 1422 def reset(self): 1423 self.sql = "" 1424 self.errors = [] 1425 self._tokens = [] 1426 self._index = 0 1427 self._curr = None 1428 self._next = None 1429 self._prev = None 1430 self._prev_comments = None 1431 1432 def parse( 1433 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1434 ) -> t.List[t.Optional[exp.Expression]]: 1435 """ 1436 Parses a list of tokens and returns a list of syntax trees, one tree 1437 per parsed SQL statement. 1438 1439 Args: 1440 raw_tokens: The list of tokens. 1441 sql: The original SQL string, used to produce helpful debug messages. 1442 1443 Returns: 1444 The list of the produced syntax trees. 1445 """ 1446 return self._parse( 1447 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1448 ) 1449 1450 def parse_into( 1451 self, 1452 expression_types: exp.IntoType, 1453 raw_tokens: t.List[Token], 1454 sql: t.Optional[str] = None, 1455 ) -> t.List[t.Optional[exp.Expression]]: 1456 """ 1457 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1458 types is given instead, this method will try to parse the token list into each one 1459 of them, stopping at the first for which the parsing succeeds. 1460 1461 Args: 1462 expression_types: The expression type(s) to try and parse the token list into. 1463 raw_tokens: The list of tokens. 1464 sql: The original SQL string, used to produce helpful debug messages. 1465 1466 Returns: 1467 The target Expression. 1468 """ 1469 errors = [] 1470 for expression_type in ensure_list(expression_types): 1471 parser = self.EXPRESSION_PARSERS.get(expression_type) 1472 if not parser: 1473 raise TypeError(f"No parser registered for {expression_type}") 1474 1475 try: 1476 return self._parse(parser, raw_tokens, sql) 1477 except ParseError as e: 1478 e.errors[0]["into_expression"] = expression_type 1479 errors.append(e) 1480 1481 raise ParseError( 1482 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1483 errors=merge_errors(errors), 1484 ) from errors[-1] 1485 1486 def _parse( 1487 self, 1488 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1489 raw_tokens: t.List[Token], 1490 sql: t.Optional[str] = None, 1491 ) -> t.List[t.Optional[exp.Expression]]: 1492 self.reset() 1493 self.sql = sql or "" 1494 1495 total = len(raw_tokens) 1496 chunks: t.List[t.List[Token]] = [[]] 1497 1498 for i, token in enumerate(raw_tokens): 1499 if token.token_type == TokenType.SEMICOLON: 1500 if token.comments: 1501 chunks.append([token]) 1502 1503 if i < total - 1: 1504 chunks.append([]) 1505 else: 1506 chunks[-1].append(token) 1507 1508 expressions = [] 1509 1510 for tokens in chunks: 1511 self._index = -1 1512 self._tokens = tokens 1513 self._advance() 1514 1515 expressions.append(parse_method(self)) 1516 1517 if self._index < len(self._tokens): 1518 self.raise_error("Invalid expression / Unexpected token") 1519 1520 self.check_errors() 1521 1522 return expressions 1523 1524 def check_errors(self) -> None: 1525 """Logs or raises any found errors, depending on the chosen error level setting.""" 1526 if self.error_level == ErrorLevel.WARN: 1527 for error in self.errors: 1528 logger.error(str(error)) 1529 elif self.error_level == ErrorLevel.RAISE and self.errors: 1530 raise ParseError( 1531 concat_messages(self.errors, self.max_errors), 1532 errors=merge_errors(self.errors), 1533 ) 1534 1535 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1536 """ 1537 Appends an error in the list of recorded errors or raises it, depending on the chosen 1538 error level setting. 1539 """ 1540 token = token or self._curr or self._prev or Token.string("") 1541 start = token.start 1542 end = token.end + 1 1543 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1544 highlight = self.sql[start:end] 1545 end_context = self.sql[end : end + self.error_message_context] 1546 1547 error = ParseError.new( 1548 f"{message}. Line {token.line}, Col: {token.col}.\n" 1549 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1550 description=message, 1551 line=token.line, 1552 col=token.col, 1553 start_context=start_context, 1554 highlight=highlight, 1555 end_context=end_context, 1556 ) 1557 1558 if self.error_level == ErrorLevel.IMMEDIATE: 1559 raise error 1560 1561 self.errors.append(error) 1562 1563 def expression( 1564 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1565 ) -> E: 1566 """ 1567 Creates a new, validated Expression. 1568 1569 Args: 1570 exp_class: The expression class to instantiate. 
1571 comments: An optional list of comments to attach to the expression. 1572 kwargs: The arguments to set for the expression along with their respective values. 1573 1574 Returns: 1575 The target expression. 1576 """ 1577 instance = exp_class(**kwargs) 1578 instance.add_comments(comments) if comments else self._add_comments(instance) 1579 return self.validate_expression(instance) 1580 1581 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1582 if expression and self._prev_comments: 1583 expression.add_comments(self._prev_comments) 1584 self._prev_comments = None 1585 1586 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1587 """ 1588 Validates an Expression, making sure that all its mandatory arguments are set. 1589 1590 Args: 1591 expression: The expression to validate. 1592 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1593 1594 Returns: 1595 The validated expression. 1596 """ 1597 if self.error_level != ErrorLevel.IGNORE: 1598 for error_message in expression.error_messages(args): 1599 self.raise_error(error_message) 1600 1601 return expression 1602 1603 def _find_sql(self, start: Token, end: Token) -> str: 1604 return self.sql[start.start : end.end + 1] 1605 1606 def _is_connected(self) -> bool: 1607 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1608 1609 def _advance(self, times: int = 1) -> None: 1610 self._index += times 1611 self._curr = seq_get(self._tokens, self._index) 1612 self._next = seq_get(self._tokens, self._index + 1) 1613 1614 if self._index > 0: 1615 self._prev = self._tokens[self._index - 1] 1616 self._prev_comments = self._prev.comments 1617 else: 1618 self._prev = None 1619 self._prev_comments = None 1620 1621 def _retreat(self, index: int) -> None: 1622 if index != self._index: 1623 self._advance(index - self._index) 1624 1625 def _warn_unsupported(self) -> None: 1626 if len(self._tokens) <= 1: 1627 return 1628 1629 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1630 # interested in emitting a warning for the one being currently processed. 1631 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1632 1633 logger.warning( 1634 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1635 ) 1636 1637 def _parse_command(self) -> exp.Command: 1638 self._warn_unsupported() 1639 return self.expression( 1640 exp.Command, 1641 comments=self._prev_comments, 1642 this=self._prev.text.upper(), 1643 expression=self._parse_string(), 1644 ) 1645 1646 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1647 """ 1648 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1649 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1650 solve this by setting & resetting the parser state accordingly. 1651 """ 1652 index = self._index 1653 error_level = self.error_level 1654 1655 self.error_level = ErrorLevel.IMMEDIATE 1656 try: 1657 this = parse_method() 1658 except ParseError: 1659 this = None 1660 finally: 1661 if not this or retreat: 1662 self._retreat(index) 1663 self.error_level = error_level 1664 1665 return this 1666 1667 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1668 start = self._prev 1669 exists = self._parse_exists() if allow_exists else None 1670 1671 self._match(TokenType.ON) 1672 1673 materialized = self._match_text_seq("MATERIALIZED") 1674 kind = self._match_set(self.CREATABLES) and self._prev 1675 if not kind: 1676 return self._parse_as_command(start) 1677 1678 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1679 this = self._parse_user_defined_function(kind=kind.token_type) 1680 elif kind.token_type == TokenType.TABLE: 1681 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1682 elif kind.token_type == TokenType.COLUMN: 1683 this = self._parse_column() 1684 else: 1685 this = self._parse_id_var() 1686 1687 self._match(TokenType.IS) 1688 1689 return self.expression( 1690 exp.Comment, 1691 this=this, 1692 kind=kind.text, 1693 expression=self._parse_string(), 1694 exists=exists, 1695 materialized=materialized, 1696 ) 1697 1698 def _parse_to_table( 1699 self, 1700 ) -> exp.ToTableProperty: 1701 table = self._parse_table_parts(schema=True) 1702 return self.expression(exp.ToTableProperty, this=table) 1703 1704 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1705 def _parse_ttl(self) -> exp.Expression: 1706 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1707 this = self._parse_bitwise() 1708 1709 if self._match_text_seq("DELETE"): 1710 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1711 if self._match_text_seq("RECOMPRESS"): 1712 return self.expression( 1713 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1714 ) 1715 if self._match_text_seq("TO", "DISK"): 1716 return self.expression( 1717 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1718 ) 1719 if self._match_text_seq("TO", "VOLUME"): 1720 return self.expression( 1721 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1722 ) 1723 1724 return this 1725 1726 expressions = self._parse_csv(_parse_ttl_action) 1727 where = self._parse_where() 1728 group = self._parse_group() 1729 1730 aggregates = None 1731 if group and self._match(TokenType.SET): 1732 aggregates = self._parse_csv(self._parse_set_item) 1733 1734 return self.expression( 1735 exp.MergeTreeTTL, 1736 expressions=expressions, 1737 where=where, 1738 group=group, 1739 aggregates=aggregates, 1740 ) 1741 1742 def _parse_statement(self) -> t.Optional[exp.Expression]: 1743 if self._curr is None: 1744 return None 1745 1746 if self._match_set(self.STATEMENT_PARSERS): 1747 comments = self._prev_comments 1748 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1749 stmt.add_comments(comments, prepend=True) 1750 return stmt 1751 1752 if self._match_set(self.dialect.tokenizer.COMMANDS): 1753 return self._parse_command() 1754 1755 expression = self._parse_expression() 1756 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1757 return self._parse_query_modifiers(expression)
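# A minimal sketch of how _parse_statement is driven end to end, assuming the
# public Dialect helpers (get_or_raise, tokenize, parser) that sqlglot.parse
# relies on internally; "duckdb" is just an example dialect name:
#
#   from sqlglot.dialects import Dialect
#
#   dialect = Dialect.get_or_raise("duckdb")
#   sql = "SELECT 1; SELECT 2"
#   tokens = dialect.tokenize(sql)     # list of Token objects
#   parser = dialect.parser()          # a Parser bound to the dialect
#   trees = parser.parse(tokens, sql)  # one syntax tree per ';'-separated statement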
1758 1759 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1760 start = self._prev 1761 temporary = self._match(TokenType.TEMPORARY) 1762 materialized = self._match_text_seq("MATERIALIZED") 1763 1764 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1765 if not kind: 1766 return self._parse_as_command(start) 1767 1768 concurrently = self._match_text_seq("CONCURRENTLY") 1769 if_exists = exists or self._parse_exists() 1770 1771 if kind == "COLUMN": 1772 this = self._parse_column() 1773 else: 1774 this = self._parse_table_parts( 1775 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1776 ) 1777 1778 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1779 1780 if self._match(TokenType.L_PAREN, advance=False): 1781 expressions = self._parse_wrapped_csv(self._parse_types) 1782 else: 1783 expressions = None 1784 1785 return self.expression( 1786 exp.Drop, 1787 exists=if_exists, 1788 this=this, 1789 expressions=expressions, 1790 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1791 temporary=temporary, 1792 materialized=materialized, 1793 cascade=self._match_text_seq("CASCADE"), 1794 constraints=self._match_text_seq("CONSTRAINTS"), 1795 purge=self._match_text_seq("PURGE"), 1796 cluster=cluster, 1797 concurrently=concurrently, 1798 ) 1799 1800 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1801 return ( 1802 self._match_text_seq("IF") 1803 and (not not_ or self._match(TokenType.NOT)) 1804 and self._match(TokenType.EXISTS) 1805 ) 1806 1807 def _parse_create(self) -> exp.Create | exp.Command: 1808 # Note: this can't be None because we've matched a statement parser 1809 start = self._prev 1810 1811 replace = ( 1812 start.token_type == TokenType.REPLACE 1813 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1814 or self._match_pair(TokenType.OR, TokenType.ALTER) 1815 ) 1816 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1817 1818 unique = self._match(TokenType.UNIQUE) 1819 1820 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1821 clustered = True 1822 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1823 "COLUMNSTORE" 1824 ): 1825 clustered = False 1826 else: 1827 clustered = None 1828 1829 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1830 self._advance() 1831 1832 properties = None 1833 create_token = self._match_set(self.CREATABLES) and self._prev 1834 1835 if not create_token: 1836 # exp.Properties.Location.POST_CREATE 1837 properties = self._parse_properties() 1838 create_token = self._match_set(self.CREATABLES) and self._prev 1839 1840 if not properties or not create_token: 1841 return self._parse_as_command(start) 1842 1843 concurrently = self._match_text_seq("CONCURRENTLY") 1844 exists = self._parse_exists(not_=True) 1845 this = None 1846 expression: t.Optional[exp.Expression] = None 1847 indexes = None 1848 no_schema_binding = None 1849 begin = None 1850 end = None 1851 clone = None 1852 1853 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1854 nonlocal properties 1855 if properties and temp_props: 1856 properties.expressions.extend(temp_props.expressions) 1857 elif temp_props: 1858 properties = temp_props 1859 1860 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1861 this = self._parse_user_defined_function(kind=create_token.token_type) 1862 1863 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1864 
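# (illustration: in CREATE FUNCTION f(x INT) RETURNS INT LANGUAGE SQL AS ...,
# properties such as RETURNS and LANGUAGE are consumed here, right after the signature)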
extend_props(self._parse_properties()) 1865 1866 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1867 extend_props(self._parse_properties()) 1868 1869 if not expression: 1870 if self._match(TokenType.COMMAND): 1871 expression = self._parse_as_command(self._prev) 1872 else: 1873 begin = self._match(TokenType.BEGIN) 1874 return_ = self._match_text_seq("RETURN") 1875 1876 if self._match(TokenType.STRING, advance=False): 1877 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1878 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1879 expression = self._parse_string() 1880 extend_props(self._parse_properties()) 1881 else: 1882 expression = self._parse_user_defined_function_expression() 1883 1884 end = self._match_text_seq("END") 1885 1886 if return_: 1887 expression = self.expression(exp.Return, this=expression) 1888 elif create_token.token_type == TokenType.INDEX: 1889 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1890 if not self._match(TokenType.ON): 1891 index = self._parse_id_var() 1892 anonymous = False 1893 else: 1894 index = None 1895 anonymous = True 1896 1897 this = self._parse_index(index=index, anonymous=anonymous) 1898 elif create_token.token_type in self.DB_CREATABLES: 1899 table_parts = self._parse_table_parts( 1900 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1901 ) 1902 1903 # exp.Properties.Location.POST_NAME 1904 self._match(TokenType.COMMA) 1905 extend_props(self._parse_properties(before=True)) 1906 1907 this = self._parse_schema(this=table_parts) 1908 1909 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1910 extend_props(self._parse_properties()) 1911 1912 self._match(TokenType.ALIAS) 1913 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1914 # exp.Properties.Location.POST_ALIAS 1915 extend_props(self._parse_properties()) 1916 1917 if create_token.token_type == TokenType.SEQUENCE: 1918 expression = self._parse_types() 1919 extend_props(self._parse_properties()) 1920 else: 1921 expression = self._parse_ddl_select() 1922 1923 if create_token.token_type == TokenType.TABLE: 1924 # exp.Properties.Location.POST_EXPRESSION 1925 extend_props(self._parse_properties()) 1926 1927 indexes = [] 1928 while True: 1929 index = self._parse_index() 1930 1931 # exp.Properties.Location.POST_INDEX 1932 extend_props(self._parse_properties()) 1933 if not index: 1934 break 1935 else: 1936 self._match(TokenType.COMMA) 1937 indexes.append(index) 1938 elif create_token.token_type == TokenType.VIEW: 1939 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1940 no_schema_binding = True 1941 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1942 extend_props(self._parse_properties()) 1943 1944 shallow = self._match_text_seq("SHALLOW") 1945 1946 if self._match_texts(self.CLONE_KEYWORDS): 1947 copy = self._prev.text.lower() == "copy" 1948 clone = self.expression( 1949 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1950 ) 1951 1952 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1953 return self._parse_as_command(start) 1954 1955 create_kind_text = create_token.text.upper() 1956 return self.expression( 1957 exp.Create, 1958 this=this, 1959 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1960 replace=replace, 1961 refresh=refresh, 1962 unique=unique, 1963 expression=expression, 
1964 exists=exists, 1965 properties=properties, 1966 indexes=indexes, 1967 no_schema_binding=no_schema_binding, 1968 begin=begin, 1969 end=end, 1970 clone=clone, 1971 concurrently=concurrently, 1972 clustered=clustered, 1973 ) 1974 1975 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1976 seq = exp.SequenceProperties() 1977 1978 options = [] 1979 index = self._index 1980 1981 while self._curr: 1982 self._match(TokenType.COMMA) 1983 if self._match_text_seq("INCREMENT"): 1984 self._match_text_seq("BY") 1985 self._match_text_seq("=") 1986 seq.set("increment", self._parse_term()) 1987 elif self._match_text_seq("MINVALUE"): 1988 seq.set("minvalue", self._parse_term()) 1989 elif self._match_text_seq("MAXVALUE"): 1990 seq.set("maxvalue", self._parse_term()) 1991 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1992 self._match_text_seq("=") 1993 seq.set("start", self._parse_term()) 1994 elif self._match_text_seq("CACHE"): 1995 # T-SQL allows empty CACHE which is initialized dynamically 1996 seq.set("cache", self._parse_number() or True) 1997 elif self._match_text_seq("OWNED", "BY"): 1998 # "OWNED BY NONE" is the default 1999 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2000 else: 2001 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2002 if opt: 2003 options.append(opt) 2004 else: 2005 break 2006 2007 seq.set("options", options if options else None) 2008 return None if self._index == index else seq 2009 2010 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2011 # only used for teradata currently 2012 self._match(TokenType.COMMA) 2013 2014 kwargs = { 2015 "no": self._match_text_seq("NO"), 2016 "dual": self._match_text_seq("DUAL"), 2017 "before": self._match_text_seq("BEFORE"), 2018 "default": self._match_text_seq("DEFAULT"), 2019 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2020 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2021 "after": self._match_text_seq("AFTER"), 2022 "minimum": self._match_texts(("MIN", "MINIMUM")), 2023 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2024 } 2025 2026 if self._match_texts(self.PROPERTY_PARSERS): 2027 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2028 try: 2029 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2030 except TypeError: 2031 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2032 2033 return None 2034 2035 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2036 return self._parse_wrapped_csv(self._parse_property) 2037 2038 def _parse_property(self) -> t.Optional[exp.Expression]: 2039 if self._match_texts(self.PROPERTY_PARSERS): 2040 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2041 2042 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2043 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2044 2045 if self._match_text_seq("COMPOUND", "SORTKEY"): 2046 return self._parse_sortkey(compound=True) 2047 2048 if self._match_text_seq("SQL", "SECURITY"): 2049 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2050 2051 index = self._index 2052 key = self._parse_column() 2053 2054 if not self._match(TokenType.EQ): 2055 self._retreat(index) 2056 return self._parse_sequence_properties() 2057 2058 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2059 if isinstance(key, exp.Column): 2060 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2061 2062 value = self._parse_bitwise() or self._parse_var(any_token=True) 2063 2064 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2065 if isinstance(value, exp.Column): 2066 value = exp.var(value.name) 2067 2068 return self.expression(exp.Property, this=key, value=value) 2069 2070 def _parse_stored(self) -> exp.FileFormatProperty: 2071 self._match(TokenType.ALIAS) 2072 2073 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2074 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2075 2076 return self.expression( 2077 exp.FileFormatProperty, 2078 this=( 2079 self.expression( 2080 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2081 ) 2082 if input_format or output_format 2083 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2084 ), 2085 ) 2086 2087 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2088 field = self._parse_field() 2089 if isinstance(field, exp.Identifier) and not field.quoted: 2090 field = exp.var(field) 2091 2092 return field 2093 2094 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2095 self._match(TokenType.EQ) 2096 self._match(TokenType.ALIAS) 2097 2098 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2099 2100 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2101 properties = [] 2102 while True: 2103 if before: 2104 prop = self._parse_property_before() 2105 else: 2106 prop = self._parse_property() 2107 if not prop: 2108 break 2109 for p in ensure_list(prop): 2110 properties.append(p) 2111 2112 if properties: 2113 return self.expression(exp.Properties, expressions=properties) 2114 2115 return None 2116 2117 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2118 return self.expression( 2119 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2120 ) 2121 2122 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2123 if self._match_texts(("DEFINER", "INVOKER")): 2124 security_specifier = self._prev.text.upper() 2125 return self.expression(exp.SecurityProperty, this=security_specifier) 2126 return None 2127 2128 def _parse_settings_property(self) -> exp.SettingsProperty: 2129 return self.expression( 2130 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2131 ) 2132 2133 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2134 if self._index >= 2: 2135 pre_volatile_token = self._tokens[self._index - 2] 2136 else: 2137 pre_volatile_token = None 2138 2139 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2140 return exp.VolatileProperty() 2141 2142 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2143 2144 def _parse_retention_period(self) -> exp.Var: 2145 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2146 number = self._parse_number() 2147 number_str = f"{number} " if number else "" 2148 unit = self._parse_var(any_token=True) 2149 return exp.var(f"{number_str}{unit}") 2150 2151 def _parse_system_versioning_property( 2152 self, with_: bool = False 2153 ) -> exp.WithSystemVersioningProperty: 2154 self._match(TokenType.EQ) 2155 prop = self.expression( 2156 exp.WithSystemVersioningProperty, 2157 **{ # type: ignore 2158 "on": 
True, 2159 "with": with_, 2160 }, 2161 ) 2162 2163 if self._match_text_seq("OFF"): 2164 prop.set("on", False) 2165 return prop 2166 2167 self._match(TokenType.ON) 2168 if self._match(TokenType.L_PAREN): 2169 while self._curr and not self._match(TokenType.R_PAREN): 2170 if self._match_text_seq("HISTORY_TABLE", "="): 2171 prop.set("this", self._parse_table_parts()) 2172 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2173 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2174 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2175 prop.set("retention_period", self._parse_retention_period()) 2176 2177 self._match(TokenType.COMMA) 2178 2179 return prop 2180 2181 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2182 self._match(TokenType.EQ) 2183 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2184 prop = self.expression(exp.DataDeletionProperty, on=on) 2185 2186 if self._match(TokenType.L_PAREN): 2187 while self._curr and not self._match(TokenType.R_PAREN): 2188 if self._match_text_seq("FILTER_COLUMN", "="): 2189 prop.set("filter_column", self._parse_column()) 2190 elif self._match_text_seq("RETENTION_PERIOD", "="): 2191 prop.set("retention_period", self._parse_retention_period()) 2192 2193 self._match(TokenType.COMMA) 2194 2195 return prop 2196 2197 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2198 kind = "HASH" 2199 expressions: t.Optional[t.List[exp.Expression]] = None 2200 if self._match_text_seq("BY", "HASH"): 2201 expressions = self._parse_wrapped_csv(self._parse_id_var) 2202 elif self._match_text_seq("BY", "RANDOM"): 2203 kind = "RANDOM" 2204 2205 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2206 buckets: t.Optional[exp.Expression] = None 2207 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2208 buckets = self._parse_number() 2209 2210 return self.expression( 2211 exp.DistributedByProperty, 2212 expressions=expressions, 2213 kind=kind, 2214 buckets=buckets, 2215 order=self._parse_order(), 2216 ) 2217 2218 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2219 self._match_text_seq("KEY") 2220 expressions = self._parse_wrapped_id_vars() 2221 return self.expression(expr_type, expressions=expressions) 2222 2223 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2224 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2225 prop = self._parse_system_versioning_property(with_=True) 2226 self._match_r_paren() 2227 return prop 2228 2229 if self._match(TokenType.L_PAREN, advance=False): 2230 return self._parse_wrapped_properties() 2231 2232 if self._match_text_seq("JOURNAL"): 2233 return self._parse_withjournaltable() 2234 2235 if self._match_texts(self.VIEW_ATTRIBUTES): 2236 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2237 2238 if self._match_text_seq("DATA"): 2239 return self._parse_withdata(no=False) 2240 elif self._match_text_seq("NO", "DATA"): 2241 return self._parse_withdata(no=True) 2242 2243 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2244 return self._parse_serde_properties(with_=True) 2245 2246 if self._match(TokenType.SCHEMA): 2247 return self.expression( 2248 exp.WithSchemaBindingProperty, 2249 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2250 ) 2251 2252 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2253 return self.expression( 2254 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2255 ) 2256 2257 if not self._next: 2258 return None 2259 2260 return self._parse_withisolatedloading() 2261 2262 def _parse_procedure_option(self) -> exp.Expression | None: 2263 if self._match_text_seq("EXECUTE", "AS"): 2264 return self.expression( 2265 exp.ExecuteAsProperty, 2266 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2267 or self._parse_string(), 2268 ) 2269 2270 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2271 2272 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2273 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2274 self._match(TokenType.EQ) 2275 2276 user = self._parse_id_var() 2277 self._match(TokenType.PARAMETER) 2278 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2279 2280 if not user or not host: 2281 return None 2282 2283 return exp.DefinerProperty(this=f"{user}@{host}") 2284 2285 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2286 self._match(TokenType.TABLE) 2287 self._match(TokenType.EQ) 2288 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2289 2290 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2291 return self.expression(exp.LogProperty, no=no) 2292 2293 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2294 return self.expression(exp.JournalProperty, **kwargs) 2295 2296 def _parse_checksum(self) -> exp.ChecksumProperty: 2297 self._match(TokenType.EQ) 2298 2299 on = None 2300 if self._match(TokenType.ON): 2301 on = True 2302 elif self._match_text_seq("OFF"): 2303 on = False 2304 2305 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2306 2307 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2308 return self.expression( 2309 exp.Cluster, 2310 expressions=( 2311 self._parse_wrapped_csv(self._parse_ordered) 2312 if wrapped 2313 else self._parse_csv(self._parse_ordered) 2314 ), 2315 ) 2316 2317 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2318 self._match_text_seq("BY") 2319 2320 self._match_l_paren() 2321 expressions = self._parse_csv(self._parse_column) 2322 self._match_r_paren() 2323 2324 if self._match_text_seq("SORTED", "BY"): 2325 self._match_l_paren() 2326 sorted_by = self._parse_csv(self._parse_ordered) 2327 self._match_r_paren() 2328 else: 2329 sorted_by = None 2330 2331 self._match(TokenType.INTO) 2332 buckets = self._parse_number() 2333 self._match_text_seq("BUCKETS") 2334 2335 return self.expression( 2336 exp.ClusteredByProperty, 2337 expressions=expressions, 2338 sorted_by=sorted_by, 2339 buckets=buckets, 2340 ) 2341 2342 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2343 if not self._match_text_seq("GRANTS"): 2344 self._retreat(self._index - 1) 2345 return None 2346 2347 return self.expression(exp.CopyGrantsProperty) 2348 2349 def _parse_freespace(self) -> exp.FreespaceProperty: 2350 self._match(TokenType.EQ) 2351 return self.expression( 2352 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2353 ) 2354 2355 def _parse_mergeblockratio( 2356 self, no: bool = False, default: bool = False 2357 ) -> exp.MergeBlockRatioProperty: 2358 if self._match(TokenType.EQ): 2359 return self.expression( 2360 exp.MergeBlockRatioProperty, 2361 this=self._parse_number(), 2362 percent=self._match(TokenType.PERCENT), 2363 ) 2364 2365 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2366 2367 def _parse_datablocksize( 2368 self, 2369 default: t.Optional[bool] = None, 2370 minimum: t.Optional[bool] = None, 2371 maximum: t.Optional[bool] = None, 2372 ) -> exp.DataBlocksizeProperty: 2373 self._match(TokenType.EQ) 2374 size = self._parse_number() 2375 2376 units = None 2377 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2378 units = self._prev.text 2379 2380 return self.expression( 2381 exp.DataBlocksizeProperty, 2382 size=size, 2383 units=units, 2384 default=default, 2385 minimum=minimum, 2386 maximum=maximum, 2387 ) 2388 2389 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2390 self._match(TokenType.EQ) 2391 always = self._match_text_seq("ALWAYS") 2392 manual = self._match_text_seq("MANUAL") 2393 never = self._match_text_seq("NEVER") 2394 default = self._match_text_seq("DEFAULT") 2395 2396 autotemp = None 2397 if self._match_text_seq("AUTOTEMP"): 2398 autotemp = self._parse_schema() 2399 2400 return self.expression( 2401 exp.BlockCompressionProperty, 2402 always=always, 2403 manual=manual, 2404 never=never, 2405 default=default, 2406 autotemp=autotemp, 2407 ) 2408 2409 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2410 index = self._index 2411 no = self._match_text_seq("NO") 2412 concurrent = self._match_text_seq("CONCURRENT") 2413 2414 if not self._match_text_seq("ISOLATED", "LOADING"): 2415 self._retreat(index) 2416 return None 2417 2418 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2419 return self.expression( 2420 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2421 ) 2422 2423 def _parse_locking(self) -> exp.LockingProperty: 2424 if self._match(TokenType.TABLE): 2425 kind = "TABLE" 2426 elif self._match(TokenType.VIEW): 2427 kind = "VIEW" 2428 elif self._match(TokenType.ROW): 2429 kind = "ROW" 2430 elif self._match_text_seq("DATABASE"): 2431 kind = "DATABASE" 2432 else: 2433 kind = None 2434 2435 if kind in ("DATABASE", "TABLE", "VIEW"): 2436 this = self._parse_table_parts() 2437 else: 2438 this = None 2439 2440 if self._match(TokenType.FOR): 2441 for_or_in = "FOR" 2442 elif self._match(TokenType.IN): 2443 for_or_in = "IN" 2444 else: 2445 for_or_in = None 2446 2447 if self._match_text_seq("ACCESS"): 2448 lock_type = "ACCESS" 2449 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2450 lock_type = "EXCLUSIVE" 2451 elif self._match_text_seq("SHARE"): 2452 lock_type = "SHARE" 2453 elif self._match_text_seq("READ"): 2454 lock_type = "READ" 2455 elif self._match_text_seq("WRITE"): 2456 lock_type = "WRITE" 2457 elif self._match_text_seq("CHECKSUM"): 2458 lock_type = "CHECKSUM" 2459 else: 2460 lock_type = None 2461 2462 override = self._match_text_seq("OVERRIDE") 2463 2464 return self.expression( 2465 exp.LockingProperty, 2466 this=this, 2467 kind=kind, 2468 for_or_in=for_or_in, 2469 lock_type=lock_type, 2470 override=override, 2471 ) 2472 2473 def _parse_partition_by(self) -> t.List[exp.Expression]: 2474 if self._match(TokenType.PARTITION_BY): 2475 return self._parse_csv(self._parse_assignment) 2476 return [] 2477 2478 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2479 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2480 if self._match_text_seq("MINVALUE"): 2481 return exp.var("MINVALUE") 2482 if self._match_text_seq("MAXVALUE"): 2483 return exp.var("MAXVALUE") 2484 return self._parse_bitwise() 2485 2486 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2487 expression = None 
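# The bound forms handled below follow Postgres: FOR VALUES IN (...),
# FOR VALUES FROM (...) TO (...), and FOR VALUES WITH (MODULUS m, REMAINDER r)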
2488 from_expressions = None 2489 to_expressions = None 2490 2491 if self._match(TokenType.IN): 2492 this = self._parse_wrapped_csv(self._parse_bitwise) 2493 elif self._match(TokenType.FROM): 2494 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2495 self._match_text_seq("TO") 2496 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2497 elif self._match_text_seq("WITH", "(", "MODULUS"): 2498 this = self._parse_number() 2499 self._match_text_seq(",", "REMAINDER") 2500 expression = self._parse_number() 2501 self._match_r_paren() 2502 else: 2503 self.raise_error("Failed to parse partition bound spec.") 2504 2505 return self.expression( 2506 exp.PartitionBoundSpec, 2507 this=this, 2508 expression=expression, 2509 from_expressions=from_expressions, 2510 to_expressions=to_expressions, 2511 ) 2512 2513 # https://www.postgresql.org/docs/current/sql-createtable.html 2514 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2515 if not self._match_text_seq("OF"): 2516 self._retreat(self._index - 1) 2517 return None 2518 2519 this = self._parse_table(schema=True) 2520 2521 if self._match(TokenType.DEFAULT): 2522 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2523 elif self._match_text_seq("FOR", "VALUES"): 2524 expression = self._parse_partition_bound_spec() 2525 else: 2526 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2527 2528 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2529 2530 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2531 self._match(TokenType.EQ) 2532 return self.expression( 2533 exp.PartitionedByProperty, 2534 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2535 ) 2536 2537 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2538 if self._match_text_seq("AND", "STATISTICS"): 2539 statistics = True 2540 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2541 statistics = False 2542 else: 2543 statistics = None 2544 2545 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2546 2547 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2548 if self._match_text_seq("SQL"): 2549 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2550 return None 2551 2552 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2553 if self._match_text_seq("SQL", "DATA"): 2554 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2555 return None 2556 2557 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2558 if self._match_text_seq("PRIMARY", "INDEX"): 2559 return exp.NoPrimaryIndexProperty() 2560 if self._match_text_seq("SQL"): 2561 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2562 return None 2563 2564 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2565 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2566 return exp.OnCommitProperty() 2567 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2568 return exp.OnCommitProperty(delete=True) 2569 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2570 2571 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2572 if self._match_text_seq("SQL", "DATA"): 2573 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2574 return None 2575 2576 def _parse_distkey(self) -> exp.DistKeyProperty: 2577 return self.expression(exp.DistKeyProperty, 
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
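    # Editor's illustration: the WHEN ... THEN INTO loop above is what handles Oracle-style
    # multitable inserts (the table and column names below are hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "INSERT ALL WHEN x > 0 THEN INTO t1 VALUES (x) ELSE INTO t2 VALUES (x) SELECT x FROM src",
    #   ...     read="oracle",
    #   ... )
    #
    # should produce an exp.MultitableInserts whose expressions are exp.ConditionalInsert nodes.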
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
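    # Editor's illustration of the DELIMITED branch above (Hive syntax; names hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "CREATE TABLE t (c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",
    #   ...     read="hive",
    #   ... )
    #
    # should attach an exp.RowFormatDelimitedProperty with fields=',' to the CREATE statement.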
    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
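    # Editor's illustration of the multiple-table DELETE handling above (hypothetical tables):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x = 1",
    #   ...     read="mysql",
    #   ... )
    #
    # should yield an exp.Delete whose "tables" arg holds the deleted-from table t1.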
    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
                this = self._parse_simplified_pivot(
                    is_unpivot=self._prev.token_type == TokenType.UNPIVOT
                )
            elif self._match(TokenType.FROM):
                from_ = self._parse_from(skip_from_token=True)
                # Support parentheses for duckdb FROM-first syntax
                select = self._parse_select()
                if select:
                    select.set("from", from_)
                    this = select
                else:
                    this = exp.select("*").from_(t.cast(exp.From, from_))
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )
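    # Editor's illustration of the CTE attachment logic in _parse_select above:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x").args["with"]
    #
    # should return the exp.With node, since exp.Select declares "with" in its arg_types.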
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
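    # Editor's illustration of _implicit_unnests_to_explicit (BigQuery enables
    # SUPPORTS_IMPLICIT_UNNEST; the table/column names are hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t, t.items", read="bigquery").sql()
    #
    # should render the comma-joined t.items as an explicit UNNEST(t.items).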
    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
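    # Editor's illustration of the APPLY handling in _parse_lateral above (T-SQL; names
    # hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t CROSS APPLY fn(t.x)", read="tsql")
    #
    # should contain an exp.Lateral with cross_apply=True, while OUTER APPLY sets it to False.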
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
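    # Editor's illustration of the USING branch above (hypothetical tables):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").find(exp.Join).args["using"]
    #
    # should be a list containing the identifier id, produced by _parse_using_identifiers.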
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
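    # Editor's illustration of the dotted-name loop above (hypothetical names):
    #
    #   >>> import sqlglot
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.tbl").find(sqlglot.exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name
    #
    # should evaluate to ('c', 'd', 'tbl'): the first two parts shift into catalog and db.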
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
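    # Editor's illustration of the WITH OFFSET fallback above (BigQuery):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET", read="bigquery")
    #
    # should produce an exp.Unnest whose offset defaults to the identifier "offset"
    # when no explicit offset alias follows.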
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
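    # Editor's illustration of the BUCKET branch above (Hive sampling syntax; names hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)", read="hive")
    #
    # should populate bucket_numerator=1, bucket_denominator=4 and bucket_field=x
    # on the resulting exp.TableSample.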
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
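    # Editor's illustration of the simplified syntax above (duckdb; names hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #
    # should parse into an exp.Pivot with this=cities, an ON list of [year] and a USING
    # aggregation, rather than the FOR/IN form handled by _parse_pivot below.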
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())
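    # Editor's illustration of the ROLLUP handling above (hypothetical columns):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
    #
    # should carry a "rollup" arg holding an exp.Rollup node rather than plain expressions.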
    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
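    # Editor's illustration of the PRIOR handling above (Oracle hierarchical query; names
    # hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "SELECT id FROM t START WITH parent_id IS NULL CONNECT BY PRIOR id = parent_id",
    #   ...     read="oracle",
    #   ... )
    #
    # should wrap PRIOR id in an exp.Prior node; the parser registers PRIOR only for the
    # duration of the CONNECT BY condition and removes it again right after.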
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
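    # Editor's illustration of the COMMA branch in _parse_limit above: in MySQL's
    # "LIMIT offset, count" form the first term becomes the offset (table name hypothetical):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql").args["limit"]
    #
    # should be an exp.Limit with offset=5 and expression=10; _parse_query_modifiers later
    # splits that offset out into a separate exp.Offset node on the Select.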
parse_set_operation=False) 4485 4486 this = self.expression( 4487 operation, 4488 comments=comments, 4489 this=this, 4490 distinct=distinct, 4491 by_name=by_name, 4492 expression=expression, 4493 ) 4494 4495 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4496 expression = this.expression 4497 4498 if expression: 4499 for arg in self.SET_OP_MODIFIERS: 4500 expr = expression.args.get(arg) 4501 if expr: 4502 this.set(arg, expr.pop()) 4503 4504 return this 4505 4506 def _parse_expression(self) -> t.Optional[exp.Expression]: 4507 return self._parse_alias(self._parse_assignment()) 4508 4509 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4510 this = self._parse_disjunction() 4511 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4512 # This allows us to parse <non-identifier token> := <expr> 4513 this = exp.column( 4514 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4515 ) 4516 4517 while self._match_set(self.ASSIGNMENT): 4518 if isinstance(this, exp.Column) and len(this.parts) == 1: 4519 this = this.this 4520 4521 this = self.expression( 4522 self.ASSIGNMENT[self._prev.token_type], 4523 this=this, 4524 comments=self._prev_comments, 4525 expression=self._parse_assignment(), 4526 ) 4527 4528 return this 4529 4530 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4531 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4532 4533 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4534 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4535 4536 def _parse_equality(self) -> t.Optional[exp.Expression]: 4537 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4538 4539 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4540 return self._parse_tokens(self._parse_range, self.COMPARISON) 4541 4542 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4543 this = this or self._parse_bitwise() 4544 negate = self._match(TokenType.NOT) 4545 4546 if self._match_set(self.RANGE_PARSERS): 4547 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4548 if not expression: 4549 return this 4550 4551 this = expression 4552 elif self._match(TokenType.ISNULL): 4553 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4554 4555 # Postgres supports ISNULL and NOTNULL for conditions. 
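# For example, "x ISNULL" is parsed as x IS NULL, and "x NOTNULL" as NOT x IS NULL.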
4556 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4557 if self._match(TokenType.NOTNULL): 4558 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4559 this = self.expression(exp.Not, this=this) 4560 4561 if negate: 4562 this = self._negate_range(this) 4563 4564 if self._match(TokenType.IS): 4565 this = self._parse_is(this) 4566 4567 return this 4568 4569 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4570 if not this: 4571 return this 4572 4573 return self.expression(exp.Not, this=this) 4574 4575 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4576 index = self._index - 1 4577 negate = self._match(TokenType.NOT) 4578 4579 if self._match_text_seq("DISTINCT", "FROM"): 4580 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4581 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4582 4583 if self._match(TokenType.JSON): 4584 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4585 4586 if self._match_text_seq("WITH"): 4587 _with = True 4588 elif self._match_text_seq("WITHOUT"): 4589 _with = False 4590 else: 4591 _with = None 4592 4593 unique = self._match(TokenType.UNIQUE) 4594 self._match_text_seq("KEYS") 4595 expression: t.Optional[exp.Expression] = self.expression( 4596 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4597 ) 4598 else: 4599 expression = self._parse_primary() or self._parse_null() 4600 if not expression: 4601 self._retreat(index) 4602 return None 4603 4604 this = self.expression(exp.Is, this=this, expression=expression) 4605 return self.expression(exp.Not, this=this) if negate else this 4606 4607 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4608 unnest = self._parse_unnest(with_alias=False) 4609 if unnest: 4610 this = self.expression(exp.In, this=this, unnest=unnest) 4611 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4612 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4613 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4614 4615 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4616 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4617 else: 4618 this = self.expression(exp.In, this=this, expressions=expressions) 4619 4620 if matched_l_paren: 4621 self._match_r_paren(this) 4622 elif not self._match(TokenType.R_BRACKET, expression=this): 4623 self.raise_error("Expecting ]") 4624 else: 4625 this = self.expression(exp.In, this=this, field=self._parse_column()) 4626 4627 return this 4628 4629 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4630 low = self._parse_bitwise() 4631 self._match(TokenType.AND) 4632 high = self._parse_bitwise() 4633 return self.expression(exp.Between, this=this, low=low, high=high) 4634 4635 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4636 if not self._match(TokenType.ESCAPE): 4637 return this 4638 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4639 4640 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4641 index = self._index 4642 4643 if not self._match(TokenType.INTERVAL) and match_interval: 4644 return None 4645 4646 if self._match(TokenType.STRING, advance=False): 4647 this = self._parse_primary() 4648 else: 4649 this = self._parse_term() 4650 4651 if not 
this or ( 4652 isinstance(this, exp.Column) 4653 and not this.table 4654 and not this.this.quoted 4655 and this.name.upper() == "IS" 4656 ): 4657 self._retreat(index) 4658 return None 4659 4660 unit = self._parse_function() or ( 4661 not self._match(TokenType.ALIAS, advance=False) 4662 and self._parse_var(any_token=True, upper=True) 4663 ) 4664 4665 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4666 # each INTERVAL expression into this canonical form so it's easy to transpile 4667 if this and this.is_number: 4668 this = exp.Literal.string(this.to_py()) 4669 elif this and this.is_string: 4670 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4671 if parts and unit: 4672 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4673 unit = None 4674 self._retreat(self._index - 1) 4675 4676 if len(parts) == 1: 4677 this = exp.Literal.string(parts[0][0]) 4678 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4679 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4680 unit = self.expression( 4681 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4682 ) 4683 4684 interval = self.expression(exp.Interval, this=this, unit=unit) 4685 4686 index = self._index 4687 self._match(TokenType.PLUS) 4688 4689 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4690 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4691 return self.expression( 4692 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4693 ) 4694 4695 self._retreat(index) 4696 return interval 4697 4698 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4699 this = self._parse_term() 4700 4701 while True: 4702 if self._match_set(self.BITWISE): 4703 this = self.expression( 4704 self.BITWISE[self._prev.token_type], 4705 this=this, 4706 expression=self._parse_term(), 4707 ) 4708 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4709 this = self.expression( 4710 exp.DPipe, 4711 this=this, 4712 expression=self._parse_term(), 4713 safe=not self.dialect.STRICT_STRING_CONCAT, 4714 ) 4715 elif self._match(TokenType.DQMARK): 4716 this = self.expression( 4717 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4718 ) 4719 elif self._match_pair(TokenType.LT, TokenType.LT): 4720 this = self.expression( 4721 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4722 ) 4723 elif self._match_pair(TokenType.GT, TokenType.GT): 4724 this = self.expression( 4725 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4726 ) 4727 else: 4728 break 4729 4730 return this 4731 4732 def _parse_term(self) -> t.Optional[exp.Expression]: 4733 this = self._parse_factor() 4734 4735 while self._match_set(self.TERM): 4736 klass = self.TERM[self._prev.token_type] 4737 comments = self._prev_comments 4738 expression = self._parse_factor() 4739 4740 this = self.expression(klass, this=this, comments=comments, expression=expression) 4741 4742 if isinstance(this, exp.Collate): 4743 expr = this.expression 4744 4745 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4746 # fallback to Identifier / Var 4747 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4748 ident = expr.this 4749 if isinstance(ident, exp.Identifier): 4750 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4751 4752 return this 4753 4754 def _parse_factor(self) -> t.Optional[exp.Expression]: 4755 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4756 this = parse_method() 4757 4758 while self._match_set(self.FACTOR): 4759 klass = self.FACTOR[self._prev.token_type] 4760 comments = self._prev_comments 4761 expression = parse_method() 4762 4763 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4764 self._retreat(self._index - 1) 4765 return this 4766 4767 this = self.expression(klass, this=this, comments=comments, expression=expression) 4768 4769 if isinstance(this, exp.Div): 4770 this.args["typed"] = self.dialect.TYPED_DIVISION 4771 this.args["safe"] = self.dialect.SAFE_DIVISION 4772 4773 return this 4774 4775 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4776 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4777 4778 def _parse_unary(self) -> t.Optional[exp.Expression]: 4779 if self._match_set(self.UNARY_PARSERS): 4780 return self.UNARY_PARSERS[self._prev.token_type](self) 4781 return self._parse_at_time_zone(self._parse_type()) 4782 4783 def _parse_type( 4784 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4785 ) -> t.Optional[exp.Expression]: 4786 interval = parse_interval and self._parse_interval() 4787 if interval: 4788 return interval 4789 4790 index = self._index 4791 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4792 4793 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4794 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4795 if isinstance(data_type, exp.Cast): 4796 # This constructor can contain ops directly after it, for instance struct unnesting: 4797 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4798 return self._parse_column_ops(data_type) 4799 4800 if data_type: 4801 index2 = self._index 4802 this = self._parse_primary() 4803 4804 if isinstance(this, exp.Literal): 4805 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4806 if parser: 4807 return parser(self, this, data_type) 4808 4809 return self.expression(exp.Cast, this=this, to=data_type) 4810 4811 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4812 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4813 # 4814 # If the index difference here is greater than 1, that means the parser itself must have 4815 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4816 # 4817 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4818 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4819 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4820 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4821 # 4822 # In these cases, we don't really want to return the converted type, but instead retreat 4823 # and try to parse a Column or Identifier in the section below.
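# Concretely: an explicit DECIMAL(38, 0) consumes the precision/scale tokens here, so index2 - index > 1 and the parsed type is kept below, whereas a bare DECIMAL whose expressions were injected by a TYPE_CONVERTERS callable advances only one token, so we retreat and re-parse the span as a Column or Identifier.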
4824 if data_type.expressions and index2 - index > 1: 4825 self._retreat(index2) 4826 return self._parse_column_ops(data_type) 4827 4828 self._retreat(index) 4829 4830 if fallback_to_identifier: 4831 return self._parse_id_var() 4832 4833 this = self._parse_column() 4834 return this and self._parse_column_ops(this) 4835 4836 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4837 this = self._parse_type() 4838 if not this: 4839 return None 4840 4841 if isinstance(this, exp.Column) and not this.table: 4842 this = exp.var(this.name.upper()) 4843 4844 return self.expression( 4845 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4846 ) 4847 4848 def _parse_types( 4849 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4850 ) -> t.Optional[exp.Expression]: 4851 index = self._index 4852 4853 this: t.Optional[exp.Expression] = None 4854 prefix = self._match_text_seq("SYSUDTLIB", ".") 4855 4856 if not self._match_set(self.TYPE_TOKENS): 4857 identifier = allow_identifiers and self._parse_id_var( 4858 any_token=False, tokens=(TokenType.VAR,) 4859 ) 4860 if isinstance(identifier, exp.Identifier): 4861 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4862 4863 if len(tokens) != 1: 4864 self.raise_error("Unexpected identifier", self._prev) 4865 4866 if tokens[0].token_type in self.TYPE_TOKENS: 4867 self._prev = tokens[0] 4868 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4869 type_name = identifier.name 4870 4871 while self._match(TokenType.DOT): 4872 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4873 4874 this = exp.DataType.build(type_name, udt=True) 4875 else: 4876 self._retreat(self._index - 1) 4877 return None 4878 else: 4879 return None 4880 4881 type_token = self._prev.token_type 4882 4883 if type_token == TokenType.PSEUDO_TYPE: 4884 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4885 4886 if type_token == TokenType.OBJECT_IDENTIFIER: 4887 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4888 4889 # https://materialize.com/docs/sql/types/map/ 4890 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4891 key_type = self._parse_types( 4892 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4893 ) 4894 if not self._match(TokenType.FARROW): 4895 self._retreat(index) 4896 return None 4897 4898 value_type = self._parse_types( 4899 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4900 ) 4901 if not self._match(TokenType.R_BRACKET): 4902 self._retreat(index) 4903 return None 4904 4905 return exp.DataType( 4906 this=exp.DataType.Type.MAP, 4907 expressions=[key_type, value_type], 4908 nested=True, 4909 prefix=prefix, 4910 ) 4911 4912 nested = type_token in self.NESTED_TYPE_TOKENS 4913 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4914 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4915 expressions = None 4916 maybe_func = False 4917 4918 if self._match(TokenType.L_PAREN): 4919 if is_struct: 4920 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4921 elif nested: 4922 expressions = self._parse_csv( 4923 lambda: self._parse_types( 4924 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4925 ) 4926 ) 4927 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4928 this = expressions[0] 4929 this.set("nullable", True) 4930 self._match_r_paren() 4931 return this 4932 elif type_token in self.ENUM_TYPE_TOKENS: 4933 
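# Enum values may carry explicit mappings (e.g. ClickHouse's Enum8('a' = 1, 'b' = 2)), hence _parse_equality rather than a plain type parser: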
expressions = self._parse_csv(self._parse_equality) 4934 elif is_aggregate: 4935 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4936 any_token=False, tokens=(TokenType.VAR,) 4937 ) 4938 if not func_or_ident or not self._match(TokenType.COMMA): 4939 return None 4940 expressions = self._parse_csv( 4941 lambda: self._parse_types( 4942 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4943 ) 4944 ) 4945 expressions.insert(0, func_or_ident) 4946 else: 4947 expressions = self._parse_csv(self._parse_type_size) 4948 4949 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4950 if type_token == TokenType.VECTOR and len(expressions) == 2: 4951 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4952 4953 if not expressions or not self._match(TokenType.R_PAREN): 4954 self._retreat(index) 4955 return None 4956 4957 maybe_func = True 4958 4959 values: t.Optional[t.List[exp.Expression]] = None 4960 4961 if nested and self._match(TokenType.LT): 4962 if is_struct: 4963 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4964 else: 4965 expressions = self._parse_csv( 4966 lambda: self._parse_types( 4967 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4968 ) 4969 ) 4970 4971 if not self._match(TokenType.GT): 4972 self.raise_error("Expecting >") 4973 4974 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4975 values = self._parse_csv(self._parse_assignment) 4976 if not values and is_struct: 4977 values = None 4978 self._retreat(self._index - 1) 4979 else: 4980 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4981 4982 if type_token in self.TIMESTAMPS: 4983 if self._match_text_seq("WITH", "TIME", "ZONE"): 4984 maybe_func = False 4985 tz_type = ( 4986 exp.DataType.Type.TIMETZ 4987 if type_token in self.TIMES 4988 else exp.DataType.Type.TIMESTAMPTZ 4989 ) 4990 this = exp.DataType(this=tz_type, expressions=expressions) 4991 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4992 maybe_func = False 4993 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4994 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4995 maybe_func = False 4996 elif type_token == TokenType.INTERVAL: 4997 unit = self._parse_var(upper=True) 4998 if unit: 4999 if self._match_text_seq("TO"): 5000 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5001 5002 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5003 else: 5004 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5005 5006 if maybe_func and check_func: 5007 index2 = self._index 5008 peek = self._parse_string() 5009 5010 if not peek: 5011 self._retreat(index) 5012 return None 5013 5014 self._retreat(index2) 5015 5016 if not this: 5017 if self._match_text_seq("UNSIGNED"): 5018 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5019 if not unsigned_type_token: 5020 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5021 5022 type_token = unsigned_type_token or type_token 5023 5024 this = exp.DataType( 5025 this=exp.DataType.Type[type_token.value], 5026 expressions=expressions, 5027 nested=nested, 5028 prefix=prefix, 5029 ) 5030 5031 # Empty arrays/structs are allowed 5032 if values is not None: 5033 cls = exp.Struct if is_struct else exp.Array 5034 this = exp.cast(cls(expressions=values), this, copy=False) 5035 5036 elif expressions: 5037 this.set("expressions", 
expressions) 5038 5039 # https://materialize.com/docs/sql/types/list/#type-name 5040 while self._match(TokenType.LIST): 5041 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5042 5043 index = self._index 5044 5045 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5046 matched_array = self._match(TokenType.ARRAY) 5047 5048 while self._curr: 5049 datatype_token = self._prev.token_type 5050 matched_l_bracket = self._match(TokenType.L_BRACKET) 5051 if not matched_l_bracket and not matched_array: 5052 break 5053 5054 matched_array = False 5055 values = self._parse_csv(self._parse_assignment) or None 5056 if ( 5057 values 5058 and not schema 5059 and ( 5060 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5061 ) 5062 ): 5063 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5064 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5065 self._retreat(index) 5066 break 5067 5068 this = exp.DataType( 5069 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5070 ) 5071 self._match(TokenType.R_BRACKET) 5072 5073 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5074 converter = self.TYPE_CONVERTERS.get(this.this) 5075 if converter: 5076 this = converter(t.cast(exp.DataType, this)) 5077 5078 return this 5079 5080 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5081 index = self._index 5082 5083 if ( 5084 self._curr 5085 and self._next 5086 and self._curr.token_type in self.TYPE_TOKENS 5087 and self._next.token_type in self.TYPE_TOKENS 5088 ): 5089 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5090 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5091 this = self._parse_id_var() 5092 else: 5093 this = ( 5094 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5095 or self._parse_id_var() 5096 ) 5097 5098 self._match(TokenType.COLON) 5099 5100 if ( 5101 type_required 5102 and not isinstance(this, exp.DataType) 5103 and not self._match_set(self.TYPE_TOKENS, advance=False) 5104 ): 5105 self._retreat(index) 5106 return self._parse_types() 5107 5108 return self._parse_column_def(this) 5109 5110 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5111 if not self._match_text_seq("AT", "TIME", "ZONE"): 5112 return this 5113 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5114 5115 def _parse_column(self) -> t.Optional[exp.Expression]: 5116 this = self._parse_column_reference() 5117 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5118 5119 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5120 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5121 5122 return column 5123 5124 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5125 this = self._parse_field() 5126 if ( 5127 not this 5128 and self._match(TokenType.VALUES, advance=False) 5129 and self.VALUES_FOLLOWED_BY_PAREN 5130 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5131 ): 5132 this = self._parse_id_var() 5133 5134 if isinstance(this, exp.Identifier): 5135 # We bubble up comments from the Identifier to the Column 5136 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5137 5138 return this 5139 5140 def _parse_colon_as_variant_extract( 5141 self, this: t.Optional[exp.Expression] 5142 ) -> t.Optional[exp.Expression]: 5143 casts = [] 5144 json_path = [] 5145 escape = None 5146 5147 while self._match(TokenType.COLON): 5148 start_index = self._index 5149 5150 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5151 path = self._parse_column_ops( 5152 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5153 ) 5154 5155 # The cast :: operator has a lower precedence than the extraction operator :, so 5156 # we rearrange the AST appropriately to avoid casting the JSON path 5157 while isinstance(path, exp.Cast): 5158 casts.append(path.to) 5159 path = path.this 5160 5161 if casts: 5162 dcolon_offset = next( 5163 i 5164 for i, t in enumerate(self._tokens[start_index:]) 5165 if t.token_type == TokenType.DCOLON 5166 ) 5167 end_token = self._tokens[start_index + dcolon_offset - 1] 5168 else: 5169 end_token = self._prev 5170 5171 if path: 5172 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5173 # it'll roundtrip to a string literal in GET_PATH 5174 if isinstance(path, exp.Identifier) and path.quoted: 5175 escape = True 5176 5177 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5178 5179 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5180 # Databricks transforms it back to the colon/dot notation 5181 if json_path: 5182 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5183 5184 if json_path_expr: 5185 json_path_expr.set("escape", escape) 5186 5187 this = self.expression( 5188 exp.JSONExtract, 5189 this=this, 5190 expression=json_path_expr, 5191 variant_extract=True, 5192 ) 5193 5194 while casts: 5195 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5196 5197 return this 5198 5199 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5200 return self._parse_types() 5201 5202 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5203 this = self._parse_bracket(this) 5204 5205 while self._match_set(self.COLUMN_OPERATORS): 5206 op_token = self._prev.token_type 5207 op = self.COLUMN_OPERATORS.get(op_token) 5208 5209 if op_token == TokenType.DCOLON: 5210 field = self._parse_dcolon() 5211 if not field: 5212 self.raise_error("Expected type") 5213 elif op and self._curr: 5214 field = self._parse_column_reference() or self._parse_bracket() 5215 else: 5216 field = self._parse_field(any_token=True, anonymous_func=True) 5217 5218 if isinstance(field, (exp.Func, exp.Window)) and this: 5219 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5220 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5221 this = exp.replace_tree( 5222 this, 5223 lambda n: ( 5224 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5225 if n.table 5226 else n.this 5227 ) 5228 if isinstance(n, exp.Column) 5229 else n, 5230 ) 5231 5232 if op: 5233 this = op(self, this, field) 5234 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5235 this = self.expression( 5236 exp.Column, 5237 comments=this.comments, 5238 this=field, 5239 table=this.this, 5240 db=this.args.get("table"), 5241 catalog=this.args.get("db"), 5242 ) 5243 elif isinstance(field, exp.Window): 5244 # Move the exp.Dot's to the window's function 5245 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5246 field.set("this", window_func) 5247 this = field 5248 else: 5249 this = self.expression(exp.Dot, this=this, expression=field) 5250 5251 if field and field.comments: 5252 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5253 5254 this = self._parse_bracket(this) 5255 5256 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5257 5258 def _parse_primary(self) -> t.Optional[exp.Expression]: 5259 if self._match_set(self.PRIMARY_PARSERS): 5260 token_type = self._prev.token_type 5261 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5262 5263 if token_type == TokenType.STRING: 5264 expressions = [primary] 5265 while self._match(TokenType.STRING): 5266 expressions.append(exp.Literal.string(self._prev.text)) 5267 5268 if len(expressions) > 1: 5269 return self.expression(exp.Concat, expressions=expressions) 5270 5271 return primary 5272 5273 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5274 return exp.Literal.number(f"0.{self._prev.text}") 5275 5276 if 
self._match(TokenType.L_PAREN): 5277 comments = self._prev_comments 5278 query = self._parse_select() 5279 5280 if query: 5281 expressions = [query] 5282 else: 5283 expressions = self._parse_expressions() 5284 5285 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5286 5287 if not this and self._match(TokenType.R_PAREN, advance=False): 5288 this = self.expression(exp.Tuple) 5289 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5290 this = self._parse_subquery(this=this, parse_alias=False) 5291 elif isinstance(this, exp.Subquery): 5292 this = self._parse_subquery( 5293 this=self._parse_set_operations(this), parse_alias=False 5294 ) 5295 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5296 this = self.expression(exp.Tuple, expressions=expressions) 5297 else: 5298 this = self.expression(exp.Paren, this=this) 5299 5300 if this: 5301 this.add_comments(comments) 5302 5303 self._match_r_paren(expression=this) 5304 return this 5305 5306 return None 5307 5308 def _parse_field( 5309 self, 5310 any_token: bool = False, 5311 tokens: t.Optional[t.Collection[TokenType]] = None, 5312 anonymous_func: bool = False, 5313 ) -> t.Optional[exp.Expression]: 5314 if anonymous_func: 5315 field = ( 5316 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5317 or self._parse_primary() 5318 ) 5319 else: 5320 field = self._parse_primary() or self._parse_function( 5321 anonymous=anonymous_func, any_token=any_token 5322 ) 5323 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5324 5325 def _parse_function( 5326 self, 5327 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5328 anonymous: bool = False, 5329 optional_parens: bool = True, 5330 any_token: bool = False, 5331 ) -> t.Optional[exp.Expression]: 5332 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5333 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5334 fn_syntax = False 5335 if ( 5336 self._match(TokenType.L_BRACE, advance=False) 5337 and self._next 5338 and self._next.text.upper() == "FN" 5339 ): 5340 self._advance(2) 5341 fn_syntax = True 5342 5343 func = self._parse_function_call( 5344 functions=functions, 5345 anonymous=anonymous, 5346 optional_parens=optional_parens, 5347 any_token=any_token, 5348 ) 5349 5350 if fn_syntax: 5351 self._match(TokenType.R_BRACE) 5352 5353 return func 5354 5355 def _parse_function_call( 5356 self, 5357 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5358 anonymous: bool = False, 5359 optional_parens: bool = True, 5360 any_token: bool = False, 5361 ) -> t.Optional[exp.Expression]: 5362 if not self._curr: 5363 return None 5364 5365 comments = self._curr.comments 5366 token_type = self._curr.token_type 5367 this = self._curr.text 5368 upper = this.upper() 5369 5370 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5371 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5372 self._advance() 5373 return self._parse_window(parser(self)) 5374 5375 if not self._next or self._next.token_type != TokenType.L_PAREN: 5376 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5377 self._advance() 5378 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5379 5380 return None 5381 5382 if any_token: 5383 if token_type in self.RESERVED_TOKENS: 5384 return None 5385 elif token_type not in self.FUNC_TOKENS: 5386 return None 5387 5388 self._advance(2) 5389 5390 parser = self.FUNCTION_PARSERS.get(upper) 5391 if parser and not anonymous: 5392 this = parser(self) 
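# Otherwise, fall back to the generic call path below: subquery predicates such as EXISTS (SELECT ...), then builders registered in FUNCTIONS, and finally exp.Anonymous for unknown names.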
5393 else: 5394 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5395 5396 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5397 this = self.expression( 5398 subquery_predicate, comments=comments, this=self._parse_select() 5399 ) 5400 self._match_r_paren() 5401 return this 5402 5403 if functions is None: 5404 functions = self.FUNCTIONS 5405 5406 function = functions.get(upper) 5407 known_function = function and not anonymous 5408 5409 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5410 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5411 5412 post_func_comments = self._curr and self._curr.comments 5413 if known_function and post_func_comments: 5414 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5415 # call we'll construct it as exp.Anonymous, even if it's "known" 5416 if any( 5417 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5418 for comment in post_func_comments 5419 ): 5420 known_function = False 5421 5422 if alias and known_function: 5423 args = self._kv_to_prop_eq(args) 5424 5425 if known_function: 5426 func_builder = t.cast(t.Callable, function) 5427 5428 if "dialect" in func_builder.__code__.co_varnames: 5429 func = func_builder(args, dialect=self.dialect) 5430 else: 5431 func = func_builder(args) 5432 5433 func = self.validate_expression(func, args) 5434 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5435 func.meta["name"] = this 5436 5437 this = func 5438 else: 5439 if token_type == TokenType.IDENTIFIER: 5440 this = exp.Identifier(this=this, quoted=True) 5441 this = self.expression(exp.Anonymous, this=this, expressions=args) 5442 5443 if isinstance(this, exp.Expression): 5444 this.add_comments(comments) 5445 5446 self._match_r_paren(this) 5447 return self._parse_window(this) 5448 5449 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5450 return expression 5451 5452 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5453 transformed = [] 5454 5455 for index, e in enumerate(expressions): 5456 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5457 if isinstance(e, exp.Alias): 5458 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5459 5460 if not isinstance(e, exp.PropertyEQ): 5461 e = self.expression( 5462 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5463 ) 5464 5465 if isinstance(e.this, exp.Column): 5466 e.this.replace(e.this.this) 5467 else: 5468 e = self._to_prop_eq(e, index) 5469 5470 transformed.append(e) 5471 5472 return transformed 5473 5474 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5475 return self._parse_statement() 5476 5477 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5478 return self._parse_column_def(self._parse_id_var()) 5479 5480 def _parse_user_defined_function( 5481 self, kind: t.Optional[TokenType] = None 5482 ) -> t.Optional[exp.Expression]: 5483 this = self._parse_id_var() 5484 5485 while self._match(TokenType.DOT): 5486 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5487 5488 if not self._match(TokenType.L_PAREN): 5489 return this 5490 5491 expressions = self._parse_csv(self._parse_function_parameter) 5492 self._match_r_paren() 5493 return self.expression( 5494 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5495 ) 5496 5497 def _parse_introducer(self, token: Token) -> exp.Introducer | 
exp.Identifier: 5498 literal = self._parse_primary() 5499 if literal: 5500 return self.expression(exp.Introducer, this=token.text, expression=literal) 5501 5502 return self.expression(exp.Identifier, this=token.text) 5503 5504 def _parse_session_parameter(self) -> exp.SessionParameter: 5505 kind = None 5506 this = self._parse_id_var() or self._parse_primary() 5507 5508 if this and self._match(TokenType.DOT): 5509 kind = this.name 5510 this = self._parse_var() or self._parse_primary() 5511 5512 return self.expression(exp.SessionParameter, this=this, kind=kind) 5513 5514 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5515 return self._parse_id_var() 5516 5517 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5518 index = self._index 5519 5520 if self._match(TokenType.L_PAREN): 5521 expressions = t.cast( 5522 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5523 ) 5524 5525 if not self._match(TokenType.R_PAREN): 5526 self._retreat(index) 5527 else: 5528 expressions = [self._parse_lambda_arg()] 5529 5530 if self._match_set(self.LAMBDAS): 5531 return self.LAMBDAS[self._prev.token_type](self, expressions) 5532 5533 self._retreat(index) 5534 5535 this: t.Optional[exp.Expression] 5536 5537 if self._match(TokenType.DISTINCT): 5538 this = self.expression( 5539 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5540 ) 5541 else: 5542 this = self._parse_select_or_expression(alias=alias) 5543 5544 return self._parse_limit( 5545 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5546 ) 5547 5548 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5549 index = self._index 5550 if not self._match(TokenType.L_PAREN): 5551 return this 5552 5553 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5554 # expr can be of both types 5555 if self._match_set(self.SELECT_START_TOKENS): 5556 self._retreat(index) 5557 return this 5558 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5559 self._match_r_paren() 5560 return self.expression(exp.Schema, this=this, expressions=args) 5561 5562 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5563 return self._parse_column_def(self._parse_field(any_token=True)) 5564 5565 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5566 # column defs are not really columns, they're identifiers 5567 if isinstance(this, exp.Column): 5568 this = this.this 5569 5570 kind = self._parse_types(schema=True) 5571 5572 if self._match_text_seq("FOR", "ORDINALITY"): 5573 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5574 5575 constraints: t.List[exp.Expression] = [] 5576 5577 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5578 ("ALIAS", "MATERIALIZED") 5579 ): 5580 persisted = self._prev.text.upper() == "MATERIALIZED" 5581 constraint_kind = exp.ComputedColumnConstraint( 5582 this=self._parse_assignment(), 5583 persisted=persisted or self._match_text_seq("PERSISTED"), 5584 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5585 ) 5586 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5587 elif ( 5588 kind 5589 and self._match(TokenType.ALIAS, advance=False) 5590 and ( 5591 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5592 or (self._next and self._next.token_type == TokenType.L_PAREN) 5593 ) 5594 ): 5595 self._advance() 5596 constraints.append( 5597 self.expression( 5598 exp.ColumnConstraint, 5599 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5600 ) 5601 ) 5602 5603 while True: 5604 constraint = self._parse_column_constraint() 5605 if not constraint: 5606 break 5607 constraints.append(constraint) 5608 5609 if not kind and not constraints: 5610 return this 5611 5612 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5613 5614 def _parse_auto_increment( 5615 self, 5616 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5617 start = None 5618 increment = None 5619 5620 if self._match(TokenType.L_PAREN, advance=False): 5621 args = self._parse_wrapped_csv(self._parse_bitwise) 5622 start = seq_get(args, 0) 5623 increment = seq_get(args, 1) 5624 elif self._match_text_seq("START"): 5625 start = self._parse_bitwise() 5626 self._match_text_seq("INCREMENT") 5627 increment = self._parse_bitwise() 5628 5629 if start and increment: 5630 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5631 5632 return exp.AutoIncrementColumnConstraint() 5633 5634 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5635 if not self._match_text_seq("REFRESH"): 5636 self._retreat(self._index - 1) 5637 return None 5638 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5639 5640 def _parse_compress(self) -> exp.CompressColumnConstraint: 5641 if self._match(TokenType.L_PAREN, advance=False): 5642 return self.expression( 5643 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5644 ) 5645 5646 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5647 5648 def _parse_generated_as_identity( 5649 self, 5650 ) -> ( 5651 exp.GeneratedAsIdentityColumnConstraint 5652 | exp.ComputedColumnConstraint 5653 
| exp.GeneratedAsRowColumnConstraint 5654 ): 5655 if self._match_text_seq("BY", "DEFAULT"): 5656 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5657 this = self.expression( 5658 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5659 ) 5660 else: 5661 self._match_text_seq("ALWAYS") 5662 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5663 5664 self._match(TokenType.ALIAS) 5665 5666 if self._match_text_seq("ROW"): 5667 start = self._match_text_seq("START") 5668 if not start: 5669 self._match(TokenType.END) 5670 hidden = self._match_text_seq("HIDDEN") 5671 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5672 5673 identity = self._match_text_seq("IDENTITY") 5674 5675 if self._match(TokenType.L_PAREN): 5676 if self._match(TokenType.START_WITH): 5677 this.set("start", self._parse_bitwise()) 5678 if self._match_text_seq("INCREMENT", "BY"): 5679 this.set("increment", self._parse_bitwise()) 5680 if self._match_text_seq("MINVALUE"): 5681 this.set("minvalue", self._parse_bitwise()) 5682 if self._match_text_seq("MAXVALUE"): 5683 this.set("maxvalue", self._parse_bitwise()) 5684 5685 if self._match_text_seq("CYCLE"): 5686 this.set("cycle", True) 5687 elif self._match_text_seq("NO", "CYCLE"): 5688 this.set("cycle", False) 5689 5690 if not identity: 5691 this.set("expression", self._parse_range()) 5692 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5693 args = self._parse_csv(self._parse_bitwise) 5694 this.set("start", seq_get(args, 0)) 5695 this.set("increment", seq_get(args, 1)) 5696 5697 self._match_r_paren() 5698 5699 return this 5700 5701 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5702 self._match_text_seq("LENGTH") 5703 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5704 5705 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5706 if self._match_text_seq("NULL"): 5707 return self.expression(exp.NotNullColumnConstraint) 5708 if self._match_text_seq("CASESPECIFIC"): 5709 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5710 if self._match_text_seq("FOR", "REPLICATION"): 5711 return self.expression(exp.NotForReplicationColumnConstraint) 5712 5713 # Unconsume the `NOT` token 5714 self._retreat(self._index - 1) 5715 return None 5716 5717 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5718 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5719 5720 procedure_option_follows = ( 5721 self._match(TokenType.WITH, advance=False) 5722 and self._next 5723 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5724 ) 5725 5726 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5727 return self.expression( 5728 exp.ColumnConstraint, 5729 this=this, 5730 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5731 ) 5732 5733 return this 5734 5735 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5736 if not self._match(TokenType.CONSTRAINT): 5737 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5738 5739 return self.expression( 5740 exp.Constraint, 5741 this=self._parse_id_var(), 5742 expressions=self._parse_unnamed_constraints(), 5743 ) 5744 5745 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5746 constraints = [] 5747 while True: 5748 constraint = self._parse_unnamed_constraint() or self._parse_function() 5749 if not constraint: 5750 break 5751 
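# Function-shaped constraints (via _parse_function above) are accepted too; the loop keeps accumulating until neither form matches.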
constraints.append(constraint) 5752 5753 return constraints 5754 5755 def _parse_unnamed_constraint( 5756 self, constraints: t.Optional[t.Collection[str]] = None 5757 ) -> t.Optional[exp.Expression]: 5758 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5759 constraints or self.CONSTRAINT_PARSERS 5760 ): 5761 return None 5762 5763 constraint = self._prev.text.upper() 5764 if constraint not in self.CONSTRAINT_PARSERS: 5765 self.raise_error(f"No parser found for schema constraint {constraint}.") 5766 5767 return self.CONSTRAINT_PARSERS[constraint](self) 5768 5769 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5770 return self._parse_id_var(any_token=False) 5771 5772 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5773 self._match_text_seq("KEY") 5774 return self.expression( 5775 exp.UniqueColumnConstraint, 5776 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5777 this=self._parse_schema(self._parse_unique_key()), 5778 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5779 on_conflict=self._parse_on_conflict(), 5780 ) 5781 5782 def _parse_key_constraint_options(self) -> t.List[str]: 5783 options = [] 5784 while True: 5785 if not self._curr: 5786 break 5787 5788 if self._match(TokenType.ON): 5789 action = None 5790 on = self._advance_any() and self._prev.text 5791 5792 if self._match_text_seq("NO", "ACTION"): 5793 action = "NO ACTION" 5794 elif self._match_text_seq("CASCADE"): 5795 action = "CASCADE" 5796 elif self._match_text_seq("RESTRICT"): 5797 action = "RESTRICT" 5798 elif self._match_pair(TokenType.SET, TokenType.NULL): 5799 action = "SET NULL" 5800 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5801 action = "SET DEFAULT" 5802 else: 5803 self.raise_error("Invalid key constraint") 5804 5805 options.append(f"ON {on} {action}") 5806 else: 5807 var = self._parse_var_from_options( 5808 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5809 ) 5810 if not var: 5811 break 5812 options.append(var.name) 5813 5814 return options 5815 5816 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5817 if match and not self._match(TokenType.REFERENCES): 5818 return None 5819 5820 expressions = None 5821 this = self._parse_table(schema=True) 5822 options = self._parse_key_constraint_options() 5823 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5824 5825 def _parse_foreign_key(self) -> exp.ForeignKey: 5826 expressions = self._parse_wrapped_id_vars() 5827 reference = self._parse_references() 5828 options = {} 5829 5830 while self._match(TokenType.ON): 5831 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5832 self.raise_error("Expected DELETE or UPDATE") 5833 5834 kind = self._prev.text.lower() 5835 5836 if self._match_text_seq("NO", "ACTION"): 5837 action = "NO ACTION" 5838 elif self._match(TokenType.SET): 5839 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5840 action = "SET " + self._prev.text.upper() 5841 else: 5842 self._advance() 5843 action = self._prev.text.upper() 5844 5845 options[kind] = action 5846 5847 return self.expression( 5848 exp.ForeignKey, 5849 expressions=expressions, 5850 reference=reference, 5851 **options, # type: ignore 5852 ) 5853 5854 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5855 return self._parse_field() 5856 5857 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5858 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5859 
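# The keyword that routed us here was already consumed by the constraint dispatcher, so back up one token before bailing out (the same retreat pattern as _parse_not_constraint above).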
self._retreat(self._index - 1) 5860 return None 5861 5862 id_vars = self._parse_wrapped_id_vars() 5863 return self.expression( 5864 exp.PeriodForSystemTimeConstraint, 5865 this=seq_get(id_vars, 0), 5866 expression=seq_get(id_vars, 1), 5867 ) 5868 5869 def _parse_primary_key( 5870 self, wrapped_optional: bool = False, in_props: bool = False 5871 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5872 desc = ( 5873 self._match_set((TokenType.ASC, TokenType.DESC)) 5874 and self._prev.token_type == TokenType.DESC 5875 ) 5876 5877 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5878 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5879 5880 expressions = self._parse_wrapped_csv( 5881 self._parse_primary_key_part, optional=wrapped_optional 5882 ) 5883 options = self._parse_key_constraint_options() 5884 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5885 5886 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5887 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5888 5889 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5890 """ 5891 Parses a datetime column in ODBC format. We parse the column into the corresponding 5892 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5893 same as we did for `DATE('yyyy-mm-dd')`. 5894 5895 Reference: 5896 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5897 """ 5898 self._match(TokenType.VAR) 5899 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5900 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5901 if not self._match(TokenType.R_BRACE): 5902 self.raise_error("Expected }") 5903 return expression 5904 5905 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5906 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5907 return this 5908 5909 bracket_kind = self._prev.token_type 5910 if ( 5911 bracket_kind == TokenType.L_BRACE 5912 and self._curr 5913 and self._curr.token_type == TokenType.VAR 5914 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5915 ): 5916 return self._parse_odbc_datetime_literal() 5917 5918 expressions = self._parse_csv( 5919 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5920 ) 5921 5922 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5923 self.raise_error("Expected ]") 5924 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5925 self.raise_error("Expected }") 5926 5927 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5928 if bracket_kind == TokenType.L_BRACE: 5929 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5930 elif not this: 5931 this = build_array_constructor( 5932 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5933 ) 5934 else: 5935 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5936 if constructor_type: 5937 return build_array_constructor( 5938 constructor_type, 5939 args=expressions, 5940 bracket_kind=bracket_kind, 5941 dialect=self.dialect, 5942 ) 5943 5944 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5945 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5946 5947 self._add_comments(this) 5948 return self._parse_bracket(this) 
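# Editor's sketch of the bracket handling above, using sqlglot's public API (illustrative only; exact trees vary by version and dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("SELECT col[1] FROM t", read="duckdb").find(exp.Bracket)  # index shifted by the dialect's INDEX_OFFSET
#     >>> sqlglot.parse_one("SELECT {'a': 1} AS s", read="duckdb").find(exp.Struct)   # L_BRACE literal -> exp.Struct via _kv_to_prop_eq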
5949 5950 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5951 if self._match(TokenType.COLON): 5952 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5953 return this 5954 5955 def _parse_case(self) -> t.Optional[exp.Expression]: 5956 ifs = [] 5957 default = None 5958 5959 comments = self._prev_comments 5960 expression = self._parse_assignment() 5961 5962 while self._match(TokenType.WHEN): 5963 this = self._parse_assignment() 5964 self._match(TokenType.THEN) 5965 then = self._parse_assignment() 5966 ifs.append(self.expression(exp.If, this=this, true=then)) 5967 5968 if self._match(TokenType.ELSE): 5969 default = self._parse_assignment() 5970 5971 if not self._match(TokenType.END): 5972 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5973 default = exp.column("interval") 5974 else: 5975 self.raise_error("Expected END after CASE", self._prev) 5976 5977 return self.expression( 5978 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5979 ) 5980 5981 def _parse_if(self) -> t.Optional[exp.Expression]: 5982 if self._match(TokenType.L_PAREN): 5983 args = self._parse_csv(self._parse_assignment) 5984 this = self.validate_expression(exp.If.from_arg_list(args), args) 5985 self._match_r_paren() 5986 else: 5987 index = self._index - 1 5988 5989 if self.NO_PAREN_IF_COMMANDS and index == 0: 5990 return self._parse_as_command(self._prev) 5991 5992 condition = self._parse_assignment() 5993 5994 if not condition: 5995 self._retreat(index) 5996 return None 5997 5998 self._match(TokenType.THEN) 5999 true = self._parse_assignment() 6000 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6001 self._match(TokenType.END) 6002 this = self.expression(exp.If, this=condition, true=true, false=false) 6003 6004 return this 6005 6006 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6007 if not self._match_text_seq("VALUE", "FOR"): 6008 self._retreat(self._index - 1) 6009 return None 6010 6011 return self.expression( 6012 exp.NextValueFor, 6013 this=self._parse_column(), 6014 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6015 ) 6016 6017 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6018 this = self._parse_function() or self._parse_var_or_string(upper=True) 6019 6020 if self._match(TokenType.FROM): 6021 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6022 6023 if not self._match(TokenType.COMMA): 6024 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6025 6026 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6027 6028 def _parse_gap_fill(self) -> exp.GapFill: 6029 self._match(TokenType.TABLE) 6030 this = self._parse_table() 6031 6032 self._match(TokenType.COMMA) 6033 args = [this, *self._parse_csv(self._parse_lambda)] 6034 6035 gap_fill = exp.GapFill.from_arg_list(args) 6036 return self.validate_expression(gap_fill, args) 6037 6038 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6039 this = self._parse_assignment() 6040 6041 if not self._match(TokenType.ALIAS): 6042 if self._match(TokenType.COMMA): 6043 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6044 6045 self.raise_error("Expected AS after CAST") 6046 6047 fmt = None 6048 to = self._parse_types() 6049 6050 if self._match(TokenType.FORMAT): 6051 fmt_string = self._parse_string() 6052 fmt = 
self._parse_at_time_zone(fmt_string) 6053 6054 if not to: 6055 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6056 if to.this in exp.DataType.TEMPORAL_TYPES: 6057 this = self.expression( 6058 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6059 this=this, 6060 format=exp.Literal.string( 6061 format_time( 6062 fmt_string.this if fmt_string else "", 6063 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6064 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6065 ) 6066 ), 6067 safe=safe, 6068 ) 6069 6070 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6071 this.set("zone", fmt.args["zone"]) 6072 return this 6073 elif not to: 6074 self.raise_error("Expected TYPE after CAST") 6075 elif isinstance(to, exp.Identifier): 6076 to = exp.DataType.build(to.name, udt=True) 6077 elif to.this == exp.DataType.Type.CHAR: 6078 if self._match(TokenType.CHARACTER_SET): 6079 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6080 6081 return self.expression( 6082 exp.Cast if strict else exp.TryCast, 6083 this=this, 6084 to=to, 6085 format=fmt, 6086 safe=safe, 6087 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6088 ) 6089 6090 def _parse_string_agg(self) -> exp.GroupConcat: 6091 if self._match(TokenType.DISTINCT): 6092 args: t.List[t.Optional[exp.Expression]] = [ 6093 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6094 ] 6095 if self._match(TokenType.COMMA): 6096 args.extend(self._parse_csv(self._parse_assignment)) 6097 else: 6098 args = self._parse_csv(self._parse_assignment) # type: ignore 6099 6100 if self._match_text_seq("ON", "OVERFLOW"): 6101 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6102 if self._match_text_seq("ERROR"): 6103 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6104 else: 6105 self._match_text_seq("TRUNCATE") 6106 on_overflow = self.expression( 6107 exp.OverflowTruncateBehavior, 6108 this=self._parse_string(), 6109 with_count=( 6110 self._match_text_seq("WITH", "COUNT") 6111 or not self._match_text_seq("WITHOUT", "COUNT") 6112 ), 6113 ) 6114 else: 6115 on_overflow = None 6116 6117 index = self._index 6118 if not self._match(TokenType.R_PAREN) and args: 6119 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6120 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6121 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6122 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6123 6124 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6125 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6126 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
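# e.g. STRING_AGG(x, ',' ORDER BY y) (Postgres/BigQuery style) is fully handled by the branch above, while STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) falls through below so the ORDER BY is folded into the GroupConcat itself.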
6127 if not self._match_text_seq("WITHIN", "GROUP"): 6128 self._retreat(index) 6129 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6130 6131 # The corresponding match_r_paren will be called in parse_function (caller) 6132 self._match_l_paren() 6133 6134 return self.expression( 6135 exp.GroupConcat, 6136 this=self._parse_order(this=seq_get(args, 0)), 6137 separator=seq_get(args, 1), 6138 on_overflow=on_overflow, 6139 ) 6140 6141 def _parse_convert( 6142 self, strict: bool, safe: t.Optional[bool] = None 6143 ) -> t.Optional[exp.Expression]: 6144 this = self._parse_bitwise() 6145 6146 if self._match(TokenType.USING): 6147 to: t.Optional[exp.Expression] = self.expression( 6148 exp.CharacterSet, this=self._parse_var() 6149 ) 6150 elif self._match(TokenType.COMMA): 6151 to = self._parse_types() 6152 else: 6153 to = None 6154 6155 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6156 6157 def _parse_xml_table(self) -> exp.XMLTable: 6158 this = self._parse_string() 6159 6160 passing = None 6161 columns = None 6162 6163 if self._match_text_seq("PASSING"): 6164 # The BY VALUE keywords are optional and are provided for semantic clarity 6165 self._match_text_seq("BY", "VALUE") 6166 passing = self._parse_csv(self._parse_column) 6167 6168 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6169 6170 if self._match_text_seq("COLUMNS"): 6171 columns = self._parse_csv(self._parse_field_def) 6172 6173 return self.expression( 6174 exp.XMLTable, this=this, passing=passing, columns=columns, by_ref=by_ref 6175 ) 6176 6177 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6178 """ 6179 There are generally two variants of the DECODE function: 6180 6181 - DECODE(bin, charset) 6182 - DECODE(expression, search, result [, search, result] ... [, default]) 6183 6184 The second variant will always be parsed into a CASE expression. Note that NULL 6185 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6186 instead of relying on pattern matching. 
6187 """ 6188 args = self._parse_csv(self._parse_assignment) 6189 6190 if len(args) < 3: 6191 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6192 6193 expression, *expressions = args 6194 if not expression: 6195 return None 6196 6197 ifs = [] 6198 for search, result in zip(expressions[::2], expressions[1::2]): 6199 if not search or not result: 6200 return None 6201 6202 if isinstance(search, exp.Literal): 6203 ifs.append( 6204 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6205 ) 6206 elif isinstance(search, exp.Null): 6207 ifs.append( 6208 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6209 ) 6210 else: 6211 cond = exp.or_( 6212 exp.EQ(this=expression.copy(), expression=search), 6213 exp.and_( 6214 exp.Is(this=expression.copy(), expression=exp.Null()), 6215 exp.Is(this=search.copy(), expression=exp.Null()), 6216 copy=False, 6217 ), 6218 copy=False, 6219 ) 6220 ifs.append(exp.If(this=cond, true=result)) 6221 6222 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6223 6224 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6225 self._match_text_seq("KEY") 6226 key = self._parse_column() 6227 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6228 self._match_text_seq("VALUE") 6229 value = self._parse_bitwise() 6230 6231 if not key and not value: 6232 return None 6233 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6234 6235 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6236 if not this or not self._match_text_seq("FORMAT", "JSON"): 6237 return this 6238 6239 return self.expression(exp.FormatJson, this=this) 6240 6241 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6242 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6243 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6244 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6245 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6246 else: 6247 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6248 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6249 6250 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6251 6252 if not empty and not error and not null: 6253 return None 6254 6255 return self.expression( 6256 exp.OnCondition, 6257 empty=empty, 6258 error=error, 6259 null=null, 6260 ) 6261 6262 def _parse_on_handling( 6263 self, on: str, *values: str 6264 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6265 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6266 for value in values: 6267 if self._match_text_seq(value, "ON", on): 6268 return f"{value} ON {on}" 6269 6270 index = self._index 6271 if self._match(TokenType.DEFAULT): 6272 default_value = self._parse_bitwise() 6273 if self._match_text_seq("ON", on): 6274 return default_value 6275 6276 self._retreat(index) 6277 6278 return None 6279 6280 @t.overload 6281 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6282 6283 @t.overload 6284 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6285 6286 def _parse_json_object(self, agg=False): 6287 star = self._parse_star() 6288 expressions = ( 6289 [star] 6290 if star 6291 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6292 ) 6293 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6294 6295 unique_keys = None 6296 if self._match_text_seq("WITH", "UNIQUE"): 6297 unique_keys = True 6298 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6299 unique_keys = False 6300 6301 self._match_text_seq("KEYS") 6302 6303 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6304 self._parse_type() 6305 ) 6306 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6307 6308 return self.expression( 6309 exp.JSONObjectAgg if agg else exp.JSONObject, 6310 expressions=expressions, 6311 null_handling=null_handling, 6312 unique_keys=unique_keys, 6313 return_type=return_type, 6314 encoding=encoding, 6315 ) 6316 6317 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6318 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6319 if not self._match_text_seq("NESTED"): 6320 this = self._parse_id_var() 6321 kind = self._parse_types(allow_identifiers=False) 6322 nested = None 6323 else: 6324 this = None 6325 kind = None 6326 nested = True 6327 6328 path = self._match_text_seq("PATH") and self._parse_string() 6329 nested_schema = nested and self._parse_json_schema() 6330 6331 return self.expression( 6332 exp.JSONColumnDef, 6333 this=this, 6334 kind=kind, 6335 path=path, 6336 nested_schema=nested_schema, 6337 ) 6338 6339 def _parse_json_schema(self) -> exp.JSONSchema: 6340 self._match_text_seq("COLUMNS") 6341 return self.expression( 6342 exp.JSONSchema, 6343 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6344 ) 6345 6346 def _parse_json_table(self) -> exp.JSONTable: 6347 this = self._parse_format_json(self._parse_bitwise()) 6348 path = self._match(TokenType.COMMA) and self._parse_string() 6349 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6350 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6351 schema = self._parse_json_schema() 6352 6353 return exp.JSONTable( 6354 this=this, 6355 schema=schema, 6356 path=path, 6357 error_handling=error_handling, 6358 empty_handling=empty_handling, 6359 ) 6360 6361 def _parse_match_against(self) -> exp.MatchAgainst: 6362 expressions = self._parse_csv(self._parse_column) 6363 6364 self._match_text_seq(")", "AGAINST", "(") 6365 6366 this = self._parse_string() 6367 6368 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6369 modifier = "IN NATURAL LANGUAGE MODE" 6370 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6371 modifier = f"{modifier} WITH QUERY EXPANSION" 6372 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6373 modifier = "IN BOOLEAN MODE" 6374 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6375 modifier = "WITH QUERY EXPANSION" 6376 else: 6377 modifier = None 6378 6379 return self.expression( 6380 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6381 ) 6382 6383 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6384 def _parse_open_json(self) -> exp.OpenJSON: 6385 this = self._parse_bitwise() 6386 path = self._match(TokenType.COMMA) and self._parse_string() 6387 6388 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6389 this = self._parse_field(any_token=True) 6390 kind = self._parse_types() 6391 path = 
self._parse_string() 6392 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6393 6394 return self.expression( 6395 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6396 ) 6397 6398 expressions = None 6399 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6400 self._match_l_paren() 6401 expressions = self._parse_csv(_parse_open_json_column_def) 6402 6403 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6404 6405 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6406 args = self._parse_csv(self._parse_bitwise) 6407 6408 if self._match(TokenType.IN): 6409 return self.expression( 6410 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6411 ) 6412 6413 if haystack_first: 6414 haystack = seq_get(args, 0) 6415 needle = seq_get(args, 1) 6416 else: 6417 needle = seq_get(args, 0) 6418 haystack = seq_get(args, 1) 6419 6420 return self.expression( 6421 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6422 ) 6423 6424 def _parse_predict(self) -> exp.Predict: 6425 self._match_text_seq("MODEL") 6426 this = self._parse_table() 6427 6428 self._match(TokenType.COMMA) 6429 self._match_text_seq("TABLE") 6430 6431 return self.expression( 6432 exp.Predict, 6433 this=this, 6434 expression=self._parse_table(), 6435 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6436 ) 6437 6438 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6439 args = self._parse_csv(self._parse_table) 6440 return exp.JoinHint(this=func_name.upper(), expressions=args) 6441 6442 def _parse_substring(self) -> exp.Substring: 6443 # Postgres supports the form: substring(string [from int] [for int]) 6444 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6445 6446 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6447 6448 if self._match(TokenType.FROM): 6449 args.append(self._parse_bitwise()) 6450 if self._match(TokenType.FOR): 6451 if len(args) == 1: 6452 args.append(exp.Literal.number(1)) 6453 args.append(self._parse_bitwise()) 6454 6455 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6456 6457 def _parse_trim(self) -> exp.Trim: 6458 # https://www.w3resource.com/sql/character-functions/trim.php 6459 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6460 6461 position = None 6462 collation = None 6463 expression = None 6464 6465 if self._match_texts(self.TRIM_TYPES): 6466 position = self._prev.text.upper() 6467 6468 this = self._parse_bitwise() 6469 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6470 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6471 expression = self._parse_bitwise() 6472 6473 if invert_order: 6474 this, expression = expression, this 6475 6476 if self._match(TokenType.COLLATE): 6477 collation = self._parse_bitwise() 6478 6479 return self.expression( 6480 exp.Trim, this=this, position=position, expression=expression, collation=collation 6481 ) 6482 6483 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6484 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6485 6486 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6487 return self._parse_window(self._parse_id_var(), alias=True) 6488 6489 def _parse_respect_or_ignore_nulls( 6490 self, this: t.Optional[exp.Expression] 6491 ) -> t.Optional[exp.Expression]: 6492 if self._match_text_seq("IGNORE", "NULLS"): 
6493 return self.expression(exp.IgnoreNulls, this=this) 6494 if self._match_text_seq("RESPECT", "NULLS"): 6495 return self.expression(exp.RespectNulls, this=this) 6496 return this 6497 6498 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6499 if self._match(TokenType.HAVING): 6500 self._match_texts(("MAX", "MIN")) 6501 max = self._prev.text.upper() != "MIN" 6502 return self.expression( 6503 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6504 ) 6505 6506 return this 6507 6508 def _parse_window( 6509 self, this: t.Optional[exp.Expression], alias: bool = False 6510 ) -> t.Optional[exp.Expression]: 6511 func = this 6512 comments = func.comments if isinstance(func, exp.Expression) else None 6513 6514 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6515 self._match(TokenType.WHERE) 6516 this = self.expression( 6517 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6518 ) 6519 self._match_r_paren() 6520 6521 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6522 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6523 if self._match_text_seq("WITHIN", "GROUP"): 6524 order = self._parse_wrapped(self._parse_order) 6525 this = self.expression(exp.WithinGroup, this=this, expression=order) 6526 6527 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6528 # Some dialects choose to implement and some do not. 6529 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6530 6531 # There is some code above in _parse_lambda that handles 6532 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6533 6534 # The below changes handle 6535 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6536 6537 # Oracle allows both formats 6538 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6539 # and Snowflake chose to do the same for familiarity 6540 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6541 if isinstance(this, exp.AggFunc): 6542 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6543 6544 if ignore_respect and ignore_respect is not this: 6545 ignore_respect.replace(ignore_respect.this) 6546 this = self.expression(ignore_respect.__class__, this=this) 6547 6548 this = self._parse_respect_or_ignore_nulls(this) 6549 6550 # bigquery select from window x AS (partition by ...) 
6551 if alias: 6552 over = None 6553 self._match(TokenType.ALIAS) 6554 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6555 return this 6556 else: 6557 over = self._prev.text.upper() 6558 6559 if comments and isinstance(func, exp.Expression): 6560 func.pop_comments() 6561 6562 if not self._match(TokenType.L_PAREN): 6563 return self.expression( 6564 exp.Window, 6565 comments=comments, 6566 this=this, 6567 alias=self._parse_id_var(False), 6568 over=over, 6569 ) 6570 6571 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6572 6573 first = self._match(TokenType.FIRST) 6574 if self._match_text_seq("LAST"): 6575 first = False 6576 6577 partition, order = self._parse_partition_and_order() 6578 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6579 6580 if kind: 6581 self._match(TokenType.BETWEEN) 6582 start = self._parse_window_spec() 6583 self._match(TokenType.AND) 6584 end = self._parse_window_spec() 6585 6586 spec = self.expression( 6587 exp.WindowSpec, 6588 kind=kind, 6589 start=start["value"], 6590 start_side=start["side"], 6591 end=end["value"], 6592 end_side=end["side"], 6593 ) 6594 else: 6595 spec = None 6596 6597 self._match_r_paren() 6598 6599 window = self.expression( 6600 exp.Window, 6601 comments=comments, 6602 this=this, 6603 partition_by=partition, 6604 order=order, 6605 spec=spec, 6606 alias=window_alias, 6607 over=over, 6608 first=first, 6609 ) 6610 6611 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6612 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6613 return self._parse_window(window, alias=alias) 6614 6615 return window 6616 6617 def _parse_partition_and_order( 6618 self, 6619 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6620 return self._parse_partition_by(), self._parse_order() 6621 6622 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6623 self._match(TokenType.BETWEEN) 6624 6625 return { 6626 "value": ( 6627 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6628 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6629 or self._parse_bitwise() 6630 ), 6631 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6632 } 6633 6634 def _parse_alias( 6635 self, this: t.Optional[exp.Expression], explicit: bool = False 6636 ) -> t.Optional[exp.Expression]: 6637 any_token = self._match(TokenType.ALIAS) 6638 comments = self._prev_comments or [] 6639 6640 if explicit and not any_token: 6641 return this 6642 6643 if self._match(TokenType.L_PAREN): 6644 aliases = self.expression( 6645 exp.Aliases, 6646 comments=comments, 6647 this=this, 6648 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6649 ) 6650 self._match_r_paren(aliases) 6651 return aliases 6652 6653 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6654 self.STRING_ALIASES and self._parse_string_as_identifier() 6655 ) 6656 6657 if alias: 6658 comments.extend(alias.pop_comments()) 6659 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6660 column = this.this 6661 6662 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6663 if not this.comments and column and column.comments: 6664 this.comments = column.pop_comments() 6665 6666 return this 6667 6668 def _parse_id_var( 6669 self, 6670 any_token: bool = True, 6671 tokens: t.Optional[t.Collection[TokenType]] = None, 6672 ) -> t.Optional[exp.Expression]: 6673 expression = self._parse_identifier() 6674 if 
not expression and ( 6675 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6676 ): 6677 quoted = self._prev.token_type == TokenType.STRING 6678 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6679 6680 return expression 6681 6682 def _parse_string(self) -> t.Optional[exp.Expression]: 6683 if self._match_set(self.STRING_PARSERS): 6684 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6685 return self._parse_placeholder() 6686 6687 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6688 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6689 6690 def _parse_number(self) -> t.Optional[exp.Expression]: 6691 if self._match_set(self.NUMERIC_PARSERS): 6692 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6693 return self._parse_placeholder() 6694 6695 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6696 if self._match(TokenType.IDENTIFIER): 6697 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6698 return self._parse_placeholder() 6699 6700 def _parse_var( 6701 self, 6702 any_token: bool = False, 6703 tokens: t.Optional[t.Collection[TokenType]] = None, 6704 upper: bool = False, 6705 ) -> t.Optional[exp.Expression]: 6706 if ( 6707 (any_token and self._advance_any()) 6708 or self._match(TokenType.VAR) 6709 or (self._match_set(tokens) if tokens else False) 6710 ): 6711 return self.expression( 6712 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6713 ) 6714 return self._parse_placeholder() 6715 6716 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6717 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6718 self._advance() 6719 return self._prev 6720 return None 6721 6722 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6723 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6724 6725 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6726 return self._parse_primary() or self._parse_var(any_token=True) 6727 6728 def _parse_null(self) -> t.Optional[exp.Expression]: 6729 if self._match_set(self.NULL_TOKENS): 6730 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6731 return self._parse_placeholder() 6732 6733 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6734 if self._match(TokenType.TRUE): 6735 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6736 if self._match(TokenType.FALSE): 6737 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6738 return self._parse_placeholder() 6739 6740 def _parse_star(self) -> t.Optional[exp.Expression]: 6741 if self._match(TokenType.STAR): 6742 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6743 return self._parse_placeholder() 6744 6745 def _parse_parameter(self) -> exp.Parameter: 6746 this = self._parse_identifier() or self._parse_primary_or_var() 6747 return self.expression(exp.Parameter, this=this) 6748 6749 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6750 if self._match_set(self.PLACEHOLDER_PARSERS): 6751 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6752 if placeholder: 6753 return placeholder 6754 self._advance(-1) 6755 return None 6756 6757 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6758 if not self._match_texts(keywords): 6759 return None 6760 if self._match(TokenType.L_PAREN, 
advance=False): 6761 return self._parse_wrapped_csv(self._parse_expression) 6762 6763 expression = self._parse_expression() 6764 return [expression] if expression else None 6765 6766 def _parse_csv( 6767 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6768 ) -> t.List[exp.Expression]: 6769 parse_result = parse_method() 6770 items = [parse_result] if parse_result is not None else [] 6771 6772 while self._match(sep): 6773 self._add_comments(parse_result) 6774 parse_result = parse_method() 6775 if parse_result is not None: 6776 items.append(parse_result) 6777 6778 return items 6779 6780 def _parse_tokens( 6781 self, parse_method: t.Callable, expressions: t.Dict 6782 ) -> t.Optional[exp.Expression]: 6783 this = parse_method() 6784 6785 while self._match_set(expressions): 6786 this = self.expression( 6787 expressions[self._prev.token_type], 6788 this=this, 6789 comments=self._prev_comments, 6790 expression=parse_method(), 6791 ) 6792 6793 return this 6794 6795 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6796 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6797 6798 def _parse_wrapped_csv( 6799 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6800 ) -> t.List[exp.Expression]: 6801 return self._parse_wrapped( 6802 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6803 ) 6804 6805 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6806 wrapped = self._match(TokenType.L_PAREN) 6807 if not wrapped and not optional: 6808 self.raise_error("Expecting (") 6809 parse_result = parse_method() 6810 if wrapped: 6811 self._match_r_paren() 6812 return parse_result 6813 6814 def _parse_expressions(self) -> t.List[exp.Expression]: 6815 return self._parse_csv(self._parse_expression) 6816 6817 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6818 return self._parse_select() or self._parse_set_operations( 6819 self._parse_alias(self._parse_assignment(), explicit=True) 6820 if alias 6821 else self._parse_assignment() 6822 ) 6823 6824 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6825 return self._parse_query_modifiers( 6826 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6827 ) 6828 6829 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6830 this = None 6831 if self._match_texts(self.TRANSACTION_KIND): 6832 this = self._prev.text 6833 6834 self._match_texts(("TRANSACTION", "WORK")) 6835 6836 modes = [] 6837 while True: 6838 mode = [] 6839 while self._match(TokenType.VAR): 6840 mode.append(self._prev.text) 6841 6842 if mode: 6843 modes.append(" ".join(mode)) 6844 if not self._match(TokenType.COMMA): 6845 break 6846 6847 return self.expression(exp.Transaction, this=this, modes=modes) 6848 6849 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6850 chain = None 6851 savepoint = None 6852 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6853 6854 self._match_texts(("TRANSACTION", "WORK")) 6855 6856 if self._match_text_seq("TO"): 6857 self._match_text_seq("SAVEPOINT") 6858 savepoint = self._parse_id_var() 6859 6860 if self._match(TokenType.AND): 6861 chain = not self._match_text_seq("NO") 6862 self._match_text_seq("CHAIN") 6863 6864 if is_rollback: 6865 return self.expression(exp.Rollback, savepoint=savepoint) 6866 6867 return self.expression(exp.Commit, chain=chain) 6868 6869 def _parse_refresh(self) -> exp.Refresh: 6870 
self._match(TokenType.TABLE) 6871 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6872 6873 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6874 if not self._match_text_seq("ADD"): 6875 return None 6876 6877 self._match(TokenType.COLUMN) 6878 exists_column = self._parse_exists(not_=True) 6879 expression = self._parse_field_def() 6880 6881 if expression: 6882 expression.set("exists", exists_column) 6883 6884 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6885 if self._match_texts(("FIRST", "AFTER")): 6886 position = self._prev.text 6887 column_position = self.expression( 6888 exp.ColumnPosition, this=self._parse_column(), position=position 6889 ) 6890 expression.set("position", column_position) 6891 6892 return expression 6893 6894 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6895 drop = self._match(TokenType.DROP) and self._parse_drop() 6896 if drop and not isinstance(drop, exp.Command): 6897 drop.set("kind", drop.args.get("kind", "COLUMN")) 6898 return drop 6899 6900 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6901 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6902 return self.expression( 6903 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6904 ) 6905 6906 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6907 index = self._index - 1 6908 6909 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6910 return self._parse_csv( 6911 lambda: self.expression( 6912 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6913 ) 6914 ) 6915 6916 self._retreat(index) 6917 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6918 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6919 6920 if self._match_text_seq("ADD", "COLUMNS"): 6921 schema = self._parse_schema() 6922 if schema: 6923 return [schema] 6924 return [] 6925 6926 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6927 6928 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6929 if self._match_texts(self.ALTER_ALTER_PARSERS): 6930 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6931 6932 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6933 # keyword after ALTER we default to parsing this statement 6934 self._match(TokenType.COLUMN) 6935 column = self._parse_field(any_token=True) 6936 6937 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6938 return self.expression(exp.AlterColumn, this=column, drop=True) 6939 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6940 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6941 if self._match(TokenType.COMMENT): 6942 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6943 if self._match_text_seq("DROP", "NOT", "NULL"): 6944 return self.expression( 6945 exp.AlterColumn, 6946 this=column, 6947 drop=True, 6948 allow_null=True, 6949 ) 6950 if self._match_text_seq("SET", "NOT", "NULL"): 6951 return self.expression( 6952 exp.AlterColumn, 6953 this=column, 6954 allow_null=False, 6955 ) 6956 self._match_text_seq("SET", "DATA") 6957 self._match_text_seq("TYPE") 6958 return self.expression( 6959 exp.AlterColumn, 6960 this=column, 6961 dtype=self._parse_types(), 6962 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6963 
using=self._match(TokenType.USING) and self._parse_assignment(), 6964 ) 6965 6966 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6967 if self._match_texts(("ALL", "EVEN", "AUTO")): 6968 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6969 6970 self._match_text_seq("KEY", "DISTKEY") 6971 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6972 6973 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6974 if compound: 6975 self._match_text_seq("SORTKEY") 6976 6977 if self._match(TokenType.L_PAREN, advance=False): 6978 return self.expression( 6979 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6980 ) 6981 6982 self._match_texts(("AUTO", "NONE")) 6983 return self.expression( 6984 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6985 ) 6986 6987 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6988 index = self._index - 1 6989 6990 partition_exists = self._parse_exists() 6991 if self._match(TokenType.PARTITION, advance=False): 6992 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6993 6994 self._retreat(index) 6995 return self._parse_csv(self._parse_drop_column) 6996 6997 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6998 if self._match(TokenType.COLUMN): 6999 exists = self._parse_exists() 7000 old_column = self._parse_column() 7001 to = self._match_text_seq("TO") 7002 new_column = self._parse_column() 7003 7004 if old_column is None or to is None or new_column is None: 7005 return None 7006 7007 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7008 7009 self._match_text_seq("TO") 7010 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7011 7012 def _parse_alter_table_set(self) -> exp.AlterSet: 7013 alter_set = self.expression(exp.AlterSet) 7014 7015 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7016 "TABLE", "PROPERTIES" 7017 ): 7018 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7019 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7020 alter_set.set("expressions", [self._parse_assignment()]) 7021 elif self._match_texts(("LOGGED", "UNLOGGED")): 7022 alter_set.set("option", exp.var(self._prev.text.upper())) 7023 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7024 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7025 elif self._match_text_seq("LOCATION"): 7026 alter_set.set("location", self._parse_field()) 7027 elif self._match_text_seq("ACCESS", "METHOD"): 7028 alter_set.set("access_method", self._parse_field()) 7029 elif self._match_text_seq("TABLESPACE"): 7030 alter_set.set("tablespace", self._parse_field()) 7031 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7032 alter_set.set("file_format", [self._parse_field()]) 7033 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7034 alter_set.set("file_format", self._parse_wrapped_options()) 7035 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7036 alter_set.set("copy_options", self._parse_wrapped_options()) 7037 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7038 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7039 else: 7040 if self._match_text_seq("SERDE"): 7041 alter_set.set("serde", self._parse_field()) 7042 7043 alter_set.set("expressions", 
[self._parse_properties()]) 7044 7045 return alter_set 7046 7047 def _parse_alter(self) -> exp.Alter | exp.Command: 7048 start = self._prev 7049 7050 alter_token = self._match_set(self.ALTERABLES) and self._prev 7051 if not alter_token: 7052 return self._parse_as_command(start) 7053 7054 exists = self._parse_exists() 7055 only = self._match_text_seq("ONLY") 7056 this = self._parse_table(schema=True) 7057 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7058 7059 if self._next: 7060 self._advance() 7061 7062 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7063 if parser: 7064 actions = ensure_list(parser(self)) 7065 not_valid = self._match_text_seq("NOT", "VALID") 7066 options = self._parse_csv(self._parse_property) 7067 7068 if not self._curr and actions: 7069 return self.expression( 7070 exp.Alter, 7071 this=this, 7072 kind=alter_token.text.upper(), 7073 exists=exists, 7074 actions=actions, 7075 only=only, 7076 options=options, 7077 cluster=cluster, 7078 not_valid=not_valid, 7079 ) 7080 7081 return self._parse_as_command(start) 7082 7083 def _parse_merge(self) -> exp.Merge: 7084 self._match(TokenType.INTO) 7085 target = self._parse_table() 7086 7087 if target and self._match(TokenType.ALIAS, advance=False): 7088 target.set("alias", self._parse_table_alias()) 7089 7090 self._match(TokenType.USING) 7091 using = self._parse_table() 7092 7093 self._match(TokenType.ON) 7094 on = self._parse_assignment() 7095 7096 return self.expression( 7097 exp.Merge, 7098 this=target, 7099 using=using, 7100 on=on, 7101 whens=self._parse_when_matched(), 7102 returning=self._parse_returning(), 7103 ) 7104 7105 def _parse_when_matched(self) -> exp.Whens: 7106 whens = [] 7107 7108 while self._match(TokenType.WHEN): 7109 matched = not self._match(TokenType.NOT) 7110 self._match_text_seq("MATCHED") 7111 source = ( 7112 False 7113 if self._match_text_seq("BY", "TARGET") 7114 else self._match_text_seq("BY", "SOURCE") 7115 ) 7116 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7117 7118 self._match(TokenType.THEN) 7119 7120 if self._match(TokenType.INSERT): 7121 this = self._parse_star() 7122 if this: 7123 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7124 else: 7125 then = self.expression( 7126 exp.Insert, 7127 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7128 expression=self._match_text_seq("VALUES") and self._parse_value(), 7129 ) 7130 elif self._match(TokenType.UPDATE): 7131 expressions = self._parse_star() 7132 if expressions: 7133 then = self.expression(exp.Update, expressions=expressions) 7134 else: 7135 then = self.expression( 7136 exp.Update, 7137 expressions=self._match(TokenType.SET) 7138 and self._parse_csv(self._parse_equality), 7139 ) 7140 elif self._match(TokenType.DELETE): 7141 then = self.expression(exp.Var, this=self._prev.text) 7142 else: 7143 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7144 7145 whens.append( 7146 self.expression( 7147 exp.When, 7148 matched=matched, 7149 source=source, 7150 condition=condition, 7151 then=then, 7152 ) 7153 ) 7154 return self.expression(exp.Whens, expressions=whens) 7155 7156 def _parse_show(self) -> t.Optional[exp.Expression]: 7157 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7158 if parser: 7159 return parser(self) 7160 return self._parse_as_command(self._prev) 7161 7162 def _parse_set_item_assignment( 7163 self, kind: t.Optional[str] = None 7164 ) -> t.Optional[exp.Expression]: 7165 
index = self._index 7166 7167 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7168 return self._parse_set_transaction(global_=kind == "GLOBAL") 7169 7170 left = self._parse_primary() or self._parse_column() 7171 assignment_delimiter = self._match_texts(("=", "TO")) 7172 7173 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7174 self._retreat(index) 7175 return None 7176 7177 right = self._parse_statement() or self._parse_id_var() 7178 if isinstance(right, (exp.Column, exp.Identifier)): 7179 right = exp.var(right.name) 7180 7181 this = self.expression(exp.EQ, this=left, expression=right) 7182 return self.expression(exp.SetItem, this=this, kind=kind) 7183 7184 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7185 self._match_text_seq("TRANSACTION") 7186 characteristics = self._parse_csv( 7187 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7188 ) 7189 return self.expression( 7190 exp.SetItem, 7191 expressions=characteristics, 7192 kind="TRANSACTION", 7193 **{"global": global_}, # type: ignore 7194 ) 7195 7196 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7197 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7198 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7199 7200 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7201 index = self._index 7202 set_ = self.expression( 7203 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7204 ) 7205 7206 if self._curr: 7207 self._retreat(index) 7208 return self._parse_as_command(self._prev) 7209 7210 return set_ 7211 7212 def _parse_var_from_options( 7213 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7214 ) -> t.Optional[exp.Var]: 7215 start = self._curr 7216 if not start: 7217 return None 7218 7219 option = start.text.upper() 7220 continuations = options.get(option) 7221 7222 index = self._index 7223 self._advance() 7224 for keywords in continuations or []: 7225 if isinstance(keywords, str): 7226 keywords = (keywords,) 7227 7228 if self._match_text_seq(*keywords): 7229 option = f"{option} {' '.join(keywords)}" 7230 break 7231 else: 7232 if continuations or continuations is None: 7233 if raise_unmatched: 7234 self.raise_error(f"Unknown option {option}") 7235 7236 self._retreat(index) 7237 return None 7238 7239 return exp.var(option) 7240 7241 def _parse_as_command(self, start: Token) -> exp.Command: 7242 while self._curr: 7243 self._advance() 7244 text = self._find_sql(start, self._prev) 7245 size = len(start.text) 7246 self._warn_unsupported() 7247 return exp.Command(this=text[:size], expression=text[size:]) 7248 7249 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7250 settings = [] 7251 7252 self._match_l_paren() 7253 kind = self._parse_id_var() 7254 7255 if self._match(TokenType.L_PAREN): 7256 while True: 7257 key = self._parse_id_var() 7258 value = self._parse_primary() 7259 if not key and value is None: 7260 break 7261 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7262 self._match(TokenType.R_PAREN) 7263 7264 self._match_r_paren() 7265 7266 return self.expression( 7267 exp.DictProperty, 7268 this=this, 7269 kind=kind.this if kind else None, 7270 settings=settings, 7271 ) 7272 7273 def _parse_dict_range(self, this: str) -> exp.DictRange: 7274 self._match_l_paren() 7275 has_min = self._match_text_seq("MIN") 7276 if has_min: 7277 min = self._parse_var() or 
self._parse_primary() 7278 self._match_text_seq("MAX") 7279 max = self._parse_var() or self._parse_primary() 7280 else: 7281 max = self._parse_var() or self._parse_primary() 7282 min = exp.Literal.number(0) 7283 self._match_r_paren() 7284 return self.expression(exp.DictRange, this=this, min=min, max=max) 7285 7286 def _parse_comprehension( 7287 self, this: t.Optional[exp.Expression] 7288 ) -> t.Optional[exp.Comprehension]: 7289 index = self._index 7290 expression = self._parse_column() 7291 if not self._match(TokenType.IN): 7292 self._retreat(index - 1) 7293 return None 7294 iterator = self._parse_column() 7295 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7296 return self.expression( 7297 exp.Comprehension, 7298 this=this, 7299 expression=expression, 7300 iterator=iterator, 7301 condition=condition, 7302 ) 7303 7304 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7305 if self._match(TokenType.HEREDOC_STRING): 7306 return self.expression(exp.Heredoc, this=self._prev.text) 7307 7308 if not self._match_text_seq("$"): 7309 return None 7310 7311 tags = ["$"] 7312 tag_text = None 7313 7314 if self._is_connected(): 7315 self._advance() 7316 tags.append(self._prev.text.upper()) 7317 else: 7318 self.raise_error("No closing $ found") 7319 7320 if tags[-1] != "$": 7321 if self._is_connected() and self._match_text_seq("$"): 7322 tag_text = tags[-1] 7323 tags.append("$") 7324 else: 7325 self.raise_error("No closing $ found") 7326 7327 heredoc_start = self._curr 7328 7329 while self._curr: 7330 if self._match_text_seq(*tags, advance=False): 7331 this = self._find_sql(heredoc_start, self._prev) 7332 self._advance(len(tags)) 7333 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7334 7335 self._advance() 7336 7337 self.raise_error(f"No closing {''.join(tags)} found") 7338 return None 7339 7340 def _find_parser( 7341 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7342 ) -> t.Optional[t.Callable]: 7343 if not self._curr: 7344 return None 7345 7346 index = self._index 7347 this = [] 7348 while True: 7349 # The current token might be multiple words 7350 curr = self._curr.text.upper() 7351 key = curr.split(" ") 7352 this.append(curr) 7353 7354 self._advance() 7355 result, trie = in_trie(trie, key) 7356 if result == TrieResult.FAILED: 7357 break 7358 7359 if result == TrieResult.EXISTS: 7360 subparser = parsers[" ".join(this)] 7361 return subparser 7362 7363 self._retreat(index) 7364 return None 7365 7366 def _match(self, token_type, advance=True, expression=None): 7367 if not self._curr: 7368 return None 7369 7370 if self._curr.token_type == token_type: 7371 if advance: 7372 self._advance() 7373 self._add_comments(expression) 7374 return True 7375 7376 return None 7377 7378 def _match_set(self, types, advance=True): 7379 if not self._curr: 7380 return None 7381 7382 if self._curr.token_type in types: 7383 if advance: 7384 self._advance() 7385 return True 7386 7387 return None 7388 7389 def _match_pair(self, token_type_a, token_type_b, advance=True): 7390 if not self._curr or not self._next: 7391 return None 7392 7393 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7394 if advance: 7395 self._advance(2) 7396 return True 7397 7398 return None 7399 7400 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7401 if not self._match(TokenType.L_PAREN, expression=expression): 7402 self.raise_error("Expecting (") 7403 7404 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 
7405 if not self._match(TokenType.R_PAREN, expression=expression): 7406 self.raise_error("Expecting )") 7407 7408 def _match_texts(self, texts, advance=True): 7409 if ( 7410 self._curr 7411 and self._curr.token_type != TokenType.STRING 7412 and self._curr.text.upper() in texts 7413 ): 7414 if advance: 7415 self._advance() 7416 return True 7417 return None 7418 7419 def _match_text_seq(self, *texts, advance=True): 7420 index = self._index 7421 for text in texts: 7422 if ( 7423 self._curr 7424 and self._curr.token_type != TokenType.STRING 7425 and self._curr.text.upper() == text 7426 ): 7427 self._advance() 7428 else: 7429 self._retreat(index) 7430 return None 7431 7432 if not advance: 7433 self._retreat(index) 7434 7435 return True 7436 7437 def _replace_lambda( 7438 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7439 ) -> t.Optional[exp.Expression]: 7440 if not node: 7441 return node 7442 7443 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7444 7445 for column in node.find_all(exp.Column): 7446 typ = lambda_types.get(column.parts[0].name) 7447 if typ is not None: 7448 dot_or_id = column.to_dot() if column.table else column.this 7449 7450 if typ: 7451 dot_or_id = self.expression( 7452 exp.Cast, 7453 this=dot_or_id, 7454 to=typ, 7455 ) 7456 7457 parent = column.parent 7458 7459 while isinstance(parent, exp.Dot): 7460 if not isinstance(parent.parent, exp.Dot): 7461 parent.replace(dot_or_id) 7462 break 7463 parent = parent.parent 7464 else: 7465 if column is node: 7466 node = dot_or_id 7467 else: 7468 column.replace(dot_or_id) 7469 return node 7470 7471 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7472 start = self._prev 7473 7474 # Not to be confused with TRUNCATE(number, decimals) function call 7475 if self._match(TokenType.L_PAREN): 7476 self._retreat(self._index - 2) 7477 return self._parse_function() 7478 7479 # Clickhouse supports TRUNCATE DATABASE as well 7480 is_database = self._match(TokenType.DATABASE) 7481 7482 self._match(TokenType.TABLE) 7483 7484 exists = self._parse_exists(not_=False) 7485 7486 expressions = self._parse_csv( 7487 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7488 ) 7489 7490 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7491 7492 if self._match_text_seq("RESTART", "IDENTITY"): 7493 identity = "RESTART" 7494 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7495 identity = "CONTINUE" 7496 else: 7497 identity = None 7498 7499 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7500 option = self._prev.text 7501 else: 7502 option = None 7503 7504 partition = self._parse_partition() 7505 7506 # Fallback case 7507 if self._curr: 7508 return self._parse_as_command(start) 7509 7510 return self.expression( 7511 exp.TruncateTable, 7512 expressions=expressions, 7513 is_database=is_database, 7514 exists=exists, 7515 cluster=cluster, 7516 identity=identity, 7517 option=option, 7518 partition=partition, 7519 ) 7520 7521 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7522 this = self._parse_ordered(self._parse_opclass) 7523 7524 if not self._match(TokenType.WITH): 7525 return this 7526 7527 op = self._parse_var(any_token=True) 7528 7529 return self.expression(exp.WithOperator, this=this, op=op) 7530 7531 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7532 self._match(TokenType.EQ) 7533 self._match(TokenType.L_PAREN) 7534 7535 opts: t.List[t.Optional[exp.Expression]] = [] 7536 
while self._curr and not self._match(TokenType.R_PAREN): 7537 if self._match_text_seq("FORMAT_NAME", "="): 7538 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7539 # so we parse it separately to use _parse_field() 7540 prop = self.expression( 7541 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7542 ) 7543 opts.append(prop) 7544 else: 7545 opts.append(self._parse_property()) 7546 7547 self._match(TokenType.COMMA) 7548 7549 return opts 7550 7551 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7552 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7553 7554 options = [] 7555 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7556 option = self._parse_var(any_token=True) 7557 prev = self._prev.text.upper() 7558 7559 # Different dialects might separate options and values by white space, "=" and "AS" 7560 self._match(TokenType.EQ) 7561 self._match(TokenType.ALIAS) 7562 7563 param = self.expression(exp.CopyParameter, this=option) 7564 7565 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7566 TokenType.L_PAREN, advance=False 7567 ): 7568 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7569 param.set("expressions", self._parse_wrapped_options()) 7570 elif prev == "FILE_FORMAT": 7571 # T-SQL's external file format case 7572 param.set("expression", self._parse_field()) 7573 else: 7574 param.set("expression", self._parse_unquoted_field()) 7575 7576 options.append(param) 7577 self._match(sep) 7578 7579 return options 7580 7581 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7582 expr = self.expression(exp.Credentials) 7583 7584 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7585 expr.set("storage", self._parse_field()) 7586 if self._match_text_seq("CREDENTIALS"): 7587 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7588 creds = ( 7589 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7590 ) 7591 expr.set("credentials", creds) 7592 if self._match_text_seq("ENCRYPTION"): 7593 expr.set("encryption", self._parse_wrapped_options()) 7594 if self._match_text_seq("IAM_ROLE"): 7595 expr.set("iam_role", self._parse_field()) 7596 if self._match_text_seq("REGION"): 7597 expr.set("region", self._parse_field()) 7598 7599 return expr 7600 7601 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7602 return self._parse_field() 7603 7604 def _parse_copy(self) -> exp.Copy | exp.Command: 7605 start = self._prev 7606 7607 self._match(TokenType.INTO) 7608 7609 this = ( 7610 self._parse_select(nested=True, parse_subquery_alias=False) 7611 if self._match(TokenType.L_PAREN, advance=False) 7612 else self._parse_table(schema=True) 7613 ) 7614 7615 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7616 7617 files = self._parse_csv(self._parse_file_location) 7618 credentials = self._parse_credentials() 7619 7620 self._match_text_seq("WITH") 7621 7622 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7623 7624 # Fallback case 7625 if self._curr: 7626 return self._parse_as_command(start) 7627 7628 return self.expression( 7629 exp.Copy, 7630 this=this, 7631 kind=kind, 7632 credentials=credentials, 7633 files=files, 7634 params=params, 7635 ) 7636 7637 def _parse_normalize(self) -> exp.Normalize: 7638 return self.expression( 7639 exp.Normalize, 7640 this=self._parse_bitwise(), 7641 form=self._match(TokenType.COMMA) and self._parse_var(), 7642 ) 7643 7644 def _parse_star_ops(self) 
-> t.Optional[exp.Expression]: 7645 if self._match_text_seq("COLUMNS", "(", advance=False): 7646 this = self._parse_function() 7647 if isinstance(this, exp.Columns): 7648 this.set("unpack", True) 7649 return this 7650 7651 return self.expression( 7652 exp.Star, 7653 **{ # type: ignore 7654 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7655 "replace": self._parse_star_op("REPLACE"), 7656 "rename": self._parse_star_op("RENAME"), 7657 }, 7658 ) 7659 7660 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7661 privilege_parts = [] 7662 7663 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7664 # (end of privilege list) or L_PAREN (start of column list) are met 7665 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7666 privilege_parts.append(self._curr.text.upper()) 7667 self._advance() 7668 7669 this = exp.var(" ".join(privilege_parts)) 7670 expressions = ( 7671 self._parse_wrapped_csv(self._parse_column) 7672 if self._match(TokenType.L_PAREN, advance=False) 7673 else None 7674 ) 7675 7676 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7677 7678 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7679 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7680 principal = self._parse_id_var() 7681 7682 if not principal: 7683 return None 7684 7685 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7686 7687 def _parse_grant(self) -> exp.Grant | exp.Command: 7688 start = self._prev 7689 7690 privileges = self._parse_csv(self._parse_grant_privilege) 7691 7692 self._match(TokenType.ON) 7693 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7694 7695 # Attempt to parse the securable e.g. MySQL allows names 7696 # such as "foo.*", "*.*" which are not easily parseable yet 7697 securable = self._try_parse(self._parse_table_parts) 7698 7699 if not securable or not self._match_text_seq("TO"): 7700 return self._parse_as_command(start) 7701 7702 principals = self._parse_csv(self._parse_grant_principal) 7703 7704 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7705 7706 if self._curr: 7707 return self._parse_as_command(start) 7708 7709 return self.expression( 7710 exp.Grant, 7711 privileges=privileges, 7712 kind=kind, 7713 securable=securable, 7714 principals=principals, 7715 grant_option=grant_option, 7716 ) 7717 7718 def _parse_overlay(self) -> exp.Overlay: 7719 return self.expression( 7720 exp.Overlay, 7721 **{ # type: ignore 7722 "this": self._parse_bitwise(), 7723 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7724 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7725 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7726 }, 7727 )
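The DECODE rewrite implemented by _parse_decode above is easiest to see end to end through the public API. A minimal sketch (DECODE is registered in the base FUNCTION_PARSERS, so the default dialect exercises this path):

import sqlglot

# The two-argument form stays a Decode node; the search/result form is
# rewritten into a CASE expression, with NULL searches compared via IS NULL.
case_sql = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t").sql()
print(case_sql)  # SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t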
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1407 def __init__( 1408 self, 1409 error_level: t.Optional[ErrorLevel] = None, 1410 error_message_context: int = 100, 1411 max_errors: int = 3, 1412 dialect: DialectType = None, 1413 ): 1414 from sqlglot.dialects import Dialect 1415 1416 self.error_level = error_level or ErrorLevel.IMMEDIATE 1417 self.error_message_context = error_message_context 1418 self.max_errors = max_errors 1419 self.dialect = Dialect.get_or_raise(dialect) 1420 self.reset()
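A small construction sketch: the dialect may be passed as a name, a Dialect class, or an instance, and is resolved through Dialect.get_or_raise; error_level falls back to ErrorLevel.IMMEDIATE when omitted.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# "duckdb" is resolved to a Dialect instance by Dialect.get_or_raise.
parser = Parser(error_level=ErrorLevel.WARN, max_errors=5, dialect="duckdb")

In normal use this constructor is rarely called directly: Dialect.parse instantiates the dialect's own Parser subclass (dialect.parser_class) and feeds it that dialect's tokens.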
1432 def parse( 1433 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1434 ) -> t.List[t.Optional[exp.Expression]]: 1435 """ 1436 Parses a list of tokens and returns a list of syntax trees, one tree 1437 per parsed SQL statement. 1438 1439 Args: 1440 raw_tokens: The list of tokens. 1441 sql: The original SQL string, used to produce helpful debug messages. 1442 1443 Returns: 1444 The list of the produced syntax trees. 1445 """ 1446 return self._parse( 1447 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1448 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
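For example, a minimal tokenize-then-parse round trip using the classes in this module:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT 1"
tokens = Tokenizer().tokenize(sql)

# One syntax tree per statement; passing sql lets raise_error slice the
# original text into its error context.
trees = Parser().parse(tokens, sql)
assert [type(t).__name__ for t in trees] == ["Select", "Select"]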
1450 def parse_into( 1451 self, 1452 expression_types: exp.IntoType, 1453 raw_tokens: t.List[Token], 1454 sql: t.Optional[str] = None, 1455 ) -> t.List[t.Optional[exp.Expression]]: 1456 """ 1457 Parses a list of tokens into a given Expression type. If a collection of Expression 1458 types is given instead, this method will try to parse the token list into each one 1459 of them, stopping at the first for which the parsing succeeds. 1460 1461 Args: 1462 expression_types: The expression type(s) to try and parse the token list into. 1463 raw_tokens: The list of tokens. 1464 sql: The original SQL string, used to produce helpful debug messages. 1465 1466 Returns: 1467 The target Expression. 1468 """ 1469 errors = [] 1470 for expression_type in ensure_list(expression_types): 1471 parser = self.EXPRESSION_PARSERS.get(expression_type) 1472 if not parser: 1473 raise TypeError(f"No parser registered for {expression_type}") 1474 1475 try: 1476 return self._parse(parser, raw_tokens, sql) 1477 except ParseError as e: 1478 e.errors[0]["into_expression"] = expression_type 1479 errors.append(e) 1480 1481 raise ParseError( 1482 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1483 errors=merge_errors(errors), 1484 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
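A sketch of the single-type case (exp.Select is one of the registered EXPRESSION_PARSERS keys); passing a tuple of types instead tries each in order and merges the failures into one ParseError if none succeeds:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
expressions = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)
assert isinstance(expressions[0], exp.Select)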
1524 def check_errors(self) -> None: 1525 """Logs or raises any found errors, depending on the chosen error level setting.""" 1526 if self.error_level == ErrorLevel.WARN: 1527 for error in self.errors: 1528 logger.error(str(error)) 1529 elif self.error_level == ErrorLevel.RAISE and self.errors: 1530 raise ParseError( 1531 concat_messages(self.errors, self.max_errors), 1532 errors=merge_errors(self.errors), 1533 )
Logs or raises any found errors, depending on the chosen error level setting.
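Under ErrorLevel.RAISE the parser keeps collecting errors and check_errors raises them as one combined ParseError once parsing finishes; under ErrorLevel.WARN they are logged instead. A sketch, reusing the unbalanced-parenthesis example from sqlglot's README:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo( FROM bar"
parser = Parser(error_level=ErrorLevel.RAISE)
try:
    parser.parse(Tokenizer().tokenize(sql), sql)  # parse() ends by calling check_errors()
except ParseError as e:
    print(e)  # Expecting ). Line 1, Col: 13. ...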
1535 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1536 """ 1537 Appends an error in the list of recorded errors or raises it, depending on the chosen 1538 error level setting. 1539 """ 1540 token = token or self._curr or self._prev or Token.string("") 1541 start = token.start 1542 end = token.end + 1 1543 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1544 highlight = self.sql[start:end] 1545 end_context = self.sql[end : end + self.error_message_context] 1546 1547 error = ParseError.new( 1548 f"{message}. Line {token.line}, Col: {token.col}.\n" 1549 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1550 description=message, 1551 line=token.line, 1552 col=token.col, 1553 start_context=start_context, 1554 highlight=highlight, 1555 end_context=end_context, 1556 ) 1557 1558 if self.error_level == ErrorLevel.IMMEDIATE: 1559 raise error 1560 1561 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
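The structured fields passed to ParseError.new above are preserved on the exception, so callers can inspect them. A minimal sketch using the default IMMEDIATE level:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo( FROM bar"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    err = e.errors[0]  # keys mirror the ParseError.new call: description, line, col, ...
    print(err["line"], err["col"], err["highlight"])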
1563 def expression( 1564 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1565 ) -> E: 1566 """ 1567 Creates a new, validated Expression. 1568 1569 Args: 1570 exp_class: The expression class to instantiate. 1571 comments: An optional list of comments to attach to the expression. 1572 kwargs: The arguments to set for the expression along with their respective values. 1573 1574 Returns: 1575 The target expression. 1576 """ 1577 instance = exp_class(**kwargs) 1578 instance.add_comments(comments) if comments else self._add_comments(instance) 1579 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
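A sketch of building a validated node directly; expression() attaches any pending comments and then runs validate_expression:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
assert node.sql() == "a = 1"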
1586 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1587 """ 1588 Validates an Expression, making sure that all its mandatory arguments are set. 1589 1590 Args: 1591 expression: The expression to validate. 1592 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1593 1594 Returns: 1595 The validated expression. 1596 """ 1597 if self.error_level != ErrorLevel.IGNORE: 1598 for error_message in expression.error_messages(args): 1599 self.raise_error(error_message) 1600 1601 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
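A sketch of a failed validation: exp.If declares its "this" argument as mandatory, so under ErrorLevel.RAISE the message is recorded on parser.errors (to be raised later by check_errors) rather than thrown immediately:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
parser.validate_expression(exp.If(true=exp.Literal.number(1)))
assert parser.errors  # e.g. a "Required keyword: 'this' missing ..." message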