sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)).and_(
        exp.Is(this=seq_get(args, 0), expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))
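

# Illustrative sketch, not part of the upstream module: assuming sqlglot's default
# SQL generation, the DIV0 / ZEROIFNULL builders above (and the NULLIFZERO builder
# below) rewrite Snowflake's convenience functions into portable conditionals,
# roughly:
#
#   DIV0(a, b)    -> IF(b = 0 AND NOT a IS NULL, 0, a / b)
#   ZEROIFNULL(x) -> IF(x IS NULL, 0, x)
#   NULLIFZERO(x) -> IF(x = 0, NULL, x)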


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if (
                isinstance(unnest.parent, (exp.From, exp.Join))
                and len(unnest.expressions) == 1
                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
            ):
                generate_date_array = unnest.expressions[0]
                start = generate_date_array.args.get("start")
                end = generate_date_array.args.get("end")
                step = generate_date_array.args.get("step")

                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
                    continue

                unit = step.args.get("unit")

                unnest_alias = unnest.args.get("alias")
                if unnest_alias:
                    unnest_alias = unnest_alias.copy()
                    sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
                else:
                    sequence_value_name = "value"
                # We'll add the next sequence value to the starting date and project the result
                date_add = _build_date_time_add(exp.DateAdd)(
                    [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
                ).as_(sequence_value_name)

                # We use DATEDIFF to compute the number of sequence values needed
                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
                )

                unnest.set("expressions", [number_sequence])
                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))

    return expression
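

# Worked sketch, not in the upstream source: given the rewrite above, a query like
#   SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2024-01-01' AS DATE),
#                                            CAST('2024-01-05' AS DATE), INTERVAL 1 DAY))
# is expressed for Snowflake as ARRAY_GENERATE_RANGE(0, DATEDIFF(day, start, end) + 1),
# with each sequence value DATEADD'ed back onto the start date, since Snowflake has
# no native GENERATE_DATE_ARRAY table function.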
"LAST_DAY": lambda args: exp.LastDay( 319 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 320 ), 321 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 322 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 323 "LISTAGG": exp.GroupConcat.from_arg_list, 324 "MEDIAN": lambda args: exp.PercentileCont( 325 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 326 ), 327 "NULLIFZERO": _build_if_from_nullifzero, 328 "OBJECT_CONSTRUCT": _build_object_construct, 329 "REGEXP_REPLACE": _build_regexp_replace, 330 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 331 "RLIKE": exp.RegexpLike.from_arg_list, 332 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 333 "TIMEADD": _build_date_time_add(exp.TimeAdd), 334 "TIMEDIFF": _build_datediff, 335 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 336 "TIMESTAMPDIFF": _build_datediff, 337 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 338 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 339 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 340 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 341 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 342 "TO_NUMBER": lambda args: exp.ToNumber( 343 this=seq_get(args, 0), 344 format=seq_get(args, 1), 345 precision=seq_get(args, 2), 346 scale=seq_get(args, 3), 347 ), 348 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 349 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 350 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 351 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 352 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 353 "TO_VARCHAR": exp.ToChar.from_arg_list, 354 "ZEROIFNULL": _build_if_from_zeroifnull, 355 } 356 357 FUNCTION_PARSERS = { 358 **parser.Parser.FUNCTION_PARSERS, 359 "DATE_PART": lambda self: self._parse_date_part(), 360 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 361 } 362 FUNCTION_PARSERS.pop("TRIM") 363 364 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 365 366 RANGE_PARSERS = { 367 **parser.Parser.RANGE_PARSERS, 368 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 369 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 370 } 371 372 ALTER_PARSERS = { 373 **parser.Parser.ALTER_PARSERS, 374 "UNSET": lambda self: self.expression( 375 exp.Set, 376 tag=self._match_text_seq("TAG"), 377 expressions=self._parse_csv(self._parse_id_var), 378 unset=True, 379 ), 380 "SWAP": lambda self: self._parse_alter_table_swap(), 381 } 382 383 STATEMENT_PARSERS = { 384 **parser.Parser.STATEMENT_PARSERS, 385 TokenType.SHOW: lambda self: self._parse_show(), 386 } 387 388 PROPERTY_PARSERS = { 389 **parser.Parser.PROPERTY_PARSERS, 390 "LOCATION": lambda self: self._parse_location_property(), 391 } 392 393 TYPE_CONVERTERS = { 394 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 395 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 396 } 397 398 SHOW_PARSERS = { 399 "SCHEMAS": _show_parser("SCHEMAS"), 400 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 401 "OBJECTS": _show_parser("OBJECTS"), 402 "TERSE OBJECTS": _show_parser("OBJECTS"), 403 "TABLES": _show_parser("TABLES"), 404 "TERSE TABLES": _show_parser("TABLES"), 405 "VIEWS": _show_parser("VIEWS"), 406 "TERSE VIEWS": 
_show_parser("VIEWS"), 407 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 408 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 409 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 410 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 411 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 412 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 413 "SEQUENCES": _show_parser("SEQUENCES"), 414 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 415 "COLUMNS": _show_parser("COLUMNS"), 416 "USERS": _show_parser("USERS"), 417 "TERSE USERS": _show_parser("USERS"), 418 } 419 420 CONSTRAINT_PARSERS = { 421 **parser.Parser.CONSTRAINT_PARSERS, 422 "WITH": lambda self: self._parse_with_constraint(), 423 "MASKING": lambda self: self._parse_with_constraint(), 424 "PROJECTION": lambda self: self._parse_with_constraint(), 425 "TAG": lambda self: self._parse_with_constraint(), 426 } 427 428 STAGED_FILE_SINGLE_TOKENS = { 429 TokenType.DOT, 430 TokenType.MOD, 431 TokenType.SLASH, 432 } 433 434 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 435 436 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 437 438 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 439 440 LAMBDAS = { 441 **parser.Parser.LAMBDAS, 442 TokenType.ARROW: lambda self, expressions: self.expression( 443 exp.Lambda, 444 this=self._replace_lambda( 445 self._parse_assignment(), 446 expressions, 447 ), 448 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 449 ), 450 } 451 452 def _negate_range( 453 self, this: t.Optional[exp.Expression] = None 454 ) -> t.Optional[exp.Expression]: 455 if not this: 456 return this 457 458 query = this.args.get("query") 459 if isinstance(this, exp.In) and isinstance(query, exp.Query): 460 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 461 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 462 # which can produce different results (most likely a SnowFlake bug). 
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral
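
        # Illustrative note, not in the upstream source: through _parse_lateral
        # above, `LATERAL FLATTEN(INPUT => t.col) AS f` gets FLATTEN's implicit
        # output columns (SEQ, KEY, PATH, INDEX, VALUE, THIS) attached to its
        # alias, so a later reference such as f.value can be resolved.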

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
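
        # Illustrative note, not in the upstream source: the staged-file location
        # parser below reassembles consecutive tokens like `@my_stage/path/file.csv`
        # into a single exp.Var, so a query such as
        # `SELECT ... FROM @my_stage/file.csv (FILE_FORMAT => 'fmt')` survives a
        # round trip. The stage and file names here are made up.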

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
rename_func("DAYOFMONTH"), 773 exp.DayOfWeek: rename_func("DAYOFWEEK"), 774 exp.DayOfYear: rename_func("DAYOFYEAR"), 775 exp.Explode: rename_func("FLATTEN"), 776 exp.Extract: rename_func("DATE_PART"), 777 exp.FromTimeZone: lambda self, e: self.func( 778 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 779 ), 780 exp.GenerateSeries: lambda self, e: self.func( 781 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 782 ), 783 exp.GroupConcat: rename_func("LISTAGG"), 784 exp.If: if_sql(name="IFF", false_value="NULL"), 785 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 786 exp.JSONExtractScalar: lambda self, e: self.func( 787 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 788 ), 789 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 790 exp.JSONPathRoot: lambda *_: "", 791 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 792 exp.LogicalOr: rename_func("BOOLOR_AGG"), 793 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 794 exp.Max: max_or_greatest, 795 exp.Min: min_or_least, 796 exp.ParseJSON: lambda self, e: self.func( 797 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 798 ), 799 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 800 exp.PercentileCont: transforms.preprocess( 801 [transforms.add_within_group_for_percentiles] 802 ), 803 exp.PercentileDisc: transforms.preprocess( 804 [transforms.add_within_group_for_percentiles] 805 ), 806 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 807 exp.RegexpILike: _regexpilike_sql, 808 exp.Rand: rename_func("RANDOM"), 809 exp.Select: transforms.preprocess( 810 [ 811 transforms.eliminate_distinct_on, 812 transforms.explode_to_unnest(), 813 transforms.eliminate_semi_and_anti_joins, 814 _unnest_generate_date_array, 815 ] 816 ), 817 exp.SHA: rename_func("SHA1"), 818 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 819 exp.StartsWith: rename_func("STARTSWITH"), 820 exp.StrPosition: lambda self, e: self.func( 821 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 822 ), 823 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 824 exp.Stuff: rename_func("INSERT"), 825 exp.TimeAdd: date_delta_sql("TIMEADD"), 826 exp.TimestampDiff: lambda self, e: self.func( 827 "TIMESTAMPDIFF", e.unit, e.expression, e.this 828 ), 829 exp.TimestampTrunc: timestamptrunc_sql(), 830 exp.TimeStrToTime: timestrtotime_sql, 831 exp.TimeToStr: lambda self, e: self.func( 832 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 833 ), 834 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 835 exp.ToArray: rename_func("TO_ARRAY"), 836 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 837 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 838 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 839 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 840 exp.TsOrDsToDate: lambda self, e: self.func( 841 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 842 ), 843 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 844 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 845 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 846 exp.Xor: rename_func("BOOLXOR"), 847 } 848 849 SUPPORTED_JSON_PATH_PARTS = { 850 exp.JSONPathKey, 851 exp.JSONPathRoot, 852 exp.JSONPathSubscript, 853 } 854 855 TYPE_MAPPING = { 856 **generator.Generator.TYPE_MAPPING, 857 
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
= f" {scope}" if scope else "" 964 965 scope_kind = self.sql(expression, "scope_kind") 966 if scope_kind: 967 scope_kind = f" IN {scope_kind}" 968 969 starts_with = self.sql(expression, "starts_with") 970 if starts_with: 971 starts_with = f" STARTS WITH {starts_with}" 972 973 limit = self.sql(expression, "limit") 974 975 from_ = self.sql(expression, "from") 976 if from_: 977 from_ = f" FROM {from_}" 978 979 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 980 981 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 982 # Other dialects don't support all of the following parameters, so we need to 983 # generate default values as necessary to ensure the transpilation is correct 984 group = expression.args.get("group") 985 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 986 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 987 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 988 989 return self.func( 990 "REGEXP_SUBSTR", 991 expression.this, 992 expression.expression, 993 position, 994 occurrence, 995 parameters, 996 group, 997 ) 998 999 def except_op(self, expression: exp.Except) -> str: 1000 if not expression.args.get("distinct"): 1001 self.unsupported("EXCEPT with All is not supported in Snowflake") 1002 return super().except_op(expression) 1003 1004 def intersect_op(self, expression: exp.Intersect) -> str: 1005 if not expression.args.get("distinct"): 1006 self.unsupported("INTERSECT with All is not supported in Snowflake") 1007 return super().intersect_op(expression) 1008 1009 def describe_sql(self, expression: exp.Describe) -> str: 1010 # Default to table if kind is unknown 1011 kind_value = expression.args.get("kind") or "TABLE" 1012 kind = f" {kind_value}" if kind_value else "" 1013 this = f" {self.sql(expression, 'this')}" 1014 expressions = self.expressions(expression, flat=True) 1015 expressions = f" {expressions}" if expressions else "" 1016 return f"DESCRIBE{kind}{this}{expressions}" 1017 1018 def generatedasidentitycolumnconstraint_sql( 1019 self, expression: exp.GeneratedAsIdentityColumnConstraint 1020 ) -> str: 1021 start = expression.args.get("start") 1022 start = f" START {start}" if start else "" 1023 increment = expression.args.get("increment") 1024 increment = f" INCREMENT {increment}" if increment else "" 1025 return f"AUTOINCREMENT{start}{increment}" 1026 1027 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1028 this = self.sql(expression, "this") 1029 return f"SWAP WITH {this}" 1030 1031 def cluster_sql(self, expression: exp.Cluster) -> str: 1032 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1033 1034 def struct_sql(self, expression: exp.Struct) -> str: 1035 keys = [] 1036 values = [] 1037 1038 for i, e in enumerate(expression.expressions): 1039 if isinstance(e, exp.PropertyEQ): 1040 keys.append( 1041 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1042 ) 1043 values.append(e.expression) 1044 else: 1045 keys.append(exp.Literal.string(f"_{i}")) 1046 values.append(e) 1047 1048 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1049 1050 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1051 if expression.args.get("weight") or expression.args.get("accuracy"): 1052 self.unsupported( 1053 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1054 ) 1055 1056 return 
self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1057 1058 def alterset_sql(self, expression: exp.AlterSet) -> str: 1059 exprs = self.expressions(expression, flat=True) 1060 exprs = f" {exprs}" if exprs else "" 1061 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1062 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1063 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1064 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1065 tag = self.expressions(expression, key="tag", flat=True) 1066 tag = f" TAG {tag}" if tag else "" 1067 1068 return f"SET{exprs}{file_format}{copy_options}{tag}"
1018 1019 def generatedasidentitycolumnconstraint_sql( 1020 self, expression: exp.GeneratedAsIdentityColumnConstraint 1021 ) -> str: 1022 start = expression.args.get("start") 1023 start = f" START {start}" if start else "" 1024 increment = expression.args.get("increment") 1025 increment = f" INCREMENT {increment}" if increment else "" 1026 return f"AUTOINCREMENT{start}{increment}" 1027 1028 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1029 this = self.sql(expression, "this") 1030 return f"SWAP WITH {this}" 1031 1032 def cluster_sql(self, expression: exp.Cluster) -> str: 1033 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1034 1035 def struct_sql(self, expression: exp.Struct) -> str: 1036 keys = [] 1037 values = [] 1038 1039 for i, e in enumerate(expression.expressions): 1040 if isinstance(e, exp.PropertyEQ): 1041 keys.append( 1042 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1043 ) 1044 values.append(e.expression) 1045 else: 1046 keys.append(exp.Literal.string(f"_{i}")) 1047 values.append(e) 1048 1049 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1050 1051 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1052 if expression.args.get("weight") or expression.args.get("accuracy"): 1053 self.unsupported( 1054 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1055 ) 1056 1057 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1058 1059 def alterset_sql(self, expression: exp.AlterSet) -> str: 1060 exprs = self.expressions(expression, flat=True) 1061 exprs = f" {exprs}" if exprs else "" 1062 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1063 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1064 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1065 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1066 tag = self.expressions(expression, key="tag", flat=True) 1067 tag = f" TAG {tag}" if tag else "" 1068 1069 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
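Snowflake resolves unquoted identifiers as uppercase, which is the strategy this dialect declares. A minimal sketch of the effect using sqlglot's identifier normalizer (the sample query is illustrative):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are case-insensitive in Snowflake and normalize to
# uppercase, while quoted identifiers keep their exact casing.
expr = sqlglot.parse_one('SELECT a, "b" FROM t', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql("snowflake"))
# SELECT A, "b" FROM T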
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
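These dialect-level settings are plain class attributes, so they can be inspected directly. A small sketch (the specific values shown are illustrative of Snowflake's defaults, not guaranteed across versions):

from sqlglot.dialects.snowflake import Snowflake

# NULL ordering and identifier normalization are class-level dialect settings.
print(Snowflake.NULL_ORDERING)           # e.g. "nulls_are_large"
print(Snowflake.NORMALIZATION_STRATEGY)  # e.g. NormalizationStrategy.UPPERCASE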
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
      SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
      SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
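The rewrite is applied during column qualification. A sketch via the optimizer's qualify step, which consults this flag (output formatting may differ by sqlglot version):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
# The CTE alias column `c` overrides the projection alias, so the HAVING
# reference resolves the way Snowflake would resolve it.
print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))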
Associates this dialect's time formats with their equivalent Python strftime formats.
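For instance, the mapping lets a Snowflake format string such as 'yyyy-mm-DD' be rewritten into the strftime-style format another dialect expects. A hedged sketch (the DuckDB output shown is illustrative):

import sqlglot

# Snowflake's 'yyyy-mm-DD' corresponds to strftime's '%Y-%m-%d' via TIME_MAPPING.
print(
    sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-31', 'yyyy-mm-DD')",
        read="snowflake",
        write="duckdb",
    )[0]
)
# e.g. SELECT STRPTIME('2024-01-31', '%Y-%m-%d')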
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
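A sketch of the DUAL carve-out, using the optimizer helper that applies quote_identifier to every identifier in a tree (assuming sqlglot.optimizer.qualify_columns.quote_identifiers, the same helper the qualify step uses):

from sqlglot import parse_one
from sqlglot.optimizer.qualify_columns import quote_identifiers

# Every identifier gets quoted, except the special DUAL table.
expr = quote_identifiers(parse_one("SELECT a FROM dual"), dialect="snowflake")
print(expr.sql("snowflake"))  # e.g. SELECT "a" FROM dual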
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a Snowflake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        if self._prev.token_type != TokenType.WITH:
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self.expression(
                exp.TagColumnConstraint,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        return None

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
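A few Snowflake-specific constructs the Parser above handles, as a quick sketch (sample queries and outputs are illustrative):

import sqlglot

# SHOW statements parse into exp.Show via SHOW_PARSERS / _parse_show_snowflake.
sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db.s STARTS WITH 'a' LIMIT 10", read="snowflake")

# Staged file references parse through _parse_table_parts.
sqlglot.parse_one("SELECT * FROM @my_stage (PATTERN => '.*csv')", read="snowflake")

# DIV0 is rewritten into an IFF guard by _build_if_from_div0.
print(sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0])
# e.g. SELECT IFF(b = 0 AND NOT a IS NULL, 0, a / b)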
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
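A short sketch of these token rules in action ($$...$$ raw strings, // line comments, MINUS as a set-operation keyword); outputs are illustrative:

import sqlglot

# $$...$$ is a raw string and // starts a line comment in Snowflake.
print(sqlglot.transpile("SELECT $$raw $ string$$ // trailing", read="snowflake", write="snowflake")[0])
# the raw string is re-emitted as a regular quoted string, with the comment preserved

# MINUS tokenizes as EXCEPT.
print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0])
# e.g. SELECT 1 EXCEPT SELECT 2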
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ParseJSON: lambda self, e: self.func(
            "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
        ),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
                _unnest_generate_date_array,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        if expression.args.get("weight") or expression.args.get("accuracy"):
            self.unsupported(
                "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
            )

        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except where it's mandatory by the dialect; True or 'always': always quote; 'safe': only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
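A sketch exercising a few of the TRANSFORMS above from other dialects (outputs are illustrative):

import sqlglot

# exp.ApproxDistinct renders as APPROX_COUNT_DISTINCT in Snowflake.
print(sqlglot.transpile("SELECT APPROX_DISTINCT(x) FROM t", read="presto", write="snowflake")[0])
# e.g. SELECT APPROX_COUNT_DISTINCT(x) FROM t

# exp.If renders through if_sql(name="IFF").
print(sqlglot.transpile("SELECT IF(c, 1, 2) FROM t", read="duckdb", write="snowflake")[0])
# e.g. SELECT IFF(c, 1, 2) FROM t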
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql