sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")
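For orientation, here is a minimal sketch of how this dialect is typically exercised through sqlglot's public API. The SQL strings are illustrative inputs; the exact generated output may vary across sqlglot versions.

    import sqlglot

    # Parse DuckDB SQL into sqlglot's AST using this dialect; per FUNCTIONS above,
    # STR_SPLIT is mapped to the generic exp.Split expression
    ast = sqlglot.parse_one("SELECT STR_SPLIT(col, ',') FROM t", read="duckdb")
    print(repr(ast))

    # Transpile in the other direction: per _generate_datetime_array_sql, BigQuery's
    # GENERATE_DATE_ARRAY is rewritten to GENERATE_SERIES with a cast back to DATE[]
    print(
        sqlglot.transpile(
            "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-07')",
            read="bigquery",
            write="duckdb",
        )[0]
    )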
NULL_ORDERING
    Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

CONCAT_COALESCE
    A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

SUPPORTS_ORDER_BY_ALL
    Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.

SUPPORTS_FIXED_SIZE_ARRAYS
    Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts (e.g. in DuckDB). In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

NORMALIZATION_STRATEGY
    Specifies the strategy according to which identifiers should be normalized.
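These dialect flags are easiest to observe from the parse side. A small sketch; the printed expression trees and SQL are illustrative, and `x`, `t`, `a`, `b` are placeholder names:

import sqlglot

# With fixed-size array support, DuckDB reads INT[3] as an array type, while
# Snowflake reads the same text as a cast followed by a subscript
print(repr(sqlglot.parse_one("x::INT[3]", read="duckdb")))
print(repr(sqlglot.parse_one("x::INT[3]", read="snowflake")))

# ORDER BY ALL is understood and round-trips for DuckDB
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])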
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
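In practice, ordinary key paths are converted to sqlglot's structured JSON path representation, while DuckDB's pointer-style and back-of-list paths pass through verbatim. A hedged sketch (assuming the arrow extraction operator, which DuckDB supports; `j` and `t` are placeholders):

import sqlglot

# '$.a.b' is parsed into a structured JSON path
print(sqlglot.transpile("SELECT j -> '$.a.b' FROM t", read="duckdb", write="duckdb")[0])

# JSON pointer ('/a/b') and back-of-list ('[#-1]') syntaxes are left untouched
print(sqlglot.transpile("SELECT j -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])
print(sqlglot.transpile("SELECT j -> '$.a[#-1]' FROM t", read="duckdb", write="duckdb")[0])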
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "ATTACH": TokenType.COMMAND,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "CHARACTER VARYING": TokenType.TEXT,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }
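Most of the keyword table above remaps DuckDB type aliases onto sqlglot's generic token types, so alias spellings normalize to canonical ones on the way back out. A quick sketch; the canonical spellings printed are indicative, and `x` is a placeholder:

import sqlglot

# STRING/BPCHAR map to TEXT, LOGICAL to BOOLEAN, SIGNED to INT, etc.
for type_name in ("STRING", "BPCHAR", "LOGICAL", "SIGNED"):
    print(sqlglot.transpile(f"CAST(x AS {type_name})", read="duckdb", write="duckdb")[0])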
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
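Two of the parser hooks above, exercised through the public API (a sketch; `x` and `t` are placeholders and the output is indicative of shape rather than byte-exact):

import sqlglot

# MEDIAN(x) is parsed as a continuous percentile, which the DuckDB generator
# below renders as QUANTILE_CONT(x, 0.5)
print(sqlglot.transpile("SELECT MEDIAN(x) FROM t", read="duckdb", write="duckdb")[0])

# _parse_table_sample attaches an explicit default method when none is given:
# RESERVOIR for discrete sizes, SYSTEM otherwise
print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 5", read="duckdb", write="duckdb")[0])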
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
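These options flow through the top-level helpers, so the Parser rarely needs to be instantiated directly. A sketch of error handling with ErrorLevel.RAISE; the malformed query is just an example:

import sqlglot
from sqlglot import ErrorLevel

try:
    # error_level and max_errors are forwarded to the Parser documented above
    sqlglot.parse_one("SELECT FROM WHERE", read="duckdb", error_level=ErrorLevel.RAISE)
except sqlglot.ParseError as e:
    print(e.errors)  # structured messages with line/column context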
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def interval_sql(self, expression: exp.Interval) -> str:
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        return self.sql(expression, "this")
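A few of the TRANSFORMS above, exercised by transpiling from other dialects (a sketch; `xs`, `ts`, `s`, and `t` are placeholder names, and the printed SQL is indicative rather than byte-exact):

import sqlglot

# exp.Explode -> UNNEST when reading Spark
print(sqlglot.transpile("SELECT EXPLODE(xs) FROM t", read="spark", write="duckdb")[0])

# exp.TimeToUnix -> EPOCH when reading Presto's TO_UNIXTIME
print(sqlglot.transpile("SELECT TO_UNIXTIME(ts) FROM t", read="presto", write="duckdb")[0])

# exp.Split: DuckDB's own STRING_TO_ARRAY normalizes to STR_SPLIT
print(sqlglot.transpile("SELECT STRING_TO_ARRAY(s, ',') FROM t", read="duckdb", write="duckdb")[0])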
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
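All of these options can be passed through sqlglot.transpile (or Expression.sql), which forwards them to this Generator. A brief sketch, with placeholder table and column names:

import sqlglot

sql = "select a, sum(b) from t group by a"

# pretty/identify map directly onto the generator arguments described above
print(sqlglot.transpile(sql, read="duckdb", write="duckdb", pretty=True, identify=True)[0])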
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql