sqlglot.dialects.redshift
```python
from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    json_extract_segments,
    no_tablesample_sql,
    rename_func,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
        if expr_type is exp.TsOrDsAdd:
            expr.set("return_type", exp.DataType.build("TIMESTAMP"))

        return expr

    return _builder


class Redshift(Postgres):
    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _build_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _build_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _build_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        SUPPORTS_IMPLICIT_UNNEST = True

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }
        KEYWORDS.pop("VALUES")

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = False
        MULTI_ARG_DISTINCT = True

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this),
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.StartsWith: lambda self,
            e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: lambda self,
            e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def unnest_sql(self, expression: exp.Unnest) -> str:
            args = expression.expressions
            num_args = len(args)

            if num_args > 1:
                self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}")
                return ""

            arg = self.sql(seq_get(args, 0))
            alias = self.expressions(expression.args.get("alias"), key="columns")
            return f"{arg} AS {alias}" if alias else arg

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of its properties clause, so we remove it"""
            return self.properties(properties, prefix=" ", suffix="")

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a noop
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default, but people generally
            mean a VARCHAR of maximum length, which is `VARCHAR(MAX)` in Redshift. Therefore, if we get
            a `TEXT` data type without precision, we convert it to `VARCHAR(MAX)`; if it does have
            precision, we just convert `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
```
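As a quick orientation, here is a minimal usage sketch (assuming a recent sqlglot release in which this module is registered under the dialect name "redshift"; the column name is hypothetical):

```python
import sqlglot

# GETDATE() is parsed into exp.CurrentTimestamp (see FUNCTIONS above) and
# should render in Postgres as CURRENT_TIMESTAMP.
print(sqlglot.transpile("SELECT GETDATE()", read="redshift", write="postgres")[0])

# DATEADD goes through _build_date_delta and becomes exp.TsOrDsAdd, then
# round-trips back to DATEADD when generating Redshift SQL.
print(sqlglot.transpile("SELECT DATEADD(month, 1, hire_date)", read="redshift", write="redshift")[0])
```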
class Redshift(Postgres):
Specifies the strategy according to which identifiers should be normalized.
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
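To illustrate the case-insensitive strategy, a sketch (assuming sqlglot's normalize_identifiers helper from sqlglot.optimizer; table and column names are hypothetical):

```python
from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are case-insensitive in Redshift, so normalization
# can safely lowercase them without changing the query's meaning.
expr = parse_one("SELECT Amount FROM Sales", read="redshift")
print(normalize_identifiers(expr, dialect="redshift").sql(dialect="redshift"))
```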
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Redshift.Parser(Postgres.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
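For example, a sketch of how the FUNCTIONS and NO_PAREN_FUNCTION_PARSERS overrides above surface when parsing (hypothetical column names; exact tree reprs vary by version):

```python
from sqlglot import exp, parse_one

# DATEADD(unit, delta, expr) is built into exp.TsOrDsAdd by _build_date_delta.
tree = parse_one("SELECT DATEADD(month, 1, hire_date)", read="redshift")
assert tree.find(exp.TsOrDsAdd) is not None

# SYSDATE parses without parentheses and is flagged as transaction-scoped,
# so it round-trips as SYSDATE rather than GETDATE().
assert parse_one("SELECT SYSDATE", read="redshift").sql(dialect="redshift") == "SELECT SYSDATE"
```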
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Redshift.Tokenizer(Postgres.Tokenizer):
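A sketch of what the `#` change enables (hypothetical temp-table name):

```python
from sqlglot import parse_one

# With "#" removed from SINGLE_TOKENS, #temp lexes as a single identifier,
# matching Redshift's temporary-table naming convention.
print(parse_one("SELECT * FROM #temp", read="redshift").sql(dialect="redshift"))
```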
class Redshift.Generator(Postgres.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
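A couple of the TRANSFORMS overrides above, sketched end to end (expected outputs noted in comments; exact formatting may vary by version):

```python
import sqlglot

# exp.Pow was popped from the inherited TRANSFORMS, so Postgres's ^ operator
# falls back to function syntax, roughly: SELECT POWER(x, 2)
print(sqlglot.transpile("SELECT x ^ 2", read="postgres", write="redshift")[0])

# exp.ApproxDistinct renders with Redshift's APPROXIMATE COUNT(DISTINCT ...).
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(x)", read="duckdb", write="redshift")[0])
```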
def unnest_sql(self, expression: exp.Unnest) -> str:
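The intent, sketched: Redshift navigates SUPER arrays by referencing them directly in FROM, so a single-argument UNNEST flattens to `arr AS alias` (hypothetical names; output may vary by source dialect):

```python
import sqlglot

# A BigQuery-style lateral UNNEST over an array column should flatten to a
# direct reference, roughly: SELECT v FROM t, t.arr AS v
print(sqlglot.transpile("SELECT v FROM t, UNNEST(t.arr) AS v", read="bigquery", write="redshift")[0])
```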
def with_properties(self, properties: exp.Properties) -> str:
Redshift doesn't have WITH as part of its properties clause, so we remove it.
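For example (a sketch; DISTKEY/SORTKEY are emitted bare, after a single space, rather than inside a WITH (...) wrapper):

```python
import sqlglot

# The properties render with just a leading space: no WITH keyword and no
# surrounding parentheses, so this statement should round-trip unchanged.
print(
    sqlglot.transpile(
        "CREATE TABLE t (a INTEGER) DISTKEY(a) SORTKEY(a)", read="redshift", write="redshift"
    )[0]
)
```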
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
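A sketch of the noop behavior (hypothetical column name):

```python
import sqlglot

# Redshift has no JSON type, so the cast is dropped and the operand is
# emitted as-is; expected output is roughly: SELECT payload
print(sqlglot.transpile("SELECT CAST(payload AS JSON)", write="redshift")[0])
```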
def datatype_sql(self, expression: exp.DataType) -> str:
Redshift converts the TEXT data type to VARCHAR(255) by default, but people generally mean a VARCHAR of maximum length, which is VARCHAR(MAX) in Redshift. Therefore, if we get a TEXT data type without precision, we convert it to VARCHAR(MAX); if it does have precision, we just convert TEXT to VARCHAR.
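Sketched (hypothetical table name; expected output noted in the comment):

```python
import sqlglot

# TEXT without precision widens to VARCHAR(MAX), roughly:
# CREATE TABLE t (a VARCHAR(MAX))
print(sqlglot.transpile("CREATE TABLE t (a TEXT)", write="redshift")[0])
```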
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- bracket_sql
- matchagainst_sql