|
|
|
|
|
|
|
|
|
|
CREATE TABLE
STORED AS AVRO
CREATE TABLE
TBLPROPERTIES
TINYINT
SMALLINT
INT
INT
DESCRIBE
SHOW CREATE TABLE
注意:TIMESTAMP
STRING
BIGINT
UNIX_TIMESTAMP()
EXTRACT()
[localhost:21000] > CREATE TABLE avro_only_sql_columns
> (
> id INT,
> bool_col BOOLEAN,
> tinyint_col TINYINT, /* Gets promoted to INT */
> smallint_col SMALLINT, /* Gets promoted to INT */
> int_col INT,
> bigint_col BIGINT,
> float_col FLOAT,
> double_col DOUBLE,
> date_string_col STRING,
> string_col STRING
> )
> STORED AS AVRO;
[localhost:21000] > CREATE TABLE impala_avro_table
> (bool_col BOOLEAN, int_col INT, long_col BIGINT, float_col FLOAT, double_col DOUBLE, string_col STRING, nullable_int INT)
> STORED AS AVRO
> TBLPROPERTIES ('avro.schema.literal'='{
> "name": "my_record",
> "type": "record",
> "fields": [
> {"name":"bool_col", "type":"boolean"},
> {"name":"int_col", "type":"int"},
> {"name":"long_col", "type":"long"},
> {"name":"float_col", "type":"float"},
> {"name":"double_col", "type":"double"},
> {"name":"string_col", "type":"string"},
> {"name": "nullable_int", "type": ["null", "int"]}]}');
[localhost:21000] > CREATE TABLE avro_examples_of_all_types (
> id INT,
> bool_col BOOLEAN,
> tinyint_col TINYINT,
> smallint_col SMALLINT,
> int_col INT,
> bigint_col BIGINT,
> float_col FLOAT,
> double_col DOUBLE,
> date_string_col STRING,
> string_col STRING
> )
> STORED AS AVRO
> TBLPROPERTIES ('avro.schema.url'='hdfs://localhost:8020/avro_schemas/alltypes.json');
hive> CREATE TABLE hive_avro_table
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> TBLPROPERTIES ('avro.schema.literal'='{
> "name": "my_record",
> "type": "record",
> "fields": [
> {"name":"bool_col", "type":"boolean"},
> {"name":"int_col", "type":"int"},
> {"name":"long_col", "type":"long"},
> {"name":"float_col", "type":"float"},
> {"name":"double_col", "type":"double"},
> {"name":"string_col", "type":"string"},
> {"name": "nullable_int", "type": ["null", "int"]}]}');
注意:"null"
NOT NULL
DECIMAL
BYTE
logicalType
"decimal"
long
BIGINT
INVALIDATE METADATA table_name
CREATE TABLE
INVALIDATE METADATA
CHAR
VARCHAR
STRING
CHAR
VARCHAR
CHAR
VARCHAR
CHAR
VARCHAR
TIMESTAMP
STRING
STRING
TIMESTAMP
TIMESTAMP
复杂类型注意事项:ARRAY
STRUCT
MAP
COUNT(*)
array
map
record
struct
union
[supported_type,null]
[null,supported_type]
enum
bytes
fixed
INVALIDATE METADATA
LOAD DATA
INSERT
REFRESH table_name
LOAD DATA
REFRESH
boolean
int
long
float
double
string
["string", "null"]
null
CREATE TABLE
JSON
tblproperties ('avro.schema.url'='hdfs://your-name-node:port/path/to/schema.json');
INSERT
CREATE TABLE AS SELECT
INSERT
LOAD DATA
CREATE TABLE
snappy
deflate
hive> set hive.exec.compress.output=true;
hive> set avro.output.codec=snappy;
ALTER TABLE
int
bigint
float
REFRESH table_name
INVALIDATE METADATA table_name
REFRESH
INVALIDATE METADATA
SELECT c1, c2 FROM t1
c3
avro.schema.literal
avro.schema.url
CREATE TABLE avro_table (a string, b string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.literal'='{
"type": "record",
"name": "my_record",
"fields": [
{"name": "a", "type": "int"},
{"name": "b", "type": "string"}
]}');
INSERT OVERWRITE TABLE avro_table SELECT 1, "avro" FROM functional.alltypes LIMIT 1;
[localhost:21000] > select * from avro_table;
+---+------+
| a | b |
+---+------+
| 1 | avro |
+---+------+
-- Promote column "a" from INT to FLOAT (no need to update Avro schema)
ALTER TABLE avro_table CHANGE A A FLOAT;
-- Add column "c" with default
ALTER TABLE avro_table ADD COLUMNS (c int);
ALTER TABLE avro_table SET TBLPROPERTIES (
'avro.schema.literal'='{
"type": "record",
"name": "my_record",
"fields": [
{"name": "a", "type": "int"},
{"name": "b", "type": "string"},
{"name": "c", "type": "int", "default": 10}
]}');
REFRESH
[localhost:21000] > refresh avro_table;
[localhost:21000] > select * from avro_table;
+---+------+----+
| a | b | c |
+---+------+----+
| 1 | avro | 10 |
+---+------+----+
原始类型:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CHAR
VARCHAR
STRING
逻辑类型:
|
|
|
|
|
|
fs.s3a.block.size
fs.s3a.block.size
fs.s3a.block.size