WHERE
YEAR
YEAR=2010
[localhost:21000] > drop table census;
[localhost:21000] > create table census (name string, census_year int) partitioned by (year int);
[localhost:21000] > insert into census partition (year=2010) values ('Smith',2010),('Jones',2010);
[localhost:21000] > insert into census partition (year=2011) values ('Smith',2020),('Jones',2020),('Doe',2020);
[localhost:21000] > insert into census partition (year=2012) values ('Smith',2020),('Doe',2020);
[localhost:21000] > select name from census where year = census_year and census_year=2010;
+-------+
| name |
+-------+
| Smith |
| Jones |
+-------+
[localhost:21000] > explain select name from census where year = census_year and census_year=2010;
+------------------------------------------------------------------+
| Explain String |
+------------------------------------------------------------------+
| PLAN FRAGMENT 0 |
| PARTITION: UNPARTITIONED |
| |
| 1:EXCHANGE |
| |
| PLAN FRAGMENT 1 |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 1 |
| UNPARTITIONED |
| |
| 0:SCAN HDFS |
| table=predicate_propagation.census #partitions=1/3 size=22B |
| predicates: census_year = 2010, year = census_year |
+------------------------------------------------------------------+
WHERE
WHERE
CREATE VIEW
PARTITION BY
WHERE year=2016
YEAR
PARTITION BY year
OVER (PARTITION BY year,other_columns other_analytic_clauses)
WHERE
WHERE
-- The query only needs to read 3 partitions whose key values are known ahead of time.
-- That's static partition pruning.
SELECT COUNT(*) FROM sales_table WHERE year IN (2005, 2010, 2015);
CREATE TABLE yy (s STRING) PARTITIONED BY (year INT);
INSERT INTO yy PARTITION (year) VALUES ('1999', 1999), ('2000', 2000),
('2001', 2001), ('2010', 2010), ('2018', 2018);
COMPUTE STATS yy;
CREATE TABLE yy2 (s STRING, year INT);
INSERT INTO yy2 VALUES ('1999', 1999), ('2000', 2000), ('2001', 2001);
COMPUTE STATS yy2;
-- The following query reads an unknown number of partitions, whose key values
-- are only known at run time. The runtime filters line shows the
-- information used in query fragment 02 to decide which partitions to skip.
EXPLAIN SELECT s FROM yy WHERE year IN (SELECT year FROM yy2);
+--------------------------------------------------------------------------+
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 02:HASH JOIN [LEFT SEMI JOIN, BROADCAST] |
| | hash predicates: year = year |
| | runtime filters: RF000 <- year |
| | |
| |--03:EXCHANGE [BROADCAST] |
| | | |
| | 01:SCAN HDFS [default.yy2] |
| | partitions=1/1 files=1 size=620B |
| | |
| 00:SCAN HDFS [default.yy] |
| partitions=5/5 files=5 size=1.71KB |
| runtime filters: RF000 -> year |
+--------------------------------------------------------------------------+
SELECT s FROM yy WHERE year IN (SELECT year FROM yy2); -- Returns 3 rows from yy
PROFILE;
EXPLAIN
yy
PROFILE
Filter 0 (1.00 MB):
- Files processed: 3
- Files rejected: 1 (1)
- Files total: 3 (3)
ON
WHERE
ON
TIMESTAMP
OPTIMIZE_PARTITION_KEY_SCANS
SELECT MAX(year)
MIN()
MAX()
COUNT(DISTINCT)
OPTIMIZE_PARTITION_KEY_SCANS
[localhost:21000] > create table census (name string) partitioned by (year smallint);
[localhost:21000] > alter table census add partition (year=2012); -- Text format;
[localhost:21000] > alter table census add partition (year=2013); -- Text format switches to Parquet before data loaded;
[localhost:21000] > alter table census partition (year=2013) set fileformat parquet;
[localhost:21000] > insert into census partition (year=2012) values ('Smith'),('Jones'),('Lee'),('Singh');
[localhost:21000] > insert into census partition (year=2013) values ('Flores'),('Bogomolov'),('Cooper'),('Appiah');
year=2012
year=2013
INSERT ... SELECT
LOAD DATA
INSERT ... VALUES
ALTER TABLE ... SET FILEFORMAT
INSERT
LOAD DATA
REFRESH table_name
注意:ALTER TABLE
ADD PARTITION
LOCATION
ADD PARTITION
SET LOCATION
PARTITION BY
CREATE TABLE
COMPUTE STATS
重要的:
COMPUTE STATS
COMPUTE INCREMENTAL STATS
COMPUTE STATS
COMPUTE INCREMENTAL STATS
DROP STATS
COMPUTE INCREMENTAL STATS
COMPUTE INCREMENTAL STATS
COMPUTE INCREMENTAL STATS
COMPUTE INCREMENTAL STATS
COMPUTE STATS
-- Initially the table has no incremental stats, as indicated
-- 'false' under Incremental stats.
show table stats item_partitioned;
+-------------+-------+--------+----------+--------------+---------+------------------
| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
+-------------+-------+--------+----------+--------------+---------+------------------
| Books | -1 | 1 | 223.74KB | NOT CACHED | PARQUET | false
| Children | -1 | 1 | 230.05KB | NOT CACHED | PARQUET | false
| Electronics | -1 | 1 | 232.67KB | NOT CACHED | PARQUET | false
| Home | -1 | 1 | 232.56KB | NOT CACHED | PARQUET | false
| Jewelry | -1 | 1 | 223.72KB | NOT CACHED | PARQUET | false
| Men | -1 | 1 | 231.25KB | NOT CACHED | PARQUET | false
| Music | -1 | 1 | 237.90KB | NOT CACHED | PARQUET | false
| Shoes | -1 | 1 | 234.90KB | NOT CACHED | PARQUET | false
| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
| Women | -1 | 1 | 226.27KB | NOT CACHED | PARQUET | false
| Total | -1 | 10 | 2.25MB | 0B | |
+-------------+-------+--------+----------+--------------+---------+------------------
-- After the first COMPUTE INCREMENTAL STATS,
-- all partitions have stats. The first
-- COMPUTE INCREMENTAL STATS scans the whole
-- table, discarding any previous stats from
-- a traditional COMPUTE STATS statement.
compute incremental stats item_partitioned;
+-------------------------------------------+
| summary |
+-------------------------------------------+
| Updated 10 partition(s) and 21 column(s). |
+-------------------------------------------+
show table stats item_partitioned;
+-------------+-------+--------+----------+--------------+---------+------------------
| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
+-------------+-------+--------+----------+--------------+---------+------------------
| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
| Total | 17957 | 10 | 2.25MB | 0B | |
+-------------+-------+--------+----------+--------------+---------+------------------
-- Add a new partition...
alter table item_partitioned add partition (i_category='Camping');
-- Add or replace files in HDFS outside of Impala,
-- rendering the stats for a partition obsolete.
!import_data_into_sports_partition.sh
refresh item_partitioned;
drop incremental stats item_partitioned partition (i_category='Sports');
-- Now some partitions have incremental stats
-- and some do not.
show table stats item_partitioned;
+-------------+-------+--------+----------+--------------+---------+------------------
| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
+-------------+-------+--------+----------+--------------+---------+------------------
| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
| Camping | -1 | 1 | 408.02KB | NOT CACHED | PARQUET | false
| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
| Sports | -1 | 1 | 227.97KB | NOT CACHED | PARQUET | false
| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
| Total | 17957 | 11 | 2.65MB | 0B | |
+-------------+-------+--------+----------+--------------+---------+------------------
-- After another COMPUTE INCREMENTAL STATS,
-- all partitions have incremental stats, and only the 2
-- partitions without incremental stats were scanned.
compute incremental stats item_partitioned;
+------------------------------------------+
| summary |
+------------------------------------------+
| Updated 2 partition(s) and 21 column(s). |
+------------------------------------------+
show table stats item_partitioned;
+-------------+-------+--------+----------+--------------+---------+------------------
| i_category | #Rows | #Files | Size | Bytes Cached | Format | Incremental stats
+-------------+-------+--------+----------+--------------+---------+------------------
| Books | 1733 | 1 | 223.74KB | NOT CACHED | PARQUET | true
| Camping | 5328 | 1 | 408.02KB | NOT CACHED | PARQUET | true
| Children | 1786 | 1 | 230.05KB | NOT CACHED | PARQUET | true
| Electronics | 1812 | 1 | 232.67KB | NOT CACHED | PARQUET | true
| Home | 1807 | 1 | 232.56KB | NOT CACHED | PARQUET | true
| Jewelry | 1740 | 1 | 223.72KB | NOT CACHED | PARQUET | true
| Men | 1811 | 1 | 231.25KB | NOT CACHED | PARQUET | true
| Music | 1860 | 1 | 237.90KB | NOT CACHED | PARQUET | true
| Shoes | 1835 | 1 | 234.90KB | NOT CACHED | PARQUET | true
| Sports | 1783 | 1 | 227.97KB | NOT CACHED | PARQUET | true
| Women | 1790 | 1 | 226.27KB | NOT CACHED | PARQUET | true
| Total | 17957 | 11 | 2.65MB | 0B | |
+-------------+-------+--------+----------+--------------+---------+------------------