default
defaultuse db_name default.customerdefault.customer_name
[impala-host:21000] > show databases
Query finished, fetching results ...
default
Returned 1 row(s) in 0.00s
[impala-host:21000] > show tables
Query finished, fetching results ...
customer
customer_address
Returned 2 row(s) in 0.00s
[impala-host:21000] > describe customer_address
+------------------+--------+---------+
| name | type | comment |
+------------------+--------+---------+
| ca_address_sk | int | |
| ca_address_id | string | |
| ca_street_number | string | |
| ca_street_name | string | |
| ca_street_type | string | |
| ca_suite_number | string | |
| ca_city | string | |
| ca_county | string | |
| ca_state | string | |
| ca_zip | string | |
| ca_country | string | |
| ca_gmt_offset | float | |
| ca_location_type | string | |
+------------------+--------+---------+
Returned 13 row(s) in 0.01
impala-shell
$ impala-shell -i impala-host
Connected to localhost:21000
[impala-host:21000] > select count(*) from customer_address;
50000
Returned 1 row(s) in 0.37s
$ impala-shell -i impala-host -f myquery.sql
Connected to localhost:21000
50000
Returned 1 row(s) in 0.19s
impala-shell
*
。
$ impala-shell -i impala-host -q 'select count(*) from customer_address'
Connected to localhost:21000
50000
Returned 1 row(s) in 0.29s
.csv
query.sql
query.sql
impala-host
query.sqlquery.sqlimpala-host
impala-shell.sh -i impala-host -f query.sql
LIMIT
Ctrl-C
impala-shell
SELECT * FROM tab1;
SELECT * FROM tab2;
SELECT * FROM tab2 LIMIT 5;
+----+-------+------------+-------------------------------+
| id | col_1 | col_2 | col_3 |
+----+-------+------------+-------------------------------+
| 1 | true | 123.123 | 2012-10-24 08:55:00 |
| 2 | false | 1243.5 | 2012-10-25 13:40:00 |
| 3 | false | 24453.325 | 2008-08-22 09:33:21.123000000 |
| 4 | false | 243423.325 | 2007-05-12 22:32:21.334540000 |
| 5 | true | 243.325 | 1953-04-22 09:11:33 |
+----+-------+------------+-------------------------------+
+----+-------+---------------+
| id | col_1 | col_2 |
+----+-------+---------------+
| 1 | true | 12789.123 |
| 2 | false | 1243.5 |
| 3 | false | 24453.325 |
| 4 | false | 2423.3254 |
| 5 | true | 243.325 |
| 60 | false | 243565423.325 |
| 70 | true | 243.325 |
| 80 | false | 243423.325 |
| 90 | true | 243.325 |
+----+-------+---------------+
+----+-------+-----------+
| id | col_1 | col_2 |
+----+-------+-----------+
| 1 | true | 12789.123 |
| 2 | false | 1243.5 |
| 3 | false | 24453.325 |
| 4 | false | 2423.3254 |
| 5 | true | 243.325 |
+----+-------+-----------+
SELECT tab1.col_1, MAX(tab2.col_2), MIN(tab2.col_2)
FROM tab2 JOIN tab1 USING (id)
GROUP BY col_1 ORDER BY 1 LIMIT 5;
+-------+-----------------+-----------------+
| col_1 | max(tab2.col_2) | min(tab2.col_2) |
+-------+-----------------+-----------------+
| false | 24453.325 | 1243.5 |
| true | 12789.123 | 243.325 |
+-------+-----------------+-----------------+
SELECT tab2.*
FROM tab2,
(SELECT tab1.col_1, MAX(tab2.col_2) AS max_col2
FROM tab2, tab1
WHERE tab1.id = tab2.id
GROUP BY col_1) subquery1
WHERE subquery1.max_col2 = tab2.col_2;
+----+-------+-----------+
| id | col_1 | col_2 |
+----+-------+-----------+
| 1 | true | 12789.123 |
| 3 | false | 24453.325 |
+----+-------+-----------+
INSERT OVERWRITE TABLE tab3
SELECT id, col_1, col_2, MONTH(col_3), DAYOFMONTH(col_3)
FROM tab1 WHERE YEAR(col_3) = 2012;
TAB3
SELECT * FROM tab3;
+----+-------+---------+-------+-----+
| id | col_1 | col_2 | month | day |
+----+-------+---------+-------+-----+
| 1 | true | 123.123 | 10 | 24 |
| 2 | false | 1243.5 | 10 | 25 |
+----+-------+---------+-------+-----+