Anda di halaman 1dari 8

Hive Tasks

# Do the Following on HDFS

# Create the working directory in HDFS (a relative path resolves under the user's HDFS home directory).
hadoop fs -mkdir mhdfs_dir

# Copy the local access log into that HDFS directory.
hadoop fs -put Desktop/access_log mhdfs_dir

# =============================================

start hive
Hive database and environment management commands

# =============================================

[cloudera@localhost ~]$ hive

-- List the databases that exist before the practice database is created.
SHOW DATABASES;

-- Create the practice database and tag it with creator / creation-date metadata.
-- Every property key and value must be a complete quoted string literal
-- (the key 'created-on' previously lacked its opening quote).
CREATE DATABASE practice_db1
WITH DBPROPERTIES ('created-by' = 'Myself', 'created-on' = '27-08-18');

-- Confirm the new database is now listed.
SHOW DATABASES;

-- EXTENDED also prints the DBPROPERTIES attached to the database.
DESC DATABASE EXTENDED practice_db1;

-- Attach additional metadata to the practice database.
ALTER DATABASE practice_db1 SET DBPROPERTIES (
    'purpose' = 'practice demo',
    'last-modified' = '27-08-18@9.42'
);

SHOW DATABASES;

-- The extended description includes the properties set above.
DESCRIBE DATABASE EXTENDED practice_db1;

-- Make practice_db1 the current database for the statements that follow.
USE practice_db1;

-- Display the current database name on the CLI prompt.
set hive.cli.print.current.db=true;
# ==========================================================

Hive table creation


# ==========================================================

-- Single STRING column table used to hold the raw access log text.
CREATE TABLE sample (
    line STRING
);

-- Inspect the table metadata.
DESCRIBE EXTENDED sample;

-- No LOCAL keyword: the path refers to HDFS, not the client's local filesystem.
LOAD DATA INPATH 'mhdfs_dir/access_log'
INTO TABLE sample;

-- Display the loaded rows.
SELECT *
FROM sample;

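# 1. Check the table's warehouse directory (e.g. /user/hive/warehouse/practice_db1.db/sample):
#    is the access_log file available there?

# 2. Check whether the access_log file is still available at mhdfs_dir
#    (LOAD DATA INPATH without LOCAL moves the file instead of copying it).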

-- Employee table backed by comma-separated text.
CREATE TABLE emp (
    eid   INT,
    ename STRING,
    esal  FLOAT,
    did   INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- LOCAL: emp.txt is read from the client's local filesystem, not HDFS.
LOAD DATA LOCAL INPATH 'Desktop/emp.txt'
INTO TABLE emp;

-- Display the loaded employees.
SELECT *
FROM emp;

-- Department table backed by comma-separated text.
CREATE TABLE dept (
    did   INT,
    dname STRING,
    dloc  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- LOCAL: dept.txt is read from the client's local filesystem, not HDFS.
LOAD DATA LOCAL INPATH 'Desktop/dept.txt'
INTO TABLE dept;

-- Display the loaded departments.
SELECT *
FROM dept;


# ==========================================================

Joins On Hive table


# ==========================================================

-- Inner join: only employees whose did matches a department row.
-- emp's id column is eid (see CREATE TABLE emp); there is no empid column.
SELECT
    e.eid,
    e.ename,
    e.esal,
    d.did,
    d.dname
FROM emp e
JOIN dept d
    ON (e.did = d.did);

-- Left outer join: every employee, with NULL department columns when no department matches.
-- emp's id column is eid (see CREATE TABLE emp); there is no empid column.
SELECT
    e.eid,
    e.ename,
    e.esal,
    d.did,
    d.dname
FROM emp e
LEFT OUTER JOIN dept d
    ON (e.did = d.did);

-- Right outer join: every department, with NULL employee columns when no employee matches.
-- emp's id column is eid (see CREATE TABLE emp); there is no empid column.
SELECT
    e.eid,
    e.ename,
    e.esal,
    d.did,
    d.dname
FROM emp e
RIGHT OUTER JOIN dept d
    ON (e.did = d.did);

-- Full outer join: every employee and every department; unmatched sides are NULL.
-- emp's id column is eid (see CREATE TABLE emp); there is no empid column.
SELECT
    e.eid,
    e.ename,
    e.esal,
    d.did,
    d.dname
FROM emp e
FULL OUTER JOIN dept d
    ON (e.did = d.did);

# ==========================================================

Creating Views In Hive


# ==========================================================

-- View over the full outer join of emp and dept (unmatched rows on either side are kept).
CREATE VIEW emp_dept_view AS
SELECT
    e.eid,
    e.ename,
    e.esal,
    d.did,
    d.dname
FROM emp e
FULL OUTER JOIN dept d
    ON (e.did = d.did);

-- Display details of the view creation.
SHOW CREATE TABLE emp_dept_view;

-- Edit the view query: the view now exposes only the employee id and name.
CREATE OR REPLACE VIEW emp_dept_view AS
SELECT
    eid,
    ename
FROM emp;

-- Show the updated view definition.
SHOW CREATE TABLE emp_dept_view;


# ==========================================================

Complex Datatypes In Hive


# ==========================================================

-- Employee table demonstrating the ARRAY, MAP and STRUCT column types.
-- Delimiters: ',' between fields, '|' between collection items,
-- '@' between a map key and its value, newline between rows.
CREATE TABLE empcomplex (
    name        STRING,
    sal         FLOAT,
    subordinate ARRAY<STRING>,
    taxes       MAP<STRING,FLOAT>,
    address     STRUCT<street:STRING,city:STRING,state:STRING,zip:INT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '|'
    MAP KEYS TERMINATED BY '@'
    LINES TERMINATED BY '\n';

-- LOCAL: complex.txt is read from the client's local filesystem.
LOAD DATA LOCAL INPATH 'Desktop/complex.txt'
INTO TABLE empcomplex;

-- Selecting regular columns.
SELECT
    name,
    sal
FROM empcomplex;

# Selecting array column

-- The complex-type table created and loaded in this section is empcomplex;
-- no table named employee is defined, so these queries read from empcomplex.

-- Array elements are addressed by zero-based index.
SELECT
    name,
    subordinate[0]
FROM empcomplex;

SELECT
    name,
    subordinate[1]
FROM empcomplex;

-- Returns the number of values in the array for each row.
SELECT
    name,
    size(subordinate)
FROM empcomplex;

-- Check value existence in an array.
SELECT
    name,
    array_contains(subordinate, 'Mary Smith')
FROM empcomplex;

-- array_contains returns a BOOLEAN, so it serves directly as the predicate.
SELECT name
FROM empcomplex
WHERE array_contains(subordinate, 'Mary Smith');

-- Display the array like a table: one output row per array element.
SELECT
    name,
    juniors
FROM empcomplex
LATERAL VIEW explode(subordinate) adtable AS juniors;

# Select Map type column

-- The complex-type table created and loaded in this section is empcomplex;
-- no table named employee is defined, so these queries read from empcomplex.

-- Look up a single map value by its key.
SELECT
    name,
    taxes["Federal Taxes"]
FROM empcomplex;

SELECT
    name,
    taxes["Insurance"]
FROM empcomplex;

-- All keys and all values of the map, each returned as an array.
SELECT
    map_keys(taxes),
    map_values(taxes)
FROM empcomplex;

-- Display map keys and values as a table: one output row per map entry.
SELECT
    name,
    adtable.tax_type,
    adtable.tax_value
FROM empcomplex
LATERAL VIEW explode(taxes) adtable AS tax_type, tax_value;

-- Display total tax to be paid by each employee.
SELECT
    name,
    SUM(adtable.tax_value) AS total_tax
FROM empcomplex
LATERAL VIEW explode(taxes) adtable AS tax_type, tax_value
GROUP BY name;
# ==========================================================

Static Partition Tables In Hive


# ==========================================================

-- Table partitioned by country and city; the partition values are supplied
-- explicitly on each load (static partitioning).
CREATE TABLE country_manual (
    id   INT,
    name STRING,
    did  INT
)
PARTITIONED BY (country STRING, city STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Each local file is loaded into the partition named in its PARTITION clause.
-- The city value is spelled 'Edinburgh' to match the source file name
-- (uk_edinburgh.csv); a misspelled partition value would be missed by
-- queries filtering on the correct spelling.
LOAD DATA LOCAL INPATH 'Desktop/uk_edinburgh.csv'
INTO TABLE country_manual
PARTITION (country = 'UK', city = 'Edinburgh');

LOAD DATA LOCAL INPATH 'Desktop/usa_chicago.csv'
INTO TABLE country_manual
PARTITION (country = 'USA', city = 'Chicago');

LOAD DATA LOCAL INPATH 'Desktop/uk_london.csv'
INTO TABLE country_manual
PARTITION (country = 'UK', city = 'London');
# ==========================================================

Dynamic Partition Tables In Hive


# ==========================================================

#Creation of regular table (Source Table)

-- Regular (unpartitioned) source table backed by comma-separated text.
CREATE TABLE state (
    state_name  STRING,
    district    STRING,
    enrollments INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- LOCAL: AllStates.csv is read from the client's local filesystem.
LOAD DATA LOCAL INPATH 'Desktop/AllStates.csv'
INTO TABLE state;

-- Partitioned target table: state is a partition column, not a data column.
CREATE TABLE state_dynamic (
    district    STRING,
    enrollments INT
)
PARTITIONED BY (state STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

# Enables loading data into dynamic partitions

-- Dynamic partitioning must be switched on (it is off by default on older Hive releases).
set hive.exec.dynamic.partition=true;
-- nonstrict mode allows all partition columns to be dynamic (no static partition value required).
set hive.exec.dynamic.partition.mode=nonstrict;

-- Populate the partitioned table from the regular source table `state`.
-- The dynamic partition column (state) takes its value from the LAST column
-- of the SELECT list, so state_name must stay in the final position.
INSERT INTO TABLE state_dynamic PARTITION (state)
SELECT
    district,
    enrollments,
    state_name
FROM state;

# Query on regular table observe the time required

-- Query on the regular table; observe the time required.
SELECT *
FROM state
WHERE state_name = 'Kerala';

-- Query on the partitioned table; observe the time required. Filtering on the
-- partition column may not even launch a MapReduce job for execution.
SELECT *
FROM state_dynamic
WHERE state = 'Kerala';


# ==========================================================

External Tables In Hive


# ==========================================================

# Create a directory on HDFS named mhdfs_dir1, then load the file nasdaq.csv into it.
# The mkdir is required: if mhdfs_dir1 does not exist, -put creates a plain file
# called mhdfs_dir1 instead of placing nasdaq.csv inside a directory, and the
# external table's LOCATION needs to be a directory.

hadoop fs -mkdir mhdfs_dir1

hadoop fs -put Desktop/nasdaq.csv mhdfs_dir1

# Create external table in hive

-- External table over the CSV file placed in /user/cloudera/mhdfs_dir1.
-- EXTERNAL: Hive reads the files at LOCATION in place; dropping the table
-- removes only the metadata, not the data files.
-- `exchange` is backtick-quoted because EXCHANGE is a reserved keyword in
-- newer Hive releases; the column name itself is unchanged.
CREATE EXTERNAL TABLE nasdaq_ext_tab (
    `exchange`            STRING,
    stock_symbol          STRING,
    added_on              STRING,
    stock_price_open      FLOAT,
    stock_price_high      FLOAT,
    stock_price_low       FLOAT,
    stock_price_close     FLOAT,
    stock_volume          FLOAT,
    stock_price_adj_close FLOAT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/mhdfs_dir1';