
1.

hive> CREATE TABLE temp_batting (col_value STRING); (type hive to enter the Hive shell)


# hadoop fs -ls /user/root/ (run from a second PuTTY session to check the directory)
hive> LOAD DATA INPATH '/user/sandbox/Batting.csv' OVERWRITE INTO TABLE temp_batting; (your HDFS path)
hive> Select * from temp_batting;
hive> create table batting (player_id STRING, year INT, runs INT);
hive> INSERT OVERWRITE TABLE batting
SELECT regexp_extract(col_value, '^(?:([^,]*)\,?){1}', 1) player_id,
       regexp_extract(col_value, '^(?:([^,]*)\,?){2}', 1) year,
       regexp_extract(col_value, '^(?:([^,]*)\,?){9}', 1) runs
FROM temp_batting;
hive> SELECT year, max(runs) FROM batting GROUP BY year;
hive> SELECT a.year, a.player_id, a.runs FROM batting a
JOIN (SELECT year, max(runs) runs FROM batting GROUP BY year) b
ON (a.year = b.year AND a.runs = b.runs);
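
The regexp pattern picks out the n-th comma-separated field: the non-capturing group repeats {n} times and group 1 keeps the last match. A quick sanity check on a literal row (the sample values here are made up, not taken from Batting.csv):

hive> SELECT regexp_extract('aardsda01,2004,SFN', '^(?:([^,]*)\,?){2}', 1);
-- returns 2004, i.e. the 2nd field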

2.

Sample delivery report data (cellNumber,statusCode):
123456,001
456123,002
789123,003
789456,004

Expected join output (UserName, Delivery Status Description):
jim, Delivered

---------------------------------------------------------------- creating Hive tables to store the files
CREATE TABLE customer_details (cellNumber String, consumerName String)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/user/root/userDetails.txt' INTO TABLE customer_details;
CREATE TABLE delivery_report (cellNumber String, statusCode int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
-- file name assumed for the delivery report data shown above
LOAD DATA LOCAL INPATH '/user/root/deliveryDetails.txt' INTO TABLE delivery_report;
CREATE TABLE status_codes (statusCode int, statusMessage String)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/user/root/DeliveryStatusCode.txt' INTO TABLE status_codes;
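
A quick check that the loads worked (a sketch):

hive> SELECT * FROM customer_details LIMIT 5;
hive> SELECT * FROM status_codes LIMIT 5;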

---------------------------------------------------------------- HIVE QUERY to execute the join operation on the data sets


SELECT cd.consumerName, sc.statusMessage
FROM customer_details cd
JOIN delivery_report dr ON (cd.cellNumber = dr.cellNumber)
JOIN status_codes sc ON (dr.statusCode = sc.statusCode);
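
To sanity-check the first join on its own, before the status descriptions are added (a minimal sketch):

hive> SELECT cd.consumerName, dr.statusCode
      FROM customer_details cd
      JOIN delivery_report dr ON (cd.cellNumber = dr.cellNumber);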

3. HIVE queries for word count


drop table if exists doc;
create table doc (text string) row format delimited fields terminated by '\n' stored as textfile;
load data inpath '4300.txt' overwrite into table doc;
# hadoop fs -ls /apps/hive/warehouse/
or, from the Hive shell:
hive> dfs -ls /apps/hive/warehouse;

SELECT word, COUNT(*) FROM doc LATERAL VIEW explode(split(text, ' ')) xTable as word GROUP BY word;
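
To see only the most frequent words, the same query can be sorted and limited (a sketch):

hive> SELECT word, COUNT(*) AS cnt
      FROM doc LATERAL VIEW explode(split(text, ' ')) xTable AS word
      GROUP BY word ORDER BY cnt DESC LIMIT 10;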

---------------------------------------------------------------- DATA MODEL - Partitions

// locate the table batting, then go to that directory and see the files in it
$ hadoop fs -ls /apps/hive
$ hadoop fs -ls /apps/hive/warehouse
$ hadoop fs -ls /apps/hive/warehouse/batting/
hive> create TABLE president (ts BIGINT, line STRING) partitioned by (country STRING) row format delimited fields terminated by ',';
// upload the file president.txt with WinSCP
hive> LOAD DATA LOCAL INPATH 'user/root/president.txt' OVERWRITE INTO TABLE president;
// the load above will fail: the table is partitioned, so a partition must be specified
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='USA');
$ hadoop fs -ls /apps/hive/warehouse/president
$ hadoop fs -ls /apps/hive/warehouse/president/country=USA
// then upload two more files with WinSCP: presidentindia, presidentrussia
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='india');
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='russia');
// to see the output:
hive> show partitions president;
hive> select * from president where country='USA';
hive> select * from president where country='india';
hive> select * from president where country='russia';
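
Each partition shows up as its own subdirectory (country=USA, country=india, ...) under the table's warehouse directory. As an alternative to one LOAD per partition, Hive can also route rows into partitions dynamically; a minimal sketch, assuming the raw rows sit in a staging table president_stage with a country column (that staging table is hypothetical, not part of the steps above):

hive> SET hive.exec.dynamic.partition.mode=nonstrict;
hive> INSERT OVERWRITE TABLE president PARTITION (country)
      SELECT ts, line, country FROM president_stage;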

---------------------------------------------------------------- HCatalog
If you are a Hive user, you can use your Hive metastore with no modifications:

$ hcat -e "create table rawevent (url string, user string) partitioned by (ds string);"
grunt> a = LOAD 'rawevent' USING org.apache.hcatalog.pig.HCatLoader();
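
Once loaded through HCatLoader, the relation behaves like any other Pig relation; a short sketch of filtering on the partition column (the ds value here is made up):

grunt> b = FILTER a BY ds == '20130601';
grunt> DUMP b;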
---------------------------------------------------------------- SQOOP

1. $ mysql (launch the MySQL client on the command prompt; it is already in the sandbox)
2. show databases;
3. use test;
4. create table mysql_data(id int primary key, name varchar(50));
5. insert into mysql_data values(1,'SRK');
6. select * from mysql_data;
7. insert into mysql_data values(2,'AB');

$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data
(by default the output goes to a directory named after the table under your HDFS home directory)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --target-dir /ravi/sqoop2 (custom target directory)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --target-dir /ravi/sqoop3 -m 1 (run a single mapper)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --where "id>1"
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --where "id>1" --target-dir /ravi/sqoop2
