
1.

hive> CREATE TABLE temp_batting (col_value STRING); (type hive to enter the Hive shell)


# hadoop fs -ls /user/root/ (run from a second PuTTY session to check the directory)
hive> LOAD DATA INPATH '/user/sandbox/Batting.csv' OVERWRITE INTO TABLE temp_batting; (your HDFS path)
hive> Select * from temp_batting;
hive> create table batting (player_id STRING, year INT, runs INT);
hive> INSERT OVERWRITE TABLE batting
SELECT regexp_extract(col_value, '^(?:([^,]*)\,?){1}', 1) player_id,
       regexp_extract(col_value, '^(?:([^,]*)\,?){2}', 1) year,
       regexp_extract(col_value, '^(?:([^,]*)\,?){9}', 1) runs
FROM temp_batting;
hive> SELECT year, max(runs) FROM batting GROUP BY year;
hive> SELECT a.year, a.player_id, a.runs FROM batting a
JOIN (SELECT year, max(runs) runs FROM batting GROUP BY year) b
ON (a.year = b.year AND a.runs = b.runs);
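
The regexp pattern picks out the n-th comma-separated field: the non-capturing group repeats {n} times and group 1 keeps the last match. A quick sanity check on a literal row (the sample values here are made up, not taken from Batting.csv):

hive> SELECT regexp_extract('aardsda01,2004,SFN', '^(?:([^,]*)\,?){2}', 1);
-- returns 2004, i.e. the 2nd field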

2.

Sample delivery report data (cellNumber,statusCode):
123456,001
456123,002
789123,003
789456,004

Expected join output (UserName, Delivery Status Description):
jim, Delivered

---------------------------------------------------------------- creating Hive tables to store the files
CREATE TABLE customer_details (cellNumber String, consumerName String)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/user/root/userDetails.txt' INTO TABLE customer_details;
CREATE TABLE delivery_report (cellNumber String, statusCode int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
-- file name assumed for the delivery report data shown above
LOAD DATA LOCAL INPATH '/user/root/deliveryDetails.txt' INTO TABLE delivery_report;
CREATE TABLE status_codes (statusCode int, statusMessage String)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/user/root/DeliveryStatusCode.txt' INTO TABLE status_codes;
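
A quick check that the loads worked (a sketch):

hive> SELECT * FROM customer_details LIMIT 5;
hive> SELECT * FROM status_codes LIMIT 5;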

---------------------------------------------------------------- HIVE QUERY to execute the join operation on the data sets


SELECT cd.consumerName, sc.statusMessage
FROM customer_details cd
JOIN delivery_report dr ON (cd.cellNumber = dr.cellNumber)
JOIN status_codes sc ON (dr.statusCode = sc.statusCode);
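
To sanity-check the first join on its own, before the status descriptions are added (a minimal sketch):

hive> SELECT cd.consumerName, dr.statusCode
      FROM customer_details cd
      JOIN delivery_report dr ON (cd.cellNumber = dr.cellNumber);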

3. HIVE queries for word count


drop table if exists doc;
create table doc (text string) row format delimited fields terminated by '\n' stored as textfile;
load data inpath '4300.txt' overwrite into table doc;
# hadoop fs -ls /apps/hive/warehouse/
or, from the Hive shell:
hive> dfs -ls /apps/hive/warehouse;

SELECT word, COUNT(*) FROM doc LATERAL VIEW explode(split(text, ' ')) xTable as word GROUP BY word;
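
To see only the most frequent words, the same query can be sorted and limited (a sketch):

hive> SELECT word, COUNT(*) AS cnt
      FROM doc LATERAL VIEW explode(split(text, ' ')) xTable AS word
      GROUP BY word ORDER BY cnt DESC LIMIT 10;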

---------------------------------------------------------------- DATA MODEL - Partitions

// locate the table batting, then go to that directory and see the files in it
$ hadoop fs -ls /apps/hive
$ hadoop fs -ls /apps/hive/warehouse
$ hadoop fs -ls /apps/hive/warehouse/batting/
hive> create TABLE president (ts BIGINT, line STRING) partitioned by (country STRING) row format delimited fields terminated by ',';
// upload the file president.txt with WinSCP
hive> LOAD DATA LOCAL INPATH 'user/root/president.txt' OVERWRITE INTO TABLE president;
// the load above will fail: the table is partitioned, so a partition must be specified
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='USA');
$ hadoop fs -ls /apps/hive/warehouse/president
$ hadoop fs -ls /apps/hive/warehouse/president/country=USA
// then upload two more files with WinSCP: presidentindia, presidentrussia
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='india');
hive> LOAD DATA LOCAL INPATH '/root/ravi/president.txt' OVERWRITE INTO TABLE president partition (country='russia');
// to see the output:
hive> show partitions president;
hive> select * from president where country='USA';
hive> select * from president where country='india';
hive> select * from president where country='russia';
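
Each partition shows up as its own subdirectory (country=USA, country=india, ...) under the table's warehouse directory. As an alternative to one LOAD per partition, Hive can also route rows into partitions dynamically; a minimal sketch, assuming the raw rows sit in a staging table president_stage with a country column (that staging table is hypothetical, not part of the steps above):

hive> SET hive.exec.dynamic.partition.mode=nonstrict;
hive> INSERT OVERWRITE TABLE president PARTITION (country)
      SELECT ts, line, country FROM president_stage;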

---------------------------------------------------------------- HCatalog
If you are a Hive user, you can use your Hive metastore with no modifications:

$ hcat -e "create table rawevent (url string, user string) partitioned by (ds string);"
grunt> a = LOAD 'rawevent' USING org.apache.hcatalog.pig.HCatLoader();
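
Once loaded through HCatLoader, the relation behaves like any other Pig relation; a short sketch of filtering on the partition column (the ds value here is made up):

grunt> b = FILTER a BY ds == '20130601';
grunt> DUMP b;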
---------------------------------------------------------------- SQOOP

1. $ mysql (launch the MySQL client on the command prompt; it is already in the sandbox)
2. show databases;
3. use test;
4. create table mysql_data(id int primary key, name varchar(50));
5. insert into mysql_data values(1,'SRK');
6. select * from mysql_data;
7. insert into mysql_data values(2,'AB');

$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data
(by default the output goes to a directory named after the table under your HDFS home directory)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --target-dir /ravi/sqoop2 (custom target directory)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --target-dir /ravi/sqoop3 -m 1 (run a single mapper)
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --where "id>1"
$ sqoop import --connect jdbc:mysql://localhost/test --table mysql_data --where "id>1" --target-dir /ravi/sqoop2
