Using JsonSerDe in Hive
- Download JSON Serde – https://github.com/rcongiu/Hive-JSON-Serde
- Compile command for hive 1.2.1 – “mvn -Pcdh5 -Dcdh5.hive.version=1.2.1 clean package” . change hive version per the environment
- Copy json-serde/target/json-serde-1.3.8-SNAPSHOT-jar-with-dependencies.jar (or similar) to hive/lib
- Restart hive
Sample JSON with test HiveQLs
-- json sample: {"k2":"v2","k1":"v1"}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"v2","k1":"v1"}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"k2_v","k1":"k1_v","k3":{"k3_k1":"k3_k1_v"}}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string, k3 struct)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"k2_v","k1":"k1_v","k3":{"$k3_k1":"$k3_k1_v"}}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string, k3 struct)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2", "mapping.k3_k1" = "$k3_k1");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select *,k3.k3_k1 from json_test;