Fetching Hive schema definitions using WebHCat
The following shell script retrieves the schema information (databases, tables, and columns) from Hive by querying the WebHCat server.
#!/usr/bin/env bash
# fetch_webhcat.sh, v0.1, 2016-04-00, [email protected]
#
# Dumps Hive schema metadata through the WebHCat (Templeton) REST API:
#   databases.json               - list of databases (raw server response)
#   db-<db>.json                 - tables of each database (pretty-printed)
#   db-<db>-table-<table>.json   - columns of each table (pretty-printed)
# All files are written to the current working directory.
#
# Usage: fetch_webhcat.sh [-s <server:port>] [-u <user name>]
#   -s, --server   WebHCat host:port   (default: server:50111)
#   -u, --user     value sent as user.name (default: JohnDoe)
#
# Exit status: 0 on success, 1 if the database list cannot be fetched or
# parsed (or a prerequisite is missing), 2 on a usage error. A failure on a
# single database or table is reported on stderr and skipped.
#
# Pre-requisites: jq, curl, python (json.tool)

set -euo pipefail

_WEBHCAT_SERVER="server:50111"
_USER_NAME="JohnDoe"

# Print the usage line to stderr and exit with status 2.
usage() {
  printf "Usage: %s: [-s <server:port> -u <user name>]\n" "$0" >&2
  exit 2
}

# Print a diagnostic to stderr without stopping the script.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Print a diagnostic to stderr and abort with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# --- argument parsing -------------------------------------------------------
# Numeric test on $# so that a lone trailing argument is not silently ignored;
# options that take a value are rejected when the value is missing.
while (( $# > 0 )); do
  case "$1" in
    -u|--user)
      (( $# >= 2 )) || usage
      _USER_NAME="$2"
      shift 2
      ;;
    -s|--server)
      (( $# >= 2 )) || usage
      _WEBHCAT_SERVER="$2"
      shift 2
      ;;
    *)
      # unknown option
      usage
      ;;
  esac
done

# --- prerequisites ----------------------------------------------------------
for _tool in curl jq; do
  command -v "$_tool" >/dev/null 2>&1 || die "required tool not found: $_tool"
done

# json.tool is available in both interpreters; accept whichever is installed.
if command -v python >/dev/null 2>&1; then
  _PYTHON=python
elif command -v python3 >/dev/null 2>&1; then
  _PYTHON=python3
else
  die "required tool not found: python (or python3)"
fi

# --- fetch ------------------------------------------------------------------
_URL_BASE="http://${_WEBHCAT_SERVER}/templeton/v1/ddl/database"
_URL_SUFFIX="?user.name=${_USER_NAME}"

# Database list: stored exactly as the server returned it.
_URL_START="${_URL_BASE}${_URL_SUFFIX}"
echo "Fetching Hive server databases from $_URL_START to databases.json"
curl "$_URL_START" > databases.json \
  || die "could not fetch database list from $_URL_START"

_DATABASES=$(jq -r '.databases[]' ./databases.json) \
  || die "could not read .databases[] from databases.json"

while IFS= read -r db; do
  # A here-string always yields at least one (possibly empty) line.
  [[ -n "$db" ]] || continue

  _URL_TABLES="${_URL_BASE}/${db}/table${_URL_SUFFIX}"
  _FILE_TABLE="db-${db}.json"
  echo "Fetching database $_URL_TABLES to $_FILE_TABLE"

  # Table list of this database, pretty-printed.
  if ! curl "$_URL_TABLES" | "$_PYTHON" -m json.tool > "$_FILE_TABLE"; then
    warn "could not fetch tables of database '$db'; skipping"
    continue
  fi

  if ! _TABLES=$(jq -r '.tables[]' "$_FILE_TABLE"); then
    warn "could not read .tables[] from $_FILE_TABLE; skipping"
    continue
  fi

  while IFS= read -r table; do
    [[ -n "$table" ]] || continue

    _URL_COLUMNS="${_URL_BASE}/${db}/table/${table}/column${_URL_SUFFIX}"
    # One file per table; the name is rebuilt from scratch on every iteration
    # so it never accumulates the names of previously processed tables.
    _FILE_TABLE_COLUMNS="db-${db}-table-${table}.json"
    echo "Fetching table $_URL_COLUMNS to $_FILE_TABLE_COLUMNS"

    # Column list of this table, pretty-printed.
    if ! curl "$_URL_COLUMNS" \
      | "$_PYTHON" -m json.tool > "$_FILE_TABLE_COLUMNS"; then
      warn "could not fetch columns of table '$db.$table'; skipping"
    fi
  done <<< "$_TABLES"
done <<< "$_DATABASES"