test.resources.findPopularRoutes.pig Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of azure-documentdb-hadoop Show documentation
Show all versions of azure-documentdb-hadoop Show documentation
Hadoop Connector for Microsoft Azure DocumentDB
The newest version!
-- Setup for findPopularRoutes.pig: registers the required jars and defines
-- the parameters used by the rest of the script.
--
-- Run locally with:
--   pig -x local /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/findPopularRoutes.pig
--
-- Every setting below is declared with %default (lowest precedence), so it can
-- be overridden from the command line or a parameter file without editing this
-- script, for example:
--   pig -x local -param docdbDBName=OtherDatabase findPopularRoutes.pig

-- Register the DocumentDB SDK jar and the DocumentDB Hadoop connector jar.
REGISTER /home/cloudera/sdk/Java/DocumentDB/target/DocumentDB-1.1.0-jar-with-dependencies.jar
REGISTER /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/target/DocumentDBHadoopConnector-1.1.0.jar

-- Input CSV locations.
%default RoutesCSV '/home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/routes.csv'
%default StopsCSV '/home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/stops.csv'

-- DocumentDB connection settings.
%default docdbEndpoint 'https://ming2.documents.azure.com:443/'
-- NOTE(review): an account master key is committed in source here. Prefer
-- supplying it at run time (-param docdbMasterkey=... or -param_file) and
-- rotating this key; the value is kept only so existing invocations still work.
%default docdbMasterkey 'R17FHH6bn0Knz/yhb4jMBRkcQMe1tVeiQ6e0SlQnDjtXRJG9OufFEt7sgaC+PaCMeAmcYY9r+gphaJuc1jdKTg=='
%default docdbDBName 'DemoDatabase'
%default docdbOutputCollName 'StopsWithRouteCount'
-- Load the routes CSV as comma-separated text; every column is read as a
-- string. The `id` column of this relation holds the route_id.
routes = LOAD '$RoutesCSV' USING PigStorage(',') AS (
    agency_id:chararray,
    id:chararray,
    route_short_name:chararray,
    route_long_name:chararray,
    route_type:chararray,
    route_desc:chararray,
    route_url:chararray,
    route_color:chararray,
    route_text_color:chararray
);
-- Load the second CSV as comma-separated text. The `id` column of this
-- relation holds the trip_id; `route_id` is the key later joined against
-- routes.id, and `direction_id` is parsed as an int so it can be compared
-- numerically.
-- trip_headsign is typed explicitly as chararray: left untyped it would default
-- to bytearray, unlike every other text column in this script.
-- NOTE(review): the column list looks like per-trip data rather than per-stop
-- data despite the alias and file name -- confirm against the CSV contents.
stops = LOAD '$StopsCSV' USING PigStorage(',') AS (
    route_id:chararray,
    id:chararray,
    service_id:chararray,
    trip_headsign:chararray,
    direction_id:int,
    shape_id:chararray
);
-- Keep only the rows whose direction_id equals 1.
stops_filtered = FILTER stops BY (direction_id == 1);

-- Inner-join every route to its remaining rows: routes.id = stops_filtered.route_id.
popularRoutes = JOIN routes BY id,
                     stops_filtered BY route_id;

-- Print the schema of the joined relation.
DESCRIBE popularRoutes;
-- Collect the joined rows into one bag per (short name, long name) pair of the
-- route, then print the grouped schema.
groupRoute = GROUP popularRoutes BY (
    routes::route_short_name,
    routes::route_long_name
);
DESCRIBE groupRoute;
-- Emit one record per route group:
--   id              : route_short_name immediately followed by route_long_name
--   daily_movements : number of joined rows in the group divided by 31
-- The divisor is written as 31.0 on purpose: COUNT returns a long, and dividing
-- a long by the int literal 31 truncates, so any route with fewer than 31 rows
-- would be reported as 0.
-- The bag is referenced by name (popularRoutes) instead of by position ($1).
-- NOTE(review): 31 is presumably the number of days covered by the input data
-- -- confirm.
-- NOTE(review): CONCAT returns null when either name is null, which would leave
-- `id` null -- confirm both names are always present in the routes CSV.
result = FOREACH groupRoute GENERATE
    CONCAT(group.route_short_name, group.route_long_name) AS id,
    COUNT(popularRoutes) / 31.0 AS daily_movements;
-- Show the execution plan for `result`, then print its records to the console.
EXPLAIN result;
DUMP result;

-- Write `result` to DocumentDB through the connector's storage function. The
-- endpoint URL is the store location; the master key, database name and output
-- collection name are passed as the storage function's arguments.
STORE result INTO '$docdbEndpoint' USING com.microsoft.azure.documentdb.hadoop.pig.DocumentDBStorage(
    '$docdbMasterkey',
    '$docdbDBName',
    '$docdbOutputCollName'
);