All Downloads are FREE. Search and download functionalities are using the official Maven repository.

test.resources.findPopularRoutes.pig Maven / Gradle / Ivy

The newest version!
-- You can run this Pig script as:
-- pig -x local /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/findPopularRoutes.pig

-- Register our Jar package.
REGISTER /home/cloudera/sdk/Java/DocumentDB/target/DocumentDB-1.1.0-jar-with-dependencies.jar
REGISTER /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/target/DocumentDBHadoopConnector-1.1.0.jar

-- Script parameters.
-- They are set with the "default" directive instead of "declare": a plain run
-- still uses the values below, but each one can now be overridden from the
-- command line, for example
--   pig -x local -param RoutesCSV=/data/routes.csv findPopularRoutes.pig
-- or collected in a file passed with -param_file.

-- Input CSV files.
%default RoutesCSV '/home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/routes.csv'
%default StopsCSV '/home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/stops.csv'

-- DocumentDB account, database and output collection used by the STORE step.
%default docdbEndpoint 'https://ming2.documents.azure.com:443/'
-- SECURITY NOTE(review): an account master key is committed in this script.
-- Treat it as leaked: rotate it, and supply the real key at run time through
-- -param or -param_file instead of keeping it under version control.
%default docdbMasterkey 'R17FHH6bn0Knz/yhb4jMBRkcQMe1tVeiQ6e0SlQnDjtXRJG9OufFEt7sgaC+PaCMeAmcYY9r+gphaJuc1jdKTg=='
%default docdbDBName 'DemoDatabase'
%default docdbOutputCollName 'StopsWithRouteCount'


-- Load the comma-separated routes file named by the RoutesCSV parameter.
-- The second column, id, holds the route_id.
routes = LOAD '$RoutesCSV'
    USING PigStorage(',')
    AS (
        agency_id:chararray,
        id:chararray,
        route_short_name:chararray,
        route_long_name:chararray,
        route_type:chararray,
        route_desc:chararray,
        route_url:chararray,
        route_color:chararray,
        route_text_color:chararray
    );

-- Load the comma-separated file named by the StopsCSV parameter.
-- The second column, id, holds the trip_id; route_id is the key later joined
-- against routes.id.
-- Every column carries an explicit type: trip_headsign previously had none and
-- therefore defaulted to bytearray, unlike the other text columns.
stops = LOAD '$StopsCSV'
         USING PigStorage(',')
         AS (route_id:chararray,
             id:chararray,
             service_id:chararray,
             trip_headsign:chararray,
             direction_id:int,
             shape_id:chararray);

-- Keep only the rows whose direction_id equals 1; all other rows are dropped.
stops_filtered = FILTER stops BY (direction_id == 1);

-- Inner-join every route to its remaining rows (routes.id = route_id),
-- then print the schema of the joined relation.
popularRoutes = JOIN routes BY id, stops_filtered BY route_id;
DESCRIBE popularRoutes;

-- Collect the joined rows into one bag per (short name, long name) pair of the
-- route, then print the schema of the grouped relation.
groupRoute = GROUP popularRoutes
             BY (routes::route_short_name, routes::route_long_name);
DESCRIBE groupRoute;

-- Produce one output record per route group:
--   id              - route short name followed by route long name
--   daily_movements - number of joined rows in the group divided by 31
result = FOREACH groupRoute {
    -- The bag is referenced by name instead of by position.
    -- NOTE(review): COUNT yields a long, so this is integer division and the
    -- quotient is truncated (groups with fewer than 31 rows give 0). Kept
    -- as-is to preserve the output type; confirm whether a fractional average
    -- and the fixed 31-day divisor are intended.
    daily_movements = COUNT(popularRoutes) / 31;
    -- CONCAT returns null when either argument is null, which would leave the
    -- record without an id whenever a route lacks one of its two names.
    -- Substitute an empty string so the remaining name is still used.
    id = CONCAT((group.route_short_name IS NULL ? '' : group.route_short_name),
                (group.route_long_name IS NULL ? '' : group.route_long_name));
    GENERATE id AS id, daily_movements AS daily_movements;
}

-- Print the execution plans Pig builds for the result relation.
EXPLAIN result;

-- Write the computed records to the console.
-- NOTE(review): the same relation is also written by the STORE statement that
-- follows, so this console dump is presumably a debugging aid - confirm
-- whether it is still wanted.
DUMP result;

-- Write the result relation to DocumentDB through the connector's storage
-- function. The endpoint is passed as the store location; the master key,
-- database name and output collection name are the constructor arguments.
STORE result
    INTO '$docdbEndpoint'
    USING com.microsoft.azure.documentdb.hadoop.pig.DocumentDBStorage(
        '$docdbMasterkey',
        '$docdbDBName',
        '$docdbOutputCollName'
    );


           




© 2015 - 2024 Weber Informatics LLC | Privacy Policy