All downloads are free. Search and download functionality uses the official Maven repository.

test.resources.load_docdb_dump.pig Maven / Gradle / Ivy

The newest version!
-- Reads documents from one DocumentDB collection, projects two properties out
-- of each document, prints the projection, and stores it in a second collection.
--
-- Run locally with the built-in defaults:
--   pig -x local /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/src/test/resources/load_docdb_dump.pig
--
-- Any connection setting below can be overridden on the command line, e.g.:
--   pig -x local -param DOCDB_KEY='<your key>' -param DOCDB_OUTPUT_COLL=OtherColl load_docdb_dump.pig

-- Register our Jar package.
REGISTER /home/cloudera/sdk/Java/DocumentDB/target/DocumentDB-1.1.0-preview-jar-with-dependencies.jar
REGISTER /home/cloudera/sdk/hadoop/DocumentDBHadoopConnector/target/DocumentDBHadoopConnector-1.1.0.jar

-- Connection settings, declared once so the LOAD and the STORE cannot drift apart.
-- The defaults reproduce the values this script has always used.
-- NOTE(review): the default DOCDB_KEY is an account key committed in plain text;
-- prefer passing it with -param and consider rotating the committed key.
%default DOCDB_ENDPOINT 'https://mingdemo.documents.azure.com:443/';
%default DOCDB_KEY 'IjzL58oYTHsNLxZOm+KZKrK1rlhFdeeWXZsJYtFvnISTvcbvkB/VD/ONvxezObAILqt/sfnqVZwHrl23SGHkAg==';
%default DOCDB_DATABASE 'DemoDatabase';
%default DOCDB_INPUT_COLL 'InputColl';
%default DOCDB_OUTPUT_COLL 'OutputColl';

-- Load every document of the input collection; the loader takes
-- (key, database, collection) and the endpoint is the LOAD location.
tweet_info = LOAD '$DOCDB_ENDPOINT'
          USING com.microsoft.azure.documentdb.hadoop.pig.DocumentDBLoader(
                '$DOCDB_KEY',
                '$DOCDB_DATABASE', '$DOCDB_INPUT_COLL');

-- Alternative kept for reference (disabled):
-- tweet_format =  GROUP tweet_info BY id;

-- assume we have those properties.
-- The first field of each tuple is dereferenced as a map, one key per output column.
-- (Positional references such as the first-field reference are not touched by
-- parameter substitution, which only applies to names starting with a letter
-- or underscore.)
tweet_format = FOREACH tweet_info
       GENERATE $0#'hashtagtext' as hashtagtext,
                $0#'id' as id;
--                $0#'text' as text;

-- Print the projected relation to the console.
dump tweet_format;

-- Write the projected relation to the output collection on the same endpoint.
STORE tweet_format INTO '$DOCDB_ENDPOINT'
               USING com.microsoft.azure.documentdb.hadoop.pig.DocumentDBStorage(
             '$DOCDB_KEY',
             '$DOCDB_DATABASE', '$DOCDB_OUTPUT_COLL');





© 2015 - 2024 Weber Informatics LLC | Privacy Policy