examples.awk.strings Maven / Gradle / Ivy
/*
* $Id: strings.java,v 1.3 2003/08/12 18:11:30 dfs Exp $
*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
* must not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [email protected].
*
* 5. Products derived from this software may not be called "Apache"
* or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
* name, without prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* .
*/
package examples.awk;
import java.io.*;
import org.apache.oro.text.regex.*;
import org.apache.oro.text.awk.*;
/**
* This is a test program demonstrating how to search an input stream
* with the jakarta-oro awk package regular expression classes. It
* performs a function similar to the Unix strings
command,
* but is intended to show how matching on a stream is affected by its
* character encoding. The most important thing to remember is that
* AwkMatcher only matches on 8-bit values. If your input contains
* Java characters containing values greater than 255, the pattern
* matching process will result in an ArrayIndexOutOfBoundsException.
* Therefore, if you want to search a binary file containing arbitrary
* bytes, you have to make sure you use an 8-bit character encoding
* like ISO-8859-1, so that the mapping between byte-values and character
* values will be one to one. Otherwise, the file will be interpreted
* as UTF-8 by default, and you will probably wind up with character
* values outside of the 8-bit range.
*
* @version @version@
*/
public final class strings {
public static final class StringFinder {
/**
* Default string expression. Looks for at least 4 contiguous
* printable characters. Differs slightly from GNU strings command
* in that any printable character may start a string.
*/
public static final String DEFAULT_PATTERN =
"[\\x20-\\x7E]{3}[\\x20-\\x7E]+";
Pattern pattern;
AwkMatcher matcher;
public StringFinder(String regex) throws MalformedPatternException {
AwkCompiler compiler = new AwkCompiler();
pattern = compiler.compile(regex, AwkCompiler.CASE_INSENSITIVE_MASK);
matcher = new AwkMatcher();
}
public StringFinder() throws MalformedPatternException {
this(DEFAULT_PATTERN);
}
public void search(Reader input, PrintWriter output) throws IOException {
MatchResult result;
AwkStreamInput in = new AwkStreamInput(input);
while(matcher.contains(in, pattern)) {
result = matcher.getMatch();
output.println(result);
}
output.flush();
}
}
public static final String DEFAULT_ENCODING = "ISO-8859-1";
public static final void main(String args[]) {
String regex = StringFinder.DEFAULT_PATTERN;
String filename, encoding = DEFAULT_ENCODING;
StringFinder finder;
Reader file = null;
// Some users thought it would be useful to use the default pattern
// and just pass the encoding as the second parameter. Therefore,
// when two arguments are given and the second argument is not a valid
// encoding, it is interpreted as a pattern. This means you can't
// use a valid encoding name as a pattern without also specifying
// an encoding as a third argument.
if(args.length < 1) {
System.err.println("usage: strings file [pattern|encoding] [encoding]");
return;
} else if(args.length > 2) {
regex = args[1];
encoding = args[2];
} else if(args.length > 1)
encoding = args[1];
filename = args[0];
try {
InputStream fin = new FileInputStream(filename);
try {
file = new InputStreamReader(fin, encoding);
} catch(UnsupportedEncodingException uee) {
if(args.length == 2) {
regex = encoding;
encoding = DEFAULT_ENCODING;
file = new InputStreamReader(fin, encoding);
} else
throw uee;
}
finder = new StringFinder(regex);
finder.search(file, new PrintWriter(new OutputStreamWriter(System.out)));
file.close();
} catch(Exception e) {
e.printStackTrace();
return;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy