All downloads are free. The search and download functionality uses the official Maven repository.

com.formulasearchengine.mathosphere.basex.Client Maven / Gradle / Ivy

There is a newer version: 1.0.1
Show newest version
package com.formulasearchengine.mathosphere.basex;

import com.formulasearchengine.mathmlquerygenerator.NtcirPattern;
import com.formulasearchengine.mathmlquerygenerator.QVarXQueryGenerator;
import com.formulasearchengine.mathmlquerygenerator.XQueryGenerator;
import com.formulasearchengine.mathmltools.xmlhelper.XMLHelper;
import com.formulasearchengine.mathosphere.basex.types.Hit;
import com.formulasearchengine.mathosphere.basex.types.Result;
import com.formulasearchengine.mathosphere.basex.types.Results;
import com.formulasearchengine.mathosphere.basex.types.Run;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.XmlFriendlyNameCoder;
import com.thoughtworks.xstream.io.xml.Xpp3Driver;
import net.xqj.basex.BaseXXQDataSource;
import org.intellij.lang.annotations.Language;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.TransformerException;
import javax.xml.xquery.*;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Inputs NtcirPattern queries, runs them through the BaseX search engine, and then outputs results.
 * Created by Moritz on 08.11.2014.
 */
public class Client {
	/** User name passed to the BaseX data source and to BaseXClient when connecting. */
	public static final String USER = "admin";
	/** Password passed to the BaseX data source and to BaseXClient when connecting. */
	public static final String PASSWORD = "admin";
	/** Matches carriage returns; used to strip them from query output. */
	private static final Pattern CR_PATTERN = Pattern.compile("\r");
	/** Collection of runs gathered by this client; serialized by {@link #getXML()}. */
	private Results results = new Results();
	/** Run that receives one Result per processed pattern; its first constructor argument embeds the creation time. */
	private Run currentRun = new Run( "baseX" + System.currentTimeMillis(), "automated" );
	/** Result that the query methods add hits to; replaced when a new pattern or wrapped query starts. */
	private Result currentResult = new Result( "NTCIR11-Math-" );
	/** Elapsed {@link System#nanoTime()} time of the most recent query call; null until a query has been timed. */
	private Long lastQueryDuration;
	/** Selects the XQJ code path in the query methods; defaults to true. */
	private boolean useXQ = true;
	/** Whether timing information is passed on to results for serialization; defaults to true. */
	private boolean showTime = true;

	/**
	 * Creates a Client that has not run any query yet. Queries are then issued one at a time
	 * through the instance methods.
	 */
	public Client() {
		// No work beyond the field initialisers.
	}

	/**
	 * Constructs a new Client and immediately runs every pattern of the given query set.
	 * Each pattern produces one Result that is added to the current run; the run is added to the
	 * results once all patterns have been processed.
	 *
	 * @param patterns patterns to search, processed in iteration order
	 * @throws XQException if running one of the queries fails; the remaining patterns are then not processed
	 */
	public Client(List<NtcirPattern> patterns) throws XQException {
		// The element type is required: iterating a raw List as NtcirPattern does not compile.
		for (final NtcirPattern pattern : patterns) {
			processPattern( pattern );
		}
		results.addRun( currentRun );
	}

	/**
	 * Serializes a single Result with XStream.
	 * The "ms" field is left out when {@code result.getShowTime()} is false.
	 *
	 * @param result Result to serialize
	 * @return the XML produced by XStream, prefixed with a newline
	 */
	public static String resultToXML( Result result ) {
		// Custom name coder: underscores are not escaped, so run_type is printed as-is.
		final XmlFriendlyNameCoder nameCoder = new XmlFriendlyNameCoder( "_-", "_" );
		final XStream serializer = new XStream( new Xpp3Driver( nameCoder ) );
		final boolean hideTiming = !result.getShowTime();
		if ( hideTiming ) {
			serializer.omitField( Result.class, "ms" );
		}
		serializer.processAnnotations( Result.class );
		final String body = serializer.toXML( result );
		return "\n" + body;
	}

	/**
	 * Serializes a Results object with XStream.
	 * The "ms" field of both Run and Result is left out when {@code results.getShowTime()} is false.
	 *
	 * @param results Results to serialize
	 * @return the XML produced by XStream, prefixed with a newline
	 */
	public static String resultsToXML( Results results ) {
		// Custom name coder: underscores are not escaped, so run_type is printed as-is.
		final XmlFriendlyNameCoder nameCoder = new XmlFriendlyNameCoder( "_-", "_" );
		final XStream serializer = new XStream( new Xpp3Driver( nameCoder ) );
		final boolean hideTiming = !results.getShowTime();
		if ( hideTiming ) {
			serializer.omitField( Run.class, "ms" );
			serializer.omitField( Result.class, "ms" );
		}
		serializer.processAnnotations( Results.class );
		final String body = serializer.toXML( results );
		return "\n" + body;
	}

	/**
	 * Deserializes an XML string with XStream after processing the annotations of the given class.
	 * When the class is {@code Results}, a name coder that does not escape underscores is used so
	 * the attribute "run_type" is read correctly; every other class uses a default XStream.
	 *
	 * NOTE(review): no XStream security settings are configured here. If {@code xml} can come from an
	 * untrusted source, an allow-list of types should probably be set up — confirm against the
	 * XStream version in use.
	 *
	 * @param xml XML text to deserialize
	 * @param convertClass class whose XStream annotations describe the expected XML
	 * @return the object XStream built from the XML; callers cast it to the expected type
	 */
	public static Object xmlToClass( String xml, Class<?> convertClass ) {
		final XStream stream;
		if ( convertClass.equals( Results.class ) ) {
			//Use custom coder to disable underscore escaping so run_type is properly read
			stream = new XStream( new Xpp3Driver( new XmlFriendlyNameCoder( "_-", "_" ) ) );
		} else {
			stream = new XStream();
		}
		stream.processAnnotations( convertClass );
		return stream.fromXML( xml );
	}

	/**
	 * Creates an XQJ connection to the BaseX server using the Server constants and the
	 * USER/PASSWORD credentials of this class.
	 *
	 * @return a new connection; the caller is responsible for closing it
	 * @throws XQException if a property cannot be set or the connection cannot be opened
	 */
	private static XQConnection getXqConnection() throws XQException {
		// The returned instance is not needed here; presumably the call makes sure the
		// server singleton exists before connecting (TODO confirm).
		Server.getInstance();
		final XQDataSource dataSource = new BaseXXQDataSource();
		//Other properties: description, logLevel, loginTimeout, readOnly
		dataSource.setProperty( "serverName", Server.SERVER_NAME );
		dataSource.setProperty( "port", String.valueOf( Server.PORT ) );
		dataSource.setProperty( "databaseName", Server.DATABASE_NAME );
		dataSource.setProperty( "user", USER );
		dataSource.setProperty( "password", PASSWORD );

		return dataSource.getConnection( USER, PASSWORD );
	}

	/**
	 * Opens a session through the alternative BaseXClient API (which enables XQuery v3.1) and
	 * issues an OPEN command for the configured database.
	 *
	 * @return an open session; the caller must close it
	 * @throws IOException if connecting or executing the OPEN command fails
	 */
	private static BaseXClient getBaseXClient() throws IOException {
		// The returned instance is not needed here; presumably the call makes sure the
		// server singleton exists before connecting (TODO confirm).
		Server.getInstance();
		final BaseXClient session = new BaseXClient(Server.SERVER_NAME, Server.PORT, USER, PASSWORD);
		try {
			session.execute("OPEN " + Server.DATABASE_NAME);
		} catch (final IOException | RuntimeException e) {
			// The caller never receives the session on this path, so close it here to avoid a leak.
			try {
				session.close();
			} catch (final Exception closeError) {
				e.addSuppressed(closeError);
			}
			throw e;
		}
		return session;
	}

	/**
	 * Builds a copy of a start element in which one attribute carries a new value.
	 * <p>
	 * All other attributes and the namespace declarations are copied unchanged. The new element name
	 * and the replaced attribute name are built from local parts only, so any namespace URI or prefix
	 * on them is dropped. If the element has no attribute with the given local name, no attribute is
	 * added.
	 *
	 * @param event start element to copy
	 * @param attribute local name of the attribute whose value is replaced
	 * @param value new attribute value
	 * @return a new StartElement with the replaced value for the given attribute
	 */
	public static StartElement replaceAttr(StartElement event, String attribute, String value) {
		final XMLEventFactory eventFactory = XMLEventFactory.newInstance();
		// Wildcard plus cast compiles against both the raw (Java 8) and the generic (Java 9+) StAX signatures.
		final Iterator<?> attributeIterator = event.getAttributes();
		final List<Attribute> attrs = new ArrayList<>();
		while (attributeIterator.hasNext()) {
			final Attribute curAttr = (Attribute) attributeIterator.next();
			if (attribute.equals(curAttr.getName().getLocalPart())) {
				attrs.add(eventFactory.createAttribute(new QName(attribute), value));
			} else {
				attrs.add(curAttr);
			}
		}
		return eventFactory.createStartElement(new QName(event.getName().getLocalPart()), attrs.iterator(), event.getNamespaces());
	}

	/**
	 * Executes a query over a fresh connection without timing it and without touching
	 * {@link #currentResult}.
	 *
	 * @param query XQuery string
	 * @return all result items concatenated in order, with carriage returns removed
	 * @throws XQException if connecting, preparing, executing or reading the query fails
	 */
	static String directXQuery(String query) throws XQException {
		final XQConnection connection = getXqConnection();
		final StringBuilder collected = new StringBuilder();
		try {
			final XQResultSequence items = connection.prepareExpression(query).executeQuery();
			while (items.next()) {
				final String item = items.getItemAsString(null);
				collected.append(CR_PATTERN.matcher(item).replaceAll(""));
			}
		} finally {
			// Always release the connection, also when the query fails.
			connection.close();
		}
		return collected.toString();
	}

	/**
	 * Serializes the collected results, applying the current showTime setting first.
	 *
	 * @return the results in XML format
	 */
	public String getXML() {
		final Results collected = results;
		collected.setShowTime(showTime);
		return resultsToXML(collected);
	}

	/**
	 * Sets whether timing information is shown, both on this client and on its collected results.
	 *
	 * @param showTime true to show time, false to hide it
	 */
	public void setShowTime (boolean showTime) {
		results.setShowTime( showTime );
		this.showTime = showTime;
	}

	/**
	 * Chooses the code path taken by the query methods. With true, {@code runQueryBaseXSimple} runs
	 * the query through XQJ and {@code runQueryNTCIR} returns null; with false,
	 * {@code runQueryBaseXSimple} searches nothing and {@code runQueryNTCIR} uses BaseXClient.
	 *
	 * @param useXQ true to use XQJ expressions
	 */
	public void setUseXQ (boolean useXQ) {
		this.useXQ = useXQ;
	}

	/**
	 * Runs one pattern: starts a new current result named after the pattern number, executes the
	 * pattern's XQuery expression and adds the result to the current run.
	 *
	 * @param pattern pattern to run
	 * @throws XQException if the query fails; the result is then not added to the run
	 */
	private void processPattern(NtcirPattern pattern) throws XQException {
		final Result freshResult = new Result( pattern.getNum() );
		freshResult.setShowTime( showTime );
		currentResult = freshResult;
		basex( pattern.getxQueryExpression() );
		currentRun.addResult( currentResult );
	}

	/**
	 * Runs the query through {@link #runQueryBaseXSimple(String)} and reports how long it took.
	 * Hits are added to the current result as a side effect.
	 *
	 * @param query Query in XQuery string format.
	 * @return the value of lastQueryDuration after the call (elapsed {@link System#nanoTime()} time
	 * of the most recently timed query)
	 * @throws XQException if the query fails; the exception is passed on unchanged
	 */
	public Long basex(String query) throws XQException {
		runQueryBaseXSimple( query );
		final Long elapsed = lastQueryDuration;
		return elapsed;
	}

	/**
	 * Connects with the BaseX database through BaseXClient, sends the given query and adds every
	 * returned item as a Hit to the current result. Each item is expected to be XML in the
	 * NTCIR_FOOTER return format; the "for" attribute of every "formula" element is rewritten to
	 * the given query ID before the item is converted to a Hit.
	 * <p>
	 * Only the non-XQJ path is implemented: when useXQ is true nothing is executed and null is
	 * returned.
	 *
	 * @param query Query in XQuery string format.
	 * @param queryID ID to mark this query (required for NTCIR search highlight format)
	 * @return the current result in NTCIR_FOOTER XML format (not in full NTCIR format), or null when
	 * useXQ is true
	 * @throws XQException declared for callers; not raised by the BaseXClient path itself
	 * @throws XMLStreamException When the output fails to parse as XML
	 * @throws IOException When the client fails to open properly or the query cannot be sent or read
	 * @throws TransformerException declared for callers; not raised by the BaseXClient path itself
	 */
	protected Result runQueryNTCIR( String query, String queryID )
			throws XQException, XMLStreamException, IOException, TransformerException, java.io.UnsupportedEncodingException {
		if ( useXQ ) {
			// There is no XQJ implementation for this output format.
			return null;
		}
		final BaseXClient session = getBaseXClient();
		try {
			// Time with a local start value so a failing query() never leaves a raw timestamp in lastQueryDuration.
			final long start = System.nanoTime();
			final BaseXClient.Query querySession = session.query( query );
			lastQueryDuration = System.nanoTime() - start;
			currentResult.setTime( lastQueryDuration );
			currentResult.setShowTime( showTime );

			// Factory lookup does not depend on the hit, so do it once per query instead of once per hit.
			final XMLInputFactory inputFactory = XMLInputFactory.newFactory();
			final XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
			while ( querySession.more() ) {
				final String hitXml = rewriteFormulaTarget( inputFactory, outputFactory, querySession.next(), queryID );
				currentResult.addHit( (Hit) xmlToClass( hitXml, Hit.class ) );
			}
		} finally {
			session.close();
		}
		return currentResult;
	}

	/**
	 * Copies one result item event by event, dropping the START_DOCUMENT event and replacing the "for"
	 * attribute of every "formula" element with the given query ID.
	 */
	private static String rewriteFormulaTarget( XMLInputFactory inputFactory, XMLOutputFactory outputFactory,
			String hitXml, String queryID ) throws XMLStreamException {
		// Explicit charset constant: no charset lookup by name.
		final byte[] utf8 = hitXml.getBytes( java.nio.charset.StandardCharsets.UTF_8 );
		final XMLEventReader reader = inputFactory.createXMLEventReader( new ByteArrayInputStream( utf8 ) );
		final StringWriter rewritten = new StringWriter();
		final XMLEventWriter writer = outputFactory.createXMLEventWriter( rewritten );

		while ( reader.hasNext() ) {
			final XMLEvent curEvent = reader.nextEvent();
			switch ( curEvent.getEventType() ) {
				case XMLStreamConstants.START_ELEMENT:
					if ( "formula".equals( curEvent.asStartElement().getName().getLocalPart() ) ) {
						writer.add( replaceAttr( curEvent.asStartElement(), "for", queryID ) );
					} else {
						writer.add( curEvent );
					}
					break;
				case XMLStreamConstants.START_DOCUMENT:
					// Skipped so the output carries no XML declaration.
					break;
				default:
					writer.add( curEvent );
					break;
			}
		}
		// A StAX writer may buffer; flush before reading the StringWriter so no output is lost.
		writer.flush();
		writer.close();
		reader.close();
		return rewritten.toString();
	}

	/**
	 * Connects with the BaseX database through XQJ, sends the given query and adds every returned
	 * item as a Hit to the current result. Expects BASEX_FOOTER to be used as the result return type.
	 * Every hit gets the fixed score 10 and a rank that counts up from 1 in result order; carriage
	 * returns are removed from the item text.
	 * <p>
	 * Only the XQJ path is implemented: when useXQ is false nothing is searched and 0 is returned.
	 *
	 * @param query Query in XQuery string format.
	 * @return Number of results.
	 * @throws XQException When getXqConnection() fails to connect to the BaseX server, XQJ fails to process the query,
	 * or XQJ fails to execute the query.
	 */
	protected int runQueryBaseXSimple( String query ) throws XQException {
		if ( !useXQ ) {
			//TODO: The non-XQJ code path does not yet work, so nothing is searched here.
			return 0;
		}
		final int score = 10;
		int rank = 1;
		final XQConnection conn = getXqConnection();
		try {
			final XQPreparedExpression xqpe = conn.prepareExpression( query );
			// Time with a local start value so a failing executeQuery() never leaves a raw timestamp in lastQueryDuration.
			final long start = System.nanoTime();
			final XQResultSequence rs = xqpe.executeQuery();
			lastQueryDuration = System.nanoTime() - start;
			currentResult.setTime( lastQueryDuration );
			currentResult.setShowTime( showTime );
			while ( rs.next() ) {
				final String result = rs.getItemAsString( null );
				currentResult.addHit( new Hit( CR_PATTERN.matcher( result ).replaceAll( "" ), "", score, rank ) );
				rank++;
			}
		} finally {
			// Always release the connection, also when the query fails.
			conn.close();
		}
		// rank is one past the last assigned rank.
		return rank - 1;
	}

	/**
	 * Runs the query through {@link #runQueryBaseXSimple(String)} into a fresh {@link #currentResult}
	 * and wraps that result in a new Results object. The Results object contains a single run
	 * holding the result when there is at least one hit, and no run otherwise.
	 *
	 * @param query XQuery string
	 * @throws XQException when the server xq connection fails
	 * @return Results frame for the NTCIR XML format
	 */
	public Results runQueryNtcirWrap( String query ) throws XQException {
		final Result fresh = new Result( "NTCIR11-Math-");
		fresh.setShowTime( showTime );
		currentResult = fresh;
		runQueryBaseXSimple( query );

		final Results wrapped = new Results();
		wrapped.setShowTime( showTime );
		if ( currentResult.getNumHits() == 0 ) {
			// Nothing found: return the frame without any run.
			return wrapped;
		}
		final Run singleRun = new Run( "", "" );
		singleRun.setShowTime( showTime );
		singleRun.addResult( currentResult );
		wrapped.addRun( singleRun );
		return wrapped;
	}

	/**
	 * Generates an XQuery from a MathML MathWebSearch query document and runs it through
	 * {@link #runQueryNtcirWrap(String)}. The generator is configured with the root path
	 * "//*:expr", the BASEX_FOOTER return format and no qvar map.
	 *
	 * @param mwsQuery Document in MathML MathWebSearch query format
	 * @throws XQException when the server xq connection fails
	 * @throws IllegalArgumentException when the document is null
	 * @return NTCIR XML formatted result
	 */
	public Results runMWSQuery( Document mwsQuery ) throws XQException {
		if ( null == mwsQuery ) {
			throw new IllegalArgumentException( "Got empty MathML document" );
		}
		final QVarXQueryGenerator queryGenerator = new QVarXQueryGenerator( mwsQuery );
		queryGenerator.setPathToRoot( "//*:expr" );
		queryGenerator.setReturnFormat( Benchmark.BASEX_FOOTER );
		queryGenerator.setAddQvarMap( false );
		final String xQuery = queryGenerator.toString();
		return runQueryNtcirWrap( xQuery );
	}

	/**
	 * Converts a Tex string into a MathML MathWebSearch query document and runs the search through
	 * {@link #runMWSQuery(Document)}.
	 *
	 * @param tex Tex string
	 * @throws XQException when the server xq connection fails
	 * @throws IOException when the tex to MathML conversion fails
	 * @throws IllegalArgumentException when the Tex string is null or empty
	 * @return NTCIR XML formatted result
	 */
	public Results runTexQuery( String tex ) throws IOException, XQException {
		final boolean missing = tex == null || tex.isEmpty();
		if ( missing ) {
			throw new IllegalArgumentException( "Got empty TeX query" );
		}
		final String mathMl = new TexQueryGenerator().request( tex );
		final Document queryDocument = XMLHelper.string2Doc( mathMl, true );
		return runMWSQuery( queryDocument );
	}

	/**
	 * Builds the XQuery step that selects "expr" elements whose url attribute matches the given
	 * revision number followed by '#'.
	 * NOTE(review): the pattern is not anchored, so a url containing e.g. "112#" would presumably also
	 * match revision 12 — confirm against the url format used in the database.
	 *
	 * @param rev Revision number to match
	 * @return XQuery expression
	 */
	private String getRevFormula( int rev ) {
		final StringBuilder step = new StringBuilder( "expr[matches(@url, '" );
		step.append( rev );
		step.append( "#(.*)')]" );
		return step.toString();
	}

	/**
	 * Counts the formulae with the specified revision number by running a count query through
	 * {@link #directXQuery(String)}. If the query fails with an XQException, the stack trace is
	 * printed and 0 is returned.
	 *
	 * @param rev Revision number to count
	 * @return Number of formulae with specified revision number, or 0 when the query failed
	 */
	public int countRevisionFormula(int rev){
		final String countQuery = "count(//*:" + getRevFormula( rev ) + ")";
		int found = 0;
		try {
			found = Integer.parseInt( directXQuery( countQuery ) );
		} catch (final XQException e) {
			e.printStackTrace();
		}
		return found;
	}

	/**
	 * Counts the total number of formulae by running a count query through
	 * {@link #directXQuery(String)}. If the query fails with an XQException, the stack trace is
	 * printed and 0 is returned.
	 *
	 * @return Total number of formulae, or 0 when the query failed
	 */
	public int countAllFormula(){
		int total = 0;
		try {
			final String answer = directXQuery( "count(./*/*)" );
			total = Integer.parseInt( answer );
		} catch (final XQException e) {
			e.printStackTrace();
		}
		return total;
	}

	/**
	 * Deletes all formulae with the specified revision number through {@link #directXQuery(String)}
	 * and then verifies that none are left. If either query fails with an XQException, the stack
	 * trace is printed and false is returned.
	 *
	 * @param rev Revision number
	 * @return true only if the delete ran and the follow-up count returned 0
	 */
	public boolean deleteRevisionFormula(int rev){
		final String revFormula = getRevFormula( rev );
		try {
			directXQuery( "delete node //*:" + revFormula );
			// Count directly rather than via countRevisionFormula(int): that method returns 0 when its
			// query fails, which would make a failed verification look like a successful delete.
			final String remaining = directXQuery( "count(//*:" + revFormula + ")" );
			return Integer.parseInt( remaining ) == 0;
		} catch (final XQException e) {
			e.printStackTrace();
			return false;
		}
	}

	/**
	 * Writes formulae from a harvest node into the server database. The node is bound to the external
	 * variable $input; for every mws:expr child of it, the update deletes all database nodes with the
	 * same url attribute and inserts the expr into the first mws:harvest element.
	 * If the update fails with an XQException, the stack trace is printed and false is returned.
	 *
	 * @param n Node whose mws:expr children are written
	 * @return Whether or not this operation succeeded
	 */
	public boolean updateFormula(Node n) {
		@Language("XQuery") final String xUpdate = "declare namespace mws=\"http://search.mathweb.org/ns\";\n" +
				"declare variable $input external;\n" +
				"for $e in $input/mws:expr\n" +
				"return ( delete node //*[@url=$e/@url], insert node $e into /mws:harvest[1])";
		boolean succeeded;
		try {
			final XQConnection connection = getXqConnection();
			try {
				final XQPreparedExpression update = connection.prepareExpression( xUpdate );
				update.bindNode( new QName( "input" ), n, null );
				update.executeQuery();
			} finally {
				// Always release the connection; a failure while closing also counts as failure.
				connection.close();
			}
			succeeded = true;
		} catch (final XQException e ) {
			e.printStackTrace();
			succeeded = false;
		}
		return succeeded;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy