// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
package ca.uhn.hl7v2.preparser;
import java.util.*;
import java.io.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import ca.uhn.hl7v2.HL7Exception;
public class XML
{
/**
 * Signals that a parse should be abandoned. The handler callbacks in this
 * file throw it when a SAX event arrives while the handler is in a state
 * it cannot make sense of.
 */
protected static class StopParsingException extends SAXException {

    /** Creates the exception with its fixed marker message. */
    public StopParsingException() {
        super("ca.uhn.hl7.....StopParsingException");
    }
}
/** the SAXParser reports parsing events to an object of this class.
We keep track of some parsing state, and the Properties object that
we're supposed to write our data to.
*/
static protected class HL7MessageHandler extends DefaultHandler
{
/* m_props & m_msgMask should be set by the user of this handler before
they pass this handler to SAXParser.parse() or whatever */
/** The data that is found while parsing, and which passes m_msgMask,
will be dumped to m_props, as (DatumPath.toString() / text) key/value
pairs. startDocument() aborts the parse if this is still null. */
public Properties m_props = null;
/** Specifies what parts of a message should be dumped to m_props.
Every element is cast to DatumPath by startElement(). It must not be null
while parsing: startElement() iterates over it without a null check.
*/
public Collection m_msgMask = null;
/* All other fields are parser state; clear() resets every one of them. */
/* true between a successful startDocument() and the next clear(). */
protected boolean m_startedDocument = false;
/* m_msgID / m_curPath together keep track of where we are in the document.
If m_msgID.length() != 0, then we're within the message element. (We're only
expecting one message per document.) Then m_msgID will be the name of the
message. ("ACK" or whatever).
m_curPath keeps track of where within the message we are. See notes at
DatumPath class definition. If m_curPath.size() != 0, then we must be
within a message.
At any point in the code below:
if m_msgID.length() == 0,
then m_curPath.size() == 0
if m_curPath.size() != 0
then m_msgID.length() != 0
Note that our DatumPaths count indices starting from 0 (not 1) -- they're
only converted to 1-based in the string representations that wind up
as m_props keys.
NOTE(review): in this file only the repetition indices visibly start at 0;
field / component indices are stored exactly as parsed from the element
name (see tryToGrowDocLocationFromElementName) -- confirm the statement
above against DatumPath.toString().
*/
StringBuffer m_msgID = new StringBuffer();
DatumPath m_curPath = new DatumPath();
/* the location in the document of the last datum we dumped to m_props. */
DatumPath m_lastDumpedPath = new DatumPath();
/** For handling repeat segments. segmentID (String) -> next repeat idx
(Integer). So when we hit a segment ZYX, we'll know how many times we've
hit a ZYX before, and set the segmentRepIdx part of m_curPath
appropriately. */
TreeMap m_segmentId2nextRepIdx = new TreeMap();
/* m_depthWithinUselessElement and m_depthWithinUsefulElement
reflect what m_msgMask thinks about our location in the document at any
given time.
Both should always be >= -1. Note that both can be >= 0 at the same time
-- explained in a minute....
If m_depthWithinUsefulElement >= 0, this means that we are however deep
(in terms of nested elements: 0 => just within) within an area of the
message that passes m_msgMask. We should dump whatever we find
there to m_props. As we move around within such an element, we will still
update m_curPath appropriately.
If m_depthWithinUselessElement >= 0, we are however deep within an element
which either made no sense (e.g. an element name that
tryToGrowDocLocationFromElementName could not interpret at that position),
or more importantly that we're within an element that otherwise has no hope
of having any useful elements within it according to m_msgMask. (e.g.
m_msgMask says it wants only ZYX segment contents, and we're in some other
segment). So we can safely ignore all content within,
and just keep track of how deep we are within this useless element (with
m_depthWithinUselessElement, of course.) We don't update m_curPath when
m_depthWithinUselessElement >= 0, there's no point and how would we
extract information for the DatumPath out of nonsensical element names
anyway.
If they are both >= 0, this means that we've found some useless
stuff (nonsensical element names?) within a known-useful element.
*/
int m_depthWithinUsefulElement = -1, m_depthWithinUselessElement = -1;
/* With this we keep the text that we've found within a certain element.
It's cleared whenever we enter a (sub) element, and after its content has
been dumped to m_props. characters() only collects text while
m_curPath.size() >= 4, i.e. from field level downwards. */
StringBuffer m_chars = new StringBuffer(10);
/**
 * Creates a handler whose parser state is freshly reset. m_props and
 * m_msgMask are left for the caller to assign before parsing.
 */
public HL7MessageHandler() {
    clear();
}
/**
 * Puts every piece of parser state back to its initial value.
 * m_props and m_msgMask are configuration, not state, and are not touched.
 */
void clear()
{
    m_startedDocument = false;
    m_depthWithinUsefulElement = -1;
    m_depthWithinUselessElement = -1;

    m_msgID.setLength(0);
    m_chars.setLength(0);

    m_curPath.clear();
    m_segmentId2nextRepIdx.clear();

    // Sentinel "last dumped" location: an empty segment name followed by five
    // -42 indices, meant to sort before any sensible DatumPath according to
    // DatumPath.numbersLessThan.
    DatumPath sentinel = m_lastDumpedPath.clear().add(new String());
    for(int i = 0; i < 5; ++i) {
        sentinel = sentinel.add(-42);
    }
}
/**
 * SAX callback marking the start of the document.
 *
 * @throws SAXException a StopParsingException (after resetting the state) if
 *         a document was already started or m_props has not been set
 */
public void startDocument() throws SAXException
{
    if(m_startedDocument || (m_props == null)) {
        clear();
        throw new StopParsingException();
    }
    m_startedDocument = true;
}
/**
 * SAX callback marking the end of the document. The parser state is reset in
 * every case.
 *
 * @throws SAXException a StopParsingException if no document had been started
 */
public void endDocument() throws SAXException
{
    final boolean wasStarted = m_startedDocument;
    clear();
    if(!wasStarted) {
        throw new StopParsingException();
    }
}
/**
 * SAX callback for an opening tag. Discards any pending text, then either
 * counts one level deeper into an already-useless element, or tries to
 * extend the current document location (m_msgID / m_curPath) from qName and
 * classifies the new location against m_msgMask as useful, possibly
 * containing something useful, or useless.
 *
 * Only qName is consulted; uri, localName and attributes are ignored.
 * m_msgMask must be non-null here (startDocument() only checks m_props).
 *
 * @throws SAXException a StopParsingException (after clear()) if no document
 *         has been started
 */
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException
{
//System.err.println("startelem: " + qName + " curpathsize; " +
//m_curPath.size());
boolean ok = false;
if(m_startedDocument) {
// A single unit of text data will be within a single element,
// -- none of it will be in sub-elements and there will be no
// sub-elements fragmenting the data text.
// Right now we're entering a new element: this means that anything
// in m_chars will be whitespace (likely), or text left over from,
// say, the last field, or text that was somewhere it shouldn't have been
// (e.g. stray text placed directly between two element tags).
m_chars.delete(0, m_chars.length());
if(m_depthWithinUselessElement >= 0) {
// already inside a useless element: just track the nesting depth.
++m_depthWithinUselessElement;
}
else {
// remember the size so we can tell whether the path actually grew, and
// roll it back if the new location turns out to be useless.
int oldCurPathSize = m_curPath.size();
if(tryToGrowDocLocationFromElementName(m_msgID, m_curPath,
m_segmentId2nextRepIdx, m_lastDumpedPath, qName))
{
// (if the path did not grow we just entered the message element or a
// segment group element -- nothing to classify.)
if(m_curPath.size() > oldCurPathSize) {
// assert (m_depthWithinUselessElement == -1) // m_curPath
// should not have grown if we're within a useless element.
if(m_depthWithinUsefulElement == -1) {
// this new element could match one of the DatumPaths in
// m_msgMask -- if that's the case, we've just entered a
// useful element.
// TODO: functional stylee (a la C++'s std::accumulate) ?
boolean curPathStartsWithAMaskElem = false;
for(Iterator maskIt = m_msgMask.iterator();
!curPathStartsWithAMaskElem && maskIt.hasNext(); )
{
curPathStartsWithAMaskElem
= m_curPath.startsWith((DatumPath)maskIt.next());
}
if(curPathStartsWithAMaskElem)
m_depthWithinUsefulElement = 0;
else {
// so this element we're entering is not specified by m_msgMask
// to be useful -- but might it contain elements that
// are?
boolean aMaskElemStartsWithCurPath = false;
for(Iterator maskIt = m_msgMask.iterator();
!aMaskElemStartsWithCurPath && maskIt.hasNext(); )
{
aMaskElemStartsWithCurPath
= ((DatumPath)maskIt.next()).startsWith(m_curPath);
}
if(!aMaskElemStartsWithCurPath) {
// ... nope! useless.
// Only m_curPath is rolled back; the repetition counter that
// tryToGrowDocLocationFromElementName advanced in
// m_segmentId2nextRepIdx stays advanced, so masked-out segments
// still count as repetitions.
m_depthWithinUselessElement = 0;
m_curPath.setSize(oldCurPathSize);
} // else => ok, carry on, m_depthWithinUse{less,ful}Element
// still both -1.
}
}
// else => already within a useful element, don't need to compare
// against m_msgMask.
// NOTE(review): m_depthWithinUsefulElement is not incremented here,
// while endElement() decrements it on every exit; the mask therefore
// appears to be re-evaluated for each following sibling -- confirm
// this is intended.
}
}
else
// the element name made no sense at this position: ignore it and
// everything inside it.
m_depthWithinUselessElement = 0;
}
ok = true;
}
if(!ok) {
clear();
throw new StopParsingException();
}
}
/**
 * Tries to interpret a newly opened element, given where we already are in
 * the document. The document location is msgID and curPath together.
 *
 * Depending on the current location the element is taken to be:
 * the message element (msgID empty): msgID is set to the element name and
 * the segment repetition counters are cleared;
 * a segment group element (name starts with msgID + '.'): nothing changes;
 * a segment (curPath empty): the segment name and its repetition index are
 * appended to curPath and the counter for that segment is advanced;
 * a field (curPath.size() == 2, name starts with segment name + '.'): the
 * field number parsed from the name and a field repetition index are
 * appended;
 * a component or subcomponent (curPath.size() is 4 or 5): the number
 * parsed from the text after the first '.' is appended.
 *
 * A field counts as a further repetition only if the last dumped datum lies
 * in the same segment, the same segment repetition and the same field.
 * Comparing the field number alone would hand a bogus repetition index to
 * the first field of a segment whenever the previously dumped datum in some
 * other segment (or other repetition of this segment) happened to carry the
 * same field number.
 *
 * @param msgID name of the message element, empty while outside it (in/out)
 * @param curPath current location within the message (in/out)
 * @param segmentId2nextRepIdx segment name (String) to next repetition
 *        index (Integer) (in/out)
 * @param lastDumpedPath location of the last datum written to the
 *        properties (in)
 * @param elementName name of the element being entered (in)
 * @return true if the element name makes sense at this position (which does
 *         not imply msgID or curPath changed: a segment group element is
 *         simply passed through); false if it does not, which probably means
 *         the element should be treated as useless
 */
protected static boolean tryToGrowDocLocationFromElementName(
    StringBuffer msgID /*in/out*/, DatumPath curPath /*in/out*/,
    Map segmentId2nextRepIdx /*in/out*/, DatumPath lastDumpedPath /*in*/,
    String elementName /*in*/)
{
    boolean ok = false; // ok == can we make sense of this new element?
    // hmm ... where are we in the document:
    if((msgID.length() == 0) && (curPath.size() == 0)) {
        // we're entering a message
        msgID.replace(0, msgID.length(), elementName);
        segmentId2nextRepIdx.clear();
        ok = true;
    }
    else if((msgID.length() > 0) && (curPath.size() == 0)) {
        // we're entering either a segment-group element (its name starts with
        // the message name + '.') or an actual segment element.
        if(!(elementName.startsWith("" + msgID + '.'))) {
            // must be an actual segment.
            curPath.add(elementName);
            if(segmentId2nextRepIdx.containsKey(elementName))
                curPath.add(segmentId2nextRepIdx.get(elementName));
            else
                curPath.add(new Integer(0));
            // next time we meet this segment name it is one repetition further.
            segmentId2nextRepIdx.put(elementName,
                new Integer(((Integer)curPath.get(curPath.size()-1)).intValue() + 1));
        }
        ok = true;
    }
    else if((msgID.length() > 0) && (curPath.size() > 0)) {
        // we're entering a field or a component or a subcomponent.
        if(curPath.size() == 2) { // we're entering a field element
            // all fields should start with segment-ID + '.'
            if(elementName.startsWith("" + curPath.get(0) + '.')) {
                try {
                    int fieldIdxFromElementName
                        = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1));
                    // Decide whether this is a repetition of the field we dumped
                    // last: segment name, segment repetition and field number
                    // must all match.
                    boolean repeatsLastDumpedField
                        = (lastDumpedPath.size() >= 4)
                        && lastDumpedPath.get(0).equals(curPath.get(0))
                        && lastDumpedPath.get(1).equals(curPath.get(1))
                        && (((Integer)lastDumpedPath.get(2)).intValue()
                            == fieldIdxFromElementName);
                    curPath.add(new Integer(fieldIdxFromElementName));
                    // now add the repetition idx to curPath:
                    if(repeatsLastDumpedField) {
                        // lastDumpedPath has a fieldIdx and a fieldRepIdx.
                        curPath.add(new Integer(((Integer)lastDumpedPath.get(3)).intValue() + 1));
                    }
                    else
                        curPath.add(new Integer(0));
                    ok = true;
                } catch(NumberFormatException e) {
                    // deliberately ignored: a non-numeric suffix means this is not
                    // a field element; ok stays false and the caller treats the
                    // element as useless.
                }
            } // else => this isn't a field -- must be useless.
        }
        else if((curPath.size() == 4) || (curPath.size() == 5)) {
            // we're entering a component or subcomponent element
            try {
                int idxFromElementName
                    = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1));
                curPath.add(new Integer(idxFromElementName));
                ok = true;
            } catch(NumberFormatException e) {
                // deliberately ignored: not a (sub)component element; ok stays
                // false.
            }
        }
    }
    return ok;
}
/**
 * SAX callback for a closing tag. Unwinds whatever startElement recorded for
 * the element: one level of "useless" nesting, the message element itself,
 * or one level of m_curPath (after giving the collected text a chance to be
 * dumped to m_props).
 *
 * Only qName is consulted; uri and localName are ignored.
 *
 * @throws SAXException a StopParsingException (after clear()) if no document
 *         has been started or the current location is one we cannot be
 *         leaving an element from
 */
public void endElement(String uri, String localName, String qName)
    throws SAXException
{
    //System.err.println("endElement: " + qName);
    boolean handled = false;
    if(m_startedDocument) {
        if(m_depthWithinUselessElement >= 0) {
            // leaving (one level of) a useless element
            --m_depthWithinUselessElement;
            handled = true;
        }
        else {
            final boolean inMessage = m_msgID.length() > 0;
            if(inMessage && (m_curPath.size() == 0)) {
                // Leaving the message element itself, or a segment group
                // element; only the former changes anything.
                if(String.valueOf(qName).equals(m_msgID.toString())) {
                    m_msgID.setLength(0);
                }
                handled = true;
            }
            else if(inMessage && (m_curPath.size() > 0)) {
                tryToDumpDataToProps();
                switch(m_curPath.size()) {
                    case 2: // leaving a segment element
                        m_curPath.setSize(0);
                        handled = true;
                        break;
                    case 4: // leaving a field element
                        m_curPath.setSize(2);
                        handled = true;
                        break;
                    case 5: // leaving a component ...
                    case 6: // ... or a subcomponent
                        m_curPath.setSize(m_curPath.size() - 1);
                        handled = true;
                        break;
                    default: // any other size is not a location we can leave
                        break;
                }
            }
            if(m_depthWithinUsefulElement >= 0) {
                --m_depthWithinUsefulElement;
            }
        }
    }
    if(!handled) {
        clear();
        throw new StopParsingException();
    }
}
/**
 * Tries to store the text collected in m_chars into m_props under the key
 * m_curPath.toString().
 *
 * Nothing is written unless all of the following hold: m_curPath has at
 * least a segment name and repetition, we are not inside a useless element,
 * the location lies after the last dumped one (see below), and we are
 * inside a useful element.
 *
 * Why the ordering check: text directly inside a field element that also has
 * component children (typically just whitespace between the component tags)
 * would be dumped when the field element is closed, under a key built from
 * the shorter path. According to the original author's notes that key can
 * coincide with the key already used for the field's first component, so
 * the whitespace would clobber real data. Since HL7 fields / components /
 * etc. always appear in numerical order, a location within the same segment
 * name is only dumped if m_lastDumpedPath.numbersLessThan(m_curPath).
 */
protected void tryToDumpDataToProps()
{
    if((m_curPath.size() < 2) || (m_depthWithinUselessElement != -1)) {
        return;
    }

    // Within the same segment name we only ever move forwards.
    boolean sameSegmentName = m_lastDumpedPath.get(0).equals(m_curPath.get(0));
    if(sameSegmentName && !m_lastDumpedPath.numbersLessThan(m_curPath)) {
        return;
    }

    if(m_depthWithinUsefulElement < 0) {
        return;
    }

    String key = m_curPath.toString();
    // TODO: remove! or assert
    if(m_props.containsKey(key)) {
        System.err.println("ALAAAARM: CLOBBERING PROPERTY in " + getClass());
    }
    m_props.setProperty(key, m_chars.toString());
    m_lastDumpedPath.copy(m_curPath);
    m_chars.setLength(0);
}
/**
 * SAX callback for character data. Text is collected into m_chars only while
 * we are inside the message and m_curPath has at least four elements (field
 * level or deeper); everything else is dropped.
 *
 * A contiguous run of characters in the document may be reported in several
 * chunks (in document order, non-overlapping and without gaps), so the
 * chunks are appended rather than assigned. Entities arrive here already
 * resolved to the characters they stand for.
 */
public void characters(char[] chars, int start, int length)
{
    boolean collecting = (m_msgID.length() > 0) && (m_curPath.size() >= 4);
    if(!collecting) {
        return;
    }
    m_chars.append(chars, start, length);
}
/**
 * SAX callback for whitespace the parser considers ignorable. It's unclear
 * which whitespace is considered ignorable for us, so it is treated exactly
 * like ordinary character data.
 *
 * This is the correctly spelled callback declared by the SAX handler
 * interface; without it the parser would never deliver ignorable whitespace
 * to this handler.
 */
public void ignorableWhitespace(char[] chars, int start, int length)
{
    characters(chars, start, length);
}
/**
 * Misspelled former name of ignorableWhitespace. The parser never
 * calls this method; it is kept only so that existing callers keep compiling
 * and simply forwards to the correctly spelled callback.
 */
public void ignoreableWhitespace(char []chars, int start, int length)
{
    ignorableWhitespace(chars, start, length);
}
/**
 * SAX callback for a recoverable parse error. The error is only reported on
 * System.err; parsing continues.
 */
public void error(SAXParseException e)
{
    // TODO: remove.
    String report = "Error in " + getClass() + ": " + e;
    System.err.println(report);
}
/**
 * SAX callback for a non-recoverable parse error: the exception is passed
 * straight back to the parser so that the parse is aborted.
 *
 * @throws SAXException always; it is the very exception that was reported
 */
public void fatalError(SAXParseException e) throws SAXException {
    throw e;
}
}
/** Parses message with our HL7 XML handler and dumps the data found to props.
 *
 * Returns true if we parsed ok, which means well-formed XML, and that's about
 * it. We just barely check against HL7 structure, and ignore any elements /
 * text that is unexpected (that is, impossible in any HL7 message:
 * independent of any message / segment definitions).
 *
 * "message" should be an XML document with one top-level element -- that
 * being the message (an element named after the message, e.g. ACK). We're
 * only expecting one message to be in "message". Because the message usually
 * comes from outside, the parser is configured (best effort) not to resolve
 * external entities or to load external DTDs.
 *
 * props can be null if you don't want the data (we still parse). The message
 * data found in message (that passes msgMask) will be added to props as key /
 * value pairs with the key a toString() of the appropriate DatumPath for the
 * location where the data is found (i.e. in the ZYX[a]-b[c]-d-e style), and
 * the value the corresponding text. So, after calling parseMessage
 * successfully, if you wanted to retrieve the message data from props you
 * might call something like
 * props.getProperty((new DatumPath()).add("MSH").add(1).toString())
 * and that would return a String with "|", probably.
 *
 * Note that this package facilitates the extraction of message data in a way
 * independent of message version (i.e. components and whatever getting
 * added). With a message of
 * "&lt;FOO&gt;&lt;ZYX&gt;&lt;ZYX.42&gt;fieldy-field-field&lt;/ZYX.42&gt;&lt;/ZYX&gt;&lt;/FOO&gt;",
 * "ZYX[0]-42[0]-1-1" will be the key that ends up in props (see notes at
 * DatumPath.toString()).
 * So if you, coding for a future version of the FOO message but receiving
 * old-version message data, tried
 * props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).add(0).add(1).toString())
 * with the message above (that is, trying to extract a repetition and
 * component that aren't there), you would get "ZYX[0]-42[0]-1-1" mapping to
 * "fieldy-field-field" in the resulting props.
 * If instead the ZYX.42 field of the message contained a single component
 * element holding the text "component data", and you, coding for an old
 * version of this FOO message but receiving new-version FOO message data,
 * tried
 * props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).toString())
 * you would get "ZYX[0]-42[0]-1-1" mapping to "component data" in the
 * resulting props.
 *
 * msgMask lets you specify which parts of the message you want dumped to
 * props. Passing in null gets you everything. Otherwise, msgMask's elements
 * should all be DatumPaths (! =&gt; ClassCastException), and a particular
 * part of the message will be dumped to props only if its location, as
 * represented by a DatumPath, startsWith (as in DatumPath.startsWith()) at
 * least one element of msgMask. So if one element of msgMask was a
 * (new DatumPath()).add(new String("ZYX")), then everything in all ZYX
 * segments would get dumped to props. A
 * (new DatumPath()).add(new String("ZYX")).add(1) would get only the first
 * repetitions of same (if there is one) dumped to props. etc. etc.
 * NOTE(review): the handler numbers the first occurrence of a segment 0, so
 * the first repetition is presumably selected with add(0), as main() does --
 * confirm against DatumPath.startsWith().
 * Note that a DatumPath of size() == 0 in msgMask will get you everything, no
 * matter what the other elements of msgMask are, because all DatumPaths
 * startsWith the zero-length DatumPath.
 *
 * Segment group elements (eg. ADT_A01.PROCEDURE) are handled fine, but they
 * aren't addressed in msgMask or in the output in props -- basically any
 * element tag at the level immediately inside the message element, and
 * having a name that starts with the message element name + '.', is ignored
 * (meaning its contents are dealt with the same as if the start and end tags
 * just weren't there.)
 *
 * @param props receives the extracted data; may be null
 * @param message the XML document to parse
 * @param msgMask DatumPaths selecting what to extract; null for everything
 * @return true if the message was parsed (an exception is thrown otherwise)
 * @throws HL7Exception wrapping any parser configuration, I/O or SAX problem,
 *         including the handler's own request to stop parsing
 */
public static boolean parseMessage(Properties props, String message,
    Collection msgMask) throws HL7Exception
{
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        disableExternalEntities(factory);
        SAXParser parser = factory.newSAXParser();
        InputSource inSrc = new InputSource(new java.io.StringReader(message));

        HL7MessageHandler handler = new HL7MessageHandler();
        // the handler is expecting a props, even if the caller isn't interested.
        handler.m_props = (props != null ? props : new Properties());
        if(msgMask != null)
            handler.m_msgMask = msgMask;
        else {
            // a mask holding only the zero-length DatumPath matches everything.
            handler.m_msgMask = new Vector();
            handler.m_msgMask.add(new DatumPath());
        }

        parser.parse(inSrc, handler);
        return true;
    } catch (ParserConfigurationException e) {
        throw new HL7Exception(e);
    } catch (IOException e) {
        throw new HL7Exception(e);
    } catch (SAXException e) {
        // also covers the handler's StopParsingException, which is a SAXException.
        throw new HL7Exception(e);
    }
}
/**
 * Switches off, as far as the parser implementation allows, everything that
 * would make the parser fetch or expand external resources named by the
 * document (XML external entity attacks). Features the implementation does
 * not know are skipped, so that parsing keeps working with any JAXP parser.
 */
private static void disableExternalEntities(SAXParserFactory factory)
{
    final String[] featureNames = {
        javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING,
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };
    final boolean[] featureValues = { true, false, false, false };
    for(int i = 0; i < featureNames.length; ++i) {
        try {
            factory.setFeature(featureNames[i], featureValues[i]);
        } catch (ParserConfigurationException e) {
            // best effort: this parser cannot honour the setting -- carry on.
        } catch (SAXException e) {
            // best effort: feature not recognized / not supported -- carry on.
        }
    }
}
/**
 * Small manual test driver: parses the XML message given as the first
 * command-line argument with a mask of MSH / repetition 0 / field 9, and
 * prints the outcome and the extracted properties to System.err. Does
 * nothing when no argument is given.
 */
public static void main(String args[])
{
    if(args.length < 1) {
        return;
    }

    // Only MSH-9 is extracted; adding an empty DatumPath to the mask instead
    // would dump everything.
    Vector msgMask = new Vector();
    msgMask.add((new DatumPath()).add("MSH").add(0).add(9));

    Properties props = new Properties();
    try {
        boolean parseret = XML.parseMessage(props, args[0], msgMask);
        System.err.println("parseMessage returned " + parseret);
    } catch (HL7Exception e) {
        e.printStackTrace();
    }
    props.list(System.err);
}
}