// nl.basjes.parse.httpdlog.dissectors.ModUniqueIdDissector Maven / Gradle / Ivy
/*
* Apache HTTPD & NGINX Access log parsing made easy
* Copyright (C) 2011-2017 Niels Basjes
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.basjes.parse.httpdlog.dissectors;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Dissector;
import nl.basjes.parse.core.Parsable;
import nl.basjes.parse.core.ParsedField;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import org.apache.commons.codec.binary.Base64;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
/**
* The documentation of mod_unique_id clearly states:
* http://httpd.apache.org/docs/current/mod/mod_unique_id.html
* ... it should be emphasized that applications should not dissect the encoding. ...
* Applications should treat the entire encoded UNIQUE_ID as an opaque token,
* which can be compared against other UNIQUE_IDs for equality only.
*
* Yet being able to peek inside is sometimes very useful...
*/
public class ModUniqueIdDissector extends Dissector {
// --------------------------------------------
private static final String INPUT_TYPE = "MOD_UNIQUE_ID";
@Override
public String getInputType() {
return INPUT_TYPE;
}
// --------------------------------------------
@Override
public List getPossibleOutput() {
List result = new ArrayList<>();
result.add("TIME.EPOCH:epoch");
result.add("IP:ip");
result.add("PROCESSID:processid");
result.add("COUNTER:counter");
result.add("THREAD_INDEX:threadindex");
return result;
}
// --------------------------------------------
@Override
public boolean initializeFromSettingsParameter(String settings) {
return true; // Everything went right.
}
// --------------------------------------------
@Override
protected void initializeNewInstance(Dissector newInstance) {
// Nothing to do
}
private boolean wantTime = false;
private boolean wantIp = false;
private boolean wantProcessId = false;
private boolean wantCounter = false;
private boolean wantThreadIndex = false;
@Override
public EnumSet prepareForDissect(final String inputname, final String outputname) {
String name = extractFieldName(inputname, outputname);
if ("epoch".equals(name)) {
wantTime = true;
return Casts.STRING_OR_LONG;
}
if ("ip".equals(name)) {
wantIp = true;
return Casts.STRING_OR_LONG;
}
if ("processid".equals(name)) {
wantProcessId = true;
return Casts.STRING_OR_LONG;
}
if ("counter".equals(name)) {
wantCounter = true;
return Casts.STRING_OR_LONG;
}
if ("threadindex".equals(name)) {
wantThreadIndex = true;
return Casts.STRING_OR_LONG;
}
return null;
}
@Override
public void prepareForRun() {
// We do not do anything extra here
}
// --------------------------------------------
@Override
public void dissect(final Parsable> parsable, final String inputname) throws DissectionFailure {
final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
String fieldValue = field.getValue().getString();
if (fieldValue == null || fieldValue.isEmpty()) {
return; // Nothing to do here
}
UniqueIdRec record = decode(fieldValue);
if (record == null) {
return;
}
if (wantTime) {
parsable.addDissection(inputname, "TIME.EPOCH", "epoch", record.timestamp);
}
if (wantIp) {
parsable.addDissection(inputname, "IP", "ip", record.ipaddrStr);
}
if (wantProcessId) {
parsable.addDissection(inputname, "PROCESSID", "processid", record.pid);
}
if (wantCounter) {
parsable.addDissection(inputname, "COUNTER", "counter", record.counter);
}
if (wantThreadIndex) {
parsable.addDissection(inputname, "THREAD_INDEX", "threadindex", record.threadIndex);
}
}
// --------------------------------------------
private class UniqueIdRec {
long timestamp;
long ipaddr;
String ipaddrStr;
long pid;
long counter;
long threadIndex;
}
// 1 letter = 6 bits of data = 2^6 = 64 letters needed to do the mapping
// 4 letters = 4*6 = 24 = 3*8 = 3 bytes
// So 24 letters = 24*6 = 144 bits = 18 bytes
public static final Charset CHARSET_UTF_8 = Charset.forName("UTF-8");
private byte[] decodeToBytes(String modUniqueIdString) {
if (modUniqueIdString.length() != 24) {
return null;
}
// http://httpd.apache.org/docs/current/mod/mod_unique_id.html
// The UNIQUE_ID environment variable is constructed by encoding the 144-bit
// (32-bit IP address, 32 bit pid, 32 bit time stamp, 16 bit counter, 32 bit thread index)
// quadruple using the alphabet [A-Za-z0-9@-] in a manner similar to MIME base64 encoding,
// producing 24 characters.
// This implementation is based on the observation that the encoding used by mod-unique-id is
// the same as Base64 except that the last two letters are different.
// So by simply replacing the occurences of these letters in the source we reuse and existing
// Base64 decode implementation.
byte[] modUniqueIdBytes = modUniqueIdString.getBytes(CHARSET_UTF_8);
byte[] modUniqueIdBase64Bytes = new byte[modUniqueIdBytes.length];
for (int i = 0; i < modUniqueIdBytes.length; i++) {
byte nextByte = modUniqueIdBytes[i];
switch (nextByte) {
case '+':
modUniqueIdBase64Bytes[i] = '@';
break;
case '/':
modUniqueIdBase64Bytes[i] = '@';
break;
default:
modUniqueIdBase64Bytes[i] = nextByte;
break;
}
}
return Base64.decodeBase64(modUniqueIdBase64Bytes);
}
private UniqueIdRec decode(String modUniqueIdString) {
byte[] bytes = decodeToBytes(modUniqueIdString);
if (bytes == null) {
return null;
}
// Is the decoded output the right length?
if (bytes.length != 18) {
return null;
}
UniqueIdRec result = new UniqueIdRec();
// http://httpd.apache.org/docs/current/mod/mod_unique_id.html
// we will use a Unix timestamp (seconds since January 1, 1970 UTC)
// (32-bit IP address, 32 bit pid, 32 bit time stamp, 16 bit counter, 32 bit thread index)
// The actual ordering of the encoding is: time stamp, IP address, pid, counter.
result.timestamp = (((int)bytes[0]) & 0xFF);
result.timestamp = (result.timestamp * 256) + (((int)bytes[1]) & 0xFF);
result.timestamp = (result.timestamp * 256) + (((int)bytes[2]) & 0xFF);
result.timestamp = (result.timestamp * 256) + (((int)bytes[3]) & 0xFF);
// Quote: The timestamp has only one second granularity
result.timestamp *= 1000; // This is to convert the time into milliseconds
// NOTE: In case of IPv6 the value will be related to the lower bits of the address.
result.ipaddr = (((int)bytes[4]) & 0xFF);
result.ipaddr = (result.ipaddr * 256) + (((int)bytes[5]) & 0xFF);
result.ipaddr = (result.ipaddr * 256) + (((int)bytes[6]) & 0xFF);
result.ipaddr = (result.ipaddr * 256) + (((int)bytes[7]) & 0xFF);
result.ipaddrStr = "" + (((int)bytes[4]) & 0xFF) +
'.' + (((int)bytes[5]) & 0xFF) +
'.' + (((int)bytes[6]) & 0xFF) +
'.' + (((int)bytes[7]) & 0xFF);
result.pid = (((int)bytes[8]) & 0xFF);
result.pid = (result.pid * 256) + (((int)bytes[9]) & 0xFF);
result.pid = (result.pid * 256) + (((int)bytes[10]) & 0xFF);
result.pid = (result.pid * 256) + (((int)bytes[11]) & 0xFF);
result.counter = (((int)bytes[12]) & 0xFF);
result.counter = (result.counter * 256) + (((int)bytes[13]) & 0xFF);
result.threadIndex = (((int)bytes[14]) & 0xFF);
result.threadIndex = (result.threadIndex * 256) + (((int)bytes[15]) & 0xFF);
result.threadIndex = (result.threadIndex * 256) + (((int)bytes[16]) & 0xFF);
result.threadIndex = (result.threadIndex * 256) + (((int)bytes[17]) & 0xFF);
return result;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy