All downloads are free. Search and download functionality uses the official Maven repository.

io.hyperfoil.core.handlers.HtmlHandler Maven / Gradle / Ivy

There is a newer version: 0.27.1
Show newest version
package io.hyperfoil.core.handlers;

import java.nio.charset.StandardCharsets;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Stream;

import org.kohsuke.MetaInfServices;

import io.hyperfoil.api.config.BenchmarkDefinitionException;
import io.hyperfoil.api.config.BuilderBase;
import io.hyperfoil.api.config.Locator;
import io.hyperfoil.api.config.Name;
import io.hyperfoil.api.config.SequenceBuilder;
import io.hyperfoil.api.config.Step;
import io.hyperfoil.api.config.StepBuilder;
import io.hyperfoil.api.connection.HttpRequest;
import io.hyperfoil.api.processor.HttpRequestProcessorBuilder;
import io.hyperfoil.api.http.HttpMethod;
import io.hyperfoil.api.processor.Processor;
import io.hyperfoil.api.session.Access;
import io.hyperfoil.api.session.Action;
import io.hyperfoil.api.session.Session;
import io.hyperfoil.api.session.ResourceUtilizer;
import io.hyperfoil.core.data.DataFormat;
import io.hyperfoil.core.generators.StringGeneratorImplBuilder;
import io.hyperfoil.core.session.SessionFactory;
import io.hyperfoil.core.steps.AddToIntAction;
import io.hyperfoil.core.steps.AwaitIntStep;
import io.hyperfoil.core.steps.HttpRequestStep;
import io.hyperfoil.core.steps.PathMetricSelector;
import io.hyperfoil.core.builders.ServiceLoadedBuilderProvider;
import io.hyperfoil.core.steps.UnsetAction;
import io.hyperfoil.core.util.Trie;
import io.hyperfoil.core.util.Util;
import io.hyperfoil.function.SerializableBiFunction;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.vertx.core.logging.Logger;
import io.vertx.core.logging.LoggerFactory;

public class HtmlHandler implements Processor, ResourceUtilizer, Session.ResourceKey {
   // Logger for this handler (Vert.x logging API, see imports).
   private static final Logger log = LoggerFactory.getLogger(HtmlHandler.class);
   // Sampled once when the class is initialized; guards the trace statements below.
   private static final boolean trace = log.isTraceEnabled();

   // Handlers notified about each parsed tag name, attribute name and attribute value.
   private final TagHandler[] handlers;

   /**
    * Creates the handler. The constructor is private; within this file instances
    * are created by {@link Builder#build(boolean)}.
    *
    * @param handlers tag handlers that will receive the parsing events
    */
   private HtmlHandler(TagHandler... handlers) {
      this.handlers = handlers;
   }

   /**
    * Forwards the {@code before} callback to the processor of every registered tag handler,
    * in registration order.
    *
    * @param session session the response belongs to
    */
   @Override
   public void before(Session session) {
      for (int i = 0; i < handlers.length; ++i) {
         handlers[i].processor().before(session);
      }
   }

   /**
    * Forwards the {@code after} callback to the processor of every registered tag handler,
    * in registration order.
    *
    * @param session session the response belongs to
    */
   @Override
   public void after(Session session) {
      for (int i = 0; i < handlers.length; ++i) {
         handlers[i].processor().after(session);
      }
   }

   /**
    * Runs one chunk of the response body through the HTML tokenizer state machine.
    * <p>
    * The parser state lives in the per-session {@link Context}, so a tag name, attribute name
    * or attribute value may be split across several invocations. Each fragment is reported to
    * the handler contexts with {@code isLast == false} until the terminating delimiter is seen.
    *
    * @param session    session owning the parser state
    * @param data       buffer holding the chunk
    * @param offset     index of the first byte of the chunk in {@code data}
    * @param length     number of bytes in the chunk
    * @param isLastPart not consulted by this method
    */
   @Override
   public void process(Session session, ByteBuf data, int offset, int length, boolean isLastPart) {
      Context ctx = session.getResource(this);
      // A token left unfinished by the previous chunk continues at the start of this one.
      switch (ctx.tagStatus) {
         case PARSING_TAG:
            ctx.tagStart = offset;
            break;
         case PARSING_ATTR:
            ctx.attrStart = offset;
            break;
         case PARSING_VALUE:
            ctx.valueStart = offset;
            break;
      }
      while (length > 0) {
         // After this read, (offset - 1) is the index of c.
         byte c = data.getByte(offset++);
         --length;
         switch (ctx.tagStatus) {
            case NO_TAG:
               if (c == '<') {
                  ctx.tagStatus = TagStatus.ENTERED;
               }
               break;
            case ENTERED:
               if (c == '!') {
                  ctx.tagStatus = TagStatus.DOCTYPE_START;
               } else if (Character.isWhitespace(c)) {
                  ctx.tagStatus = TagStatus.BEFORE_TAG;
               } else if (c == '/') {
                  ctx.tagClosing = true;
                  ctx.tagStatus = TagStatus.BEFORE_TAG;
               } else {
                  ctx.tagStart = offset - 1;
                  ctx.tagStatus = TagStatus.PARSING_TAG;
               }
               break;
            case DOCTYPE_START:
               if (c == '-') {
                  ctx.comment = 3;
                  ctx.tagStatus = TagStatus.COMMENT;
               } else {
                  ctx.tagStatus = TagStatus.DOCTYPE;
               }
               break;
            case DOCTYPE:
               if (c == '>') {
                  ctx.endTag(session);
               }
               break;
            case COMMENT:
               // ctx.comment counts down on dashes; at 1 a '>' terminates the comment.
               if (ctx.comment == 1) {
                  if (c == '>') {
                     ctx.comment = 0;
                     ctx.tagStatus = TagStatus.NO_TAG;
                  } else if (c != '-') {
                     ctx.comment = 3;
                  }
               } else if (ctx.comment > 0) {
                  if (c == '-') {
                     ctx.comment--;
                  }
               }
               break;
            case BEFORE_TAG:
               if (!Character.isWhitespace(c)) {
                  ctx.tagStatus = TagStatus.PARSING_TAG;
                  ctx.tagStart = offset - 1;
               }
               break;
            case PARSING_TAG:
               if (Character.isWhitespace(c)) {
                  ctx.onTag(session, ctx.tagClosing, data, offset - 1, true);
                  ctx.tagStatus = TagStatus.BEFORE_ATTR;
               } else if (c == '>') {
                  ctx.endTag(session);
               }
               break;
            case BEFORE_ATTR:
               if (c == '>') {
                  ctx.endTag(session);
               } else if (!Character.isWhitespace(c)) {
                  ctx.attrStart = offset - 1;
                  ctx.tagStatus = TagStatus.PARSING_ATTR;
               }
               break;
            case PARSING_ATTR:
               if (c == '=' || Character.isWhitespace(c)) {
                  ctx.onAttr(session, data, offset - 1, true);
                  ctx.tagStatus = TagStatus.BEFORE_VALUE;
               } else if (c == '>') {
                  ctx.onAttr(session, data, offset - 1, true);
                  ctx.endTag(session);
               }
               break;
            case BEFORE_VALUE:
               if (c == '>') {
                  ctx.endTag(session);
               } else if (c == '=' || Character.isWhitespace(c)) {
                  // ignore, there was a whitespace
                  break;
               } else if (c == '"') {
                  ctx.tagStatus = TagStatus.PARSING_VALUE;
                  ctx.valueStart = offset;
                  ctx.valueQuoted = true;
               } else {
                  // missing quotes
                  ctx.tagStatus = TagStatus.PARSING_VALUE;
                  ctx.valueStart = offset - 1;
               }
               break;
            case PARSING_VALUE:
               if (c == '\\') {
                  ctx.charEscaped = true;
               } else if (c == '"' && !ctx.charEscaped) {
                  ctx.onValue(session, data, offset - 1, true);
                  ctx.tagStatus = TagStatus.BEFORE_ATTR;
                  ctx.valueQuoted = false;
               } else if (!ctx.valueQuoted && Character.isWhitespace(c)) {
                  ctx.onValue(session, data, offset - 1, true);
                  ctx.tagStatus = TagStatus.BEFORE_ATTR;
               } else if (!ctx.valueQuoted && c == '>') {
                  // An unquoted value directly followed by '>' (e.g. <img src=a.png>) ends both
                  // the value and the tag; without this branch the parser would keep consuming
                  // the document as part of the value.
                  ctx.onValue(session, data, offset - 1, true);
                  ctx.endTag(session);
               } else {
                  ctx.charEscaped = false;
               }
               break;
            default:
               throw new IllegalStateException();
         }
      }
      // The chunk ended in the middle of a token: report the partial token. Here offset is
      // already one past the last byte of the chunk, i.e. it IS the exclusive end. Using
      // (offset - 1) would drop the last byte of the fragment, and would yield length -1 when
      // the chunk ends right after an opening quote.
      switch (ctx.tagStatus) {
         case PARSING_TAG:
            ctx.onTag(session, ctx.tagClosing, data, offset, false);
            break;
         case PARSING_ATTR:
            ctx.onAttr(session, data, offset, false);
            break;
         case PARSING_VALUE:
            ctx.onValue(session, data, offset, false);
            break;
      }
   }

   /**
    * Declares the per-session parser {@link Context} under this handler's resource key and
    * passes the tag handlers on to {@code ResourceUtilizer.reserve}.
    *
    * @param session session being prepared
    */
   @Override
   public void reserve(Session session) {
      Context parserState = new Context();
      session.declareResource(this, parserState);
      // Pass the handlers as the varargs array itself, not as a single element.
      Object[] utilizers = handlers;
      ResourceUtilizer.reserve(session, utilizers);
   }

   /**
    * A consumer of parsing events, paired with the processor it drives.
    */
   interface TagHandler {
      /**
       * @return fresh handler state; the enclosing handler requests one per parser {@link Context}
       */
      HandlerContext newContext();

      /**
       * @return processor whose {@code before}/{@code after} callbacks are invoked by the enclosing handler
       */
      Processor processor();
   }

   /**
    * States of the tokenizer implemented in {@code process}.
    */
   enum TagStatus {
      /** Outside of any tag, waiting for {@code '<'}. */
      NO_TAG,
      /** {@code '<'} has just been read. */
      ENTERED,
      /** After {@code '<'} followed by whitespace or {@code '/'}; waiting for the tag name. */
      BEFORE_TAG,
      /** Inside the tag name. */
      PARSING_TAG,
      /** Inside a tag, waiting for an attribute name or {@code '>'}. */
      BEFORE_ATTR,
      /** Inside an attribute name. */
      PARSING_ATTR,
      /** {@code "<!"} has been read: doctype, comment. */
      DOCTYPE_START,
      /** Inside a {@code "<!"} construct that is not a comment; skipped until {@code '>'}. */
      DOCTYPE,
      /** After an attribute name, skipping {@code '='} and whitespace. */
      BEFORE_VALUE,
      /** Inside an attribute value. */
      PARSING_VALUE,
      /** Inside a comment. */
      COMMENT
   }

   /**
    * Parser state stored in the session: the current tokenizer status, the start indices of
    * the token being read and one {@link HandlerContext} per registered {@link TagHandler}.
    */
   class Context implements Session.Resource {
      TagStatus tagStatus = TagStatus.NO_TAG;
      // The value currently being parsed was opened with a double quote.
      boolean valueQuoted;
      // The previous byte of the value was a backslash.
      boolean charEscaped;
      // The current tag started with "</".
      boolean tagClosing;
      // Start indices of the current token in the buffer being processed; -1 when not set.
      int tagStart = -1;
      int attrStart = -1;
      int valueStart = -1;
      // Countdown used by the COMMENT state to detect the closing "-->" sequence.
      int comment;
      HandlerContext[] handlerCtx;

      Context() {
         handlerCtx = new HandlerContext[handlers.length];
         for (int i = 0; i < handlers.length; ++i) {
            handlerCtx[i] = handlers[i].newContext();
         }
      }

      /**
       * Reports the tag-name bytes in {@code [tagStart, tagEnd)} to all handler contexts
       * and clears {@code tagStart}.
       */
      void onTag(Session session, boolean close, ByteBuf data, int tagEnd, boolean isLast) {
         assert tagStart >= 0;
         final int length = tagEnd - tagStart;
         for (int i = 0; i < handlerCtx.length; ++i) {
            handlerCtx[i].onTag(session, close, data, tagStart, length, isLast);
         }
         tagStart = -1;
      }

      /**
       * Reports the attribute-name bytes in {@code [attrStart, attrEnd)} to all handler
       * contexts and clears {@code attrStart}.
       */
      void onAttr(Session session, ByteBuf data, int attrEnd, boolean isLast) {
         assert attrStart >= 0;
         final int length = attrEnd - attrStart;
         for (int i = 0; i < handlerCtx.length; ++i) {
            handlerCtx[i].onAttr(session, data, attrStart, length, isLast);
         }
         attrStart = -1;
      }

      /**
       * Reports the attribute-value bytes in {@code [valueStart, valueEnd)} to all handler
       * contexts and clears {@code valueStart}.
       */
      void onValue(Session session, ByteBuf data, int valueEnd, boolean isLast) {
         assert valueStart >= 0;
         final int length = valueEnd - valueStart;
         for (int i = 0; i < handlerCtx.length; ++i) {
            handlerCtx[i].onValue(session, data, valueStart, length, isLast);
         }
         valueStart = -1;
      }

      // TODO: content handling

      /**
       * Returns the tokenizer to {@link TagStatus#NO_TAG}, clears the closing-tag flag
       * and notifies all handler contexts that the tag has ended.
       */
      private void endTag(Session session) {
         tagStatus = TagStatus.NO_TAG;
         tagClosing = false;
         for (HandlerContext each : handlerCtx) {
            each.endTag(session);
         }
      }
   }

   /**
    * Receiver of tokenizer events for a single parser {@link Context}.
    * <p>
    * For the three data callbacks, {@code offset} and {@code length} delimit the reported
    * bytes inside {@code data}. {@code isLast} is {@code true} when the token's terminating
    * delimiter was reached and {@code false} when the chunk ended in the middle of the token.
    */
   interface HandlerContext {
      /** A (fragment of a) tag name; {@code close} is set for tags starting with {@code "</"}. */
      void onTag(Session session, boolean close, ByteBuf data, int offset, int length, boolean isLast);

      /** A (fragment of an) attribute name. */
      void onAttr(Session session, ByteBuf data, int offset, int length, boolean isLast);

      /** A (fragment of an) attribute value. */
      void onValue(Session session, ByteBuf data, int offset, int length, boolean isLast);

      /** The current tag has ended. */
      void endTag(Session session);
   }

   /**
    * Parses HTML tags and invokes handlers based on criteria.
    */
   @MetaInfServices(HttpRequestProcessorBuilder.class)
   @Name("parseHtml")
   public static class Builder implements HttpRequestProcessorBuilder {
      private Locator locator;
      private EmbeddedResourceHandlerBuilder embeddedResourceHandler;

      @Override
      public Builder setLocator(Locator locator) {
         this.locator = locator;
         return this;
      }

      /**
       * Handler firing upon reference to other resource, e.g. image, stylesheet...
       *
       * @return Builder.
       * @throws BenchmarkDefinitionException if the handler has already been set
       */
      public EmbeddedResourceHandlerBuilder onEmbeddedResource() {
         if (embeddedResourceHandler != null) {
            throw new BenchmarkDefinitionException("Embedded resource handler already set!");
         }
         return embeddedResourceHandler = new EmbeddedResourceHandlerBuilder().setLocator(locator);
      }

      /**
       * Prepares the embedded resource handler for build.
       *
       * @throws BenchmarkDefinitionException if no embedded resource handler was configured
       */
      @Override
      public void prepareBuild() {
         requireEmbeddedResourceHandler().prepareBuild();
      }

      /**
       * Copies this builder for another locator. A builder without a configured embedded
       * resource handler yields a copy that is unconfigured as well.
       *
       * @param locator locator for the copy
       * @return the new builder
       */
      @Override
      public Builder copy(Locator locator) {
         Builder newBuilder = new Builder().setLocator(locator);
         if (embeddedResourceHandler != null) {
            newBuilder.embeddedResourceHandler = embeddedResourceHandler.copy(locator);
         }
         return newBuilder;
      }

      /**
       * Builds the handler.
       *
       * @param fragmented not used by this builder
       * @return the handler
       * @throws BenchmarkDefinitionException if no embedded resource handler was configured
       */
      @Override
      public HtmlHandler build(boolean fragmented) {
         return new HtmlHandler(requireEmbeddedResourceHandler().build());
      }

      // Fails with a descriptive error instead of a NullPointerException when the handler is missing.
      private EmbeddedResourceHandlerBuilder requireEmbeddedResourceHandler() {
         if (embeddedResourceHandler == null) {
            throw new BenchmarkDefinitionException("Embedded resource handler is not set!");
         }
         return embeddedResourceHandler;
      }
   }

   /**
    * Handles <img src="...">, <link href="...">,
    * <embed src="...">, <frame src="...">,
    * <iframe src="...">, <object data="..."> and <script src="...">.
    * 

* Does not handle <source src="..."> or <track src="..."> because browser * would choose only one of the options. */ public static class EmbeddedResourceHandlerBuilder implements BuilderBase { private static final String[] TAGS = { "img", "link", "embed", "frame", "iframe", "object", "script" }; private static final String[] ATTRS = { "src", "href", "src", "src", "src", "data", "src" }; private Locator locator; private boolean ignoreExternal = true; private Processor.Builder processor; private FetchResourceBuilder fetchResource; @Override public EmbeddedResourceHandlerBuilder setLocator(Locator locator) { this.locator = locator; return this; } /** * Ignore resources hosted on servers that are not covered in the http section. * * @param ignoreExternal Ignore? * @return Self. */ public EmbeddedResourceHandlerBuilder ignoreExternal(boolean ignoreExternal) { this.ignoreExternal = ignoreExternal; return this; } /** * Automatically download referenced resource. * * @return Builder. */ public FetchResourceBuilder fetchResource() { return this.fetchResource = new FetchResourceBuilder(locator); } public EmbeddedResourceHandlerBuilder processor(Processor.Builder processor) { if (this.processor == null) { this.processor = processor; } else if (this.processor instanceof MultiProcessor.Builder) { @SuppressWarnings("unchecked") MultiProcessor.Builder multiprocessor = (MultiProcessor.Builder) this.processor; multiprocessor.add(processor); } else { this.processor = new MultiProcessor.Builder().add(this.processor).add(processor); } return this; } /** * Custom processor invoked pointing to attribute data - e.g. in case of <img> tag * the processor gets contents of the src attribute. * * @return Builder. 
*/ public ServiceLoadedBuilderProvider processor() { return new ServiceLoadedBuilderProvider<>(HttpRequestProcessorBuilder.class, locator, this::processor); } public void prepareBuild() { if (processor != null) { processor.prepareBuild(); } if (fetchResource != null) { fetchResource.prepareBuild(); } } @Override public EmbeddedResourceHandlerBuilder copy(Locator locator) { EmbeddedResourceHandlerBuilder builder = new EmbeddedResourceHandlerBuilder().setLocator(locator); builder.ignoreExternal(ignoreExternal); if (processor != null) { builder.processor(processor.copy(locator)); } if (fetchResource != null) { builder.fetchResource = fetchResource.copy(locator); } return builder; } public BaseTagAttributeHandler build() { if (processor != null && fetchResource != null) { throw new BenchmarkDefinitionException("Only one of processor/fetchResource allowed!"); } Processor p; if (fetchResource != null) { p = fetchResource.build(); } else if (processor != null) { p = processor.build(false); } else { throw new BenchmarkDefinitionException("Embedded resource handler is missing the processor"); } return new BaseTagAttributeHandler(TAGS, ATTRS, new EmbeddedResourceProcessor(ignoreExternal, p)); } } /** * Automates download of embedded resources. */ public static class FetchResourceBuilder implements BuilderBase { private final Locator locator; private final String generatedSeqName; private int maxResources; private SerializableBiFunction metricSelector; private Action.Builder onCompletion; FetchResourceBuilder(Locator locator) { this.locator = locator; this.generatedSeqName = String.format("%s_fetchResources_%08x", locator.sequence().name(), ThreadLocalRandom.current().nextInt()); } private String completionLatch() { return generatedSeqName + "_latch"; } private String downloadUrlVar() { return generatedSeqName + "_url"; } /** * Maximum number of resources that can be fetched. * * @param maxResources Max resources. * @return Self. 
*/ public FetchResourceBuilder maxResources(int maxResources) { this.maxResources = maxResources; return this; } /** * Metrics selector for downloaded resources. * * @return Builder. */ public PathMetricSelector metric() { PathMetricSelector metricSelector = new PathMetricSelector(); metric(metricSelector); return metricSelector; } public FetchResourceBuilder metric(SerializableBiFunction metricSelector) { if (this.metricSelector != null) { throw new BenchmarkDefinitionException("Metric already set!"); } this.metricSelector = metricSelector; return this; } /** * Action performed when the download of all resources completes. * * @return Builder. */ public ServiceLoadedBuilderProvider onCompletion() { return new ServiceLoadedBuilderProvider<>(Action.Builder.class, locator, this::onCompletion); } public FetchResourceBuilder onCompletion(Action.Builder a) { if (onCompletion != null) { throw new BenchmarkDefinitionException("Completion action already set!"); } onCompletion = a; return this; } public void prepareBuild() { if (maxResources <= 0) { throw new BenchmarkDefinitionException("maxResources is missing or invalid."); } SequenceBuilder sequence = locator.scenario().sequence(generatedSeqName); HttpRequestStep.Builder requestBuilder = new HttpRequestStep.Builder().sync(false).method(HttpMethod.GET); requestBuilder.path( new StringGeneratorImplBuilder<>(requestBuilder, false).fromVar(downloadUrlVar() + "[.]")); if (metricSelector != null) { requestBuilder.metric(metricSelector); } else { // Rather than using auto-generated sequence name we'll use the full path requestBuilder.metric((authority, path) -> authority != null ? 
authority + path : path); } requestBuilder.handler().onCompletion(new AddToIntAction.Builder().var(completionLatch()).value(-1)); requestBuilder.setLocator(sequence.createLocator()); sequence.stepBuilder(requestBuilder); requestBuilder.prepareBuild(); Action onCompletion = this.onCompletion.build(); // We add unset step for cases where the step is retried and it's not sync locator.sequence().insertAfter(locator) .step(new AwaitIntStep(completionLatch(), x -> x == 0)) .step(new StepBuilder.ActionStep(new UnsetAction(completionLatch()))) .step(new ResourceUtilizingStep(onCompletion)); } @Override public FetchResourceBuilder copy(Locator locator) { return new FetchResourceBuilder(locator) .maxResources(maxResources) .metric(metricSelector) .onCompletion(onCompletion); } @SuppressWarnings("unchecked") public FetchResourcesAdapter build() { return new FetchResourcesAdapter(completionLatch(), new MultiProcessor( new ArrayRecorder(downloadUrlVar(), DataFormat.STRING, maxResources), new NewSequenceProcessor(maxResources, generatedSeqName + "_cnt", generatedSeqName))); } } private static class ResourceUtilizingStep implements Step, ResourceUtilizer { private final Action action; public ResourceUtilizingStep(Action action) { this.action = action; } @Override public boolean invoke(Session session) { action.run(session); return true; } @Override public void reserve(Session session) { ResourceUtilizer.reserve(session, action); } } private static class FetchResourcesAdapter implements Processor, ResourceUtilizer { private final Access completionCounter; private final Processor delegate; private FetchResourcesAdapter(String completionCounter, Processor delegate) { this.completionCounter = SessionFactory.access(completionCounter); this.delegate = delegate; } @Override public void before(Session session) { completionCounter.setInt(session, 1); delegate.before(session); } @Override public void process(Session session, ByteBuf data, int offset, int length, boolean isLastPart) { 
completionCounter.addToInt(session, 1); delegate.process(session, data, offset, length, isLastPart); } @Override public void after(Session session) { completionCounter.addToInt(session, -1); delegate.after(session); } @Override public void reserve(Session session) { completionCounter.declareInt(session); ResourceUtilizer.reserve(session, delegate); } } private static class BaseTagAttributeHandler implements TagHandler, ResourceUtilizer { private final Trie trie; private final byte[][] attributes; private final Processor processor; BaseTagAttributeHandler(String[] tags, String[] attributes, Processor processor) { this.processor = processor; if (tags.length != attributes.length) { throw new IllegalArgumentException(); } this.trie = new Trie(tags); this.attributes = Stream.of(attributes) .map(s -> s.getBytes(StandardCharsets.UTF_8)).toArray(byte[][]::new); } @Override public Processor processor() { return processor; } @Override public HandlerContext newContext() { return new Ctx(); } @Override public void reserve(Session session) { ResourceUtilizer.reserve(session, processor); } protected class Ctx implements HandlerContext { private final Trie.State trieState = trie.newState(); private int tagMatched = -1; private int attrMatchedIndex = -1; private ByteBuf valueBuffer = ByteBufAllocator.DEFAULT.buffer(); @Override public void onTag(Session session, boolean close, ByteBuf data, int offset, int length, boolean isLast) { for (int i = 0; i < length; ++i) { int terminal = trieState.next(data.getByte(offset + i)); if (isLast && terminal >= 0) { tagMatched = terminal; attrMatchedIndex = 0; } } } @Override public void onAttr(Session session, ByteBuf data, int offset, int length, boolean isLast) { if (tagMatched < 0) { return; } if (attrMatchedIndex >= 0) { for (int i = 0; i < length; ++i) { if (attrMatchedIndex >= attributes[tagMatched].length) { attrMatchedIndex = -1; break; } else if (attributes[tagMatched][attrMatchedIndex] == data.getByte(offset + i)) { 
attrMatchedIndex++; } else { attrMatchedIndex = -1; break; } } } if (isLast) { if (attrMatchedIndex != attributes[tagMatched].length) { attrMatchedIndex = 0; } // otherwise keep matched positive for value } } @Override public void onValue(Session session, ByteBuf data, int offset, int length, boolean isLast) { if (tagMatched < 0 || attrMatchedIndex <= 0) { return; } valueBuffer.ensureWritable(length); valueBuffer.writeBytes(data, offset, length); if (isLast) { processor().process(session, valueBuffer, valueBuffer.readerIndex(), valueBuffer.readableBytes(), true); valueBuffer.clear(); attrMatchedIndex = 0; } } @Override public void endTag(Session session) { trieState.reset(); tagMatched = -1; attrMatchedIndex = -1; } } } private static class EmbeddedResourceProcessor extends Processor.BaseDelegating { private static final byte[] HTTP_PREFIX = "http".getBytes(StandardCharsets.UTF_8); private final boolean ignoreExternal; EmbeddedResourceProcessor(boolean ignoreExternal, Processor delegate) { super(delegate); this.ignoreExternal = ignoreExternal; } @Override public void process(Session session, ByteBuf data, int offset, int length, boolean isLastPart) { assert isLastPart; // TODO: here we should normalize the URL, remove escapes etc... 
boolean isAbsolute = hasPrefix(data, offset, length, HTTP_PREFIX); if (isAbsolute) { if (ignoreExternal) { int authorityStart = indexOf(data, offset, length, ':') + 3; boolean external = true; for (byte[] authority : session.httpDestinations().authorityBytes()) { if (hasPrefix(data, offset + authorityStart, length, authority)) { external = false; break; } } if (external) { if (trace) { log.trace("#{} Ignoring external URL {}", session.uniqueId(), Util.toString(data, offset, length)); } return; } } if (trace) { log.trace("#{} Matched URL {}", session.uniqueId(), Util.toString(data, offset, length)); } delegate.process(session, data, offset, length, true); } else if (data.getByte(offset) == '/') { // No need to rewrite relative URL if (trace) { log.trace("#{} Matched URL {}", session.uniqueId(), Util.toString(data, offset, length)); } delegate.process(session, data, offset, length, true); } else { HttpRequest request = (HttpRequest) session.currentRequest(); ByteBuf buffer = ByteBufAllocator.DEFAULT.buffer(request.path.length() + length); Util.string2byteBuf(request.path, buffer); for (int i = buffer.writerIndex() - 1; i >= 0; --i) { if (buffer.getByte(i) == '/') { buffer.writerIndex(i + 1); break; } } buffer.ensureWritable(length); buffer.writeBytes(data, offset, length); if (trace) { log.trace("#{} Rewritten relative URL to {}", session.uniqueId(), Util.toString(buffer, buffer.readerIndex(), buffer.readableBytes())); } delegate.process(session, buffer, buffer.readerIndex(), buffer.readableBytes(), true); buffer.release(); } } private int indexOf(ByteBuf data, int offset, int length, char c) { for (int i = 0; i <= length; ++i) { if (data.getByte(offset + i) == c) { return i; } } return -1; } private boolean hasPrefix(ByteBuf data, int offset, int length, byte[] authority) { int i = 0; for (; i < authority.length && i < length; i++) { if (data.getByte(offset + i) != authority[i]) { return false; } } return i == authority.length; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy