// All downloads are free. Search and download functionality uses the official Maven repository.

// uk.gov.nationalarchives.dp.client.DataProcessor.scala (Maven / Gradle / Ivy)

package uk.gov.nationalarchives.dp.client

import cats.MonadError
import cats.implicits.*
import uk.gov.nationalarchives.dp.client.Client.*
import uk.gov.nationalarchives.dp.client.DataProcessor.EventAction
import uk.gov.nationalarchives.dp.client.Entities.*
import uk.gov.nationalarchives.dp.client.EntityClient.*
import uk.gov.nationalarchives.dp.client.EntityClient.EntityType.*
import uk.gov.nationalarchives.dp.client.EntityClient.GenerationType.*

import java.time.ZonedDateTime
import java.util.UUID
import scala.xml.{Elem, MetaData, Node, NodeSeq}

/** A class to process XML responses from Preservica
  * @param me
  *   An implicit MonadError instance
  * @tparam F
  *   The effect type
  */
class DataProcessor[F[_]]()(using me: MonadError[F, Throwable]) {

  extension (ns: NodeSeq)
    /** Extracts the text of the first node in the sequence
      * @return
      *   The text of the first node wrapped in the F effect, or a failed effect if the sequence is empty
      */
    def textOfFirstElement(): F[String] =
      me.fromOption(ns.headOption.map(_.text), PreservicaClientException("Generation URL not found"))

  /** Converts an entity response to an [[Entities.Entity]] case class
    * @param entityRef
    *   The reference of the entity
    * @param entityResponse
    *   The XML response from Preservica
    * @param entityType
    *   The [[EntityClient.EntityType]] of the entity.
    * @return
    *   An [[Entities.Entity]] wrapped in the F effect. The effect fails if the response has no child element named
    *   after `entityType`, or if the text of the `Parent` element is not a valid UUID
    */
  def getEntity(entityRef: UUID, entityResponse: Elem, entityType: EntityType): F[Entity] =
    (entityResponse \ entityType.toString).headOption match {
      case None =>
        me.raiseError(PreservicaClientException(s"Entity type '$entityType' not found for id $entityRef"))
      case Some(entity) =>
        def optionValue(nodeName: String): Option[String] = (entity \ nodeName).headOption.map(_.text.trim)
        val title = optionValue("Title")
        val description = optionValue("Description")
        val securityTag = optionValue("SecurityTag").flatMap(SecurityTag.fromString)
        // Only the presence of a `Deleted` element is checked, not its text
        val deleted = optionValue("Deleted").isDefined
        // UUID.fromString throws on malformed input; catchNonFatal turns that into a failed F rather than
        // letting the exception escape the effect
        me.flatMap(me.catchNonFatal(optionValue("Parent").map(UUID.fromString))) { parent =>
          fromType[F](entityType.entityTypeShort, entityRef, title, description, deleted, securityTag, parent)
        }
    }

  /** Finds the first child of the entity response whose label is the given [[EntityClient.EntityType]]
    * @param entityRef
    *   The reference of the entity (only used in the error message)
    * @param entityResponse
    *   The XML response from Preservica
    * @param entityType
    *   The [[EntityClient.EntityType]] whose name is used as the element label to look for
    * @return
    *   The matching [[scala.xml.Node]] wrapped in the F effect, or a failed effect if there is no such element
    */
  def getEntityXml(entityRef: UUID, entityResponse: Elem, entityType: EntityType): F[Node] = {
    val matchingNode: Option[Node] = (entityResponse \ entityType.toString).headOption
    me.fromOption(
      matchingNode,
      PreservicaClientException(s"Entity type '$entityType' not found for id $entityRef")
    )
  }

  /** Reads the text at `nodeName` -> `childNodeName` in `entityResponse`
    * @param entityResponse
    *   The response from the entity
    * @param nodeName
    *   The label of the direct child of the response to look in
    * @param childNodeName
    *   The label of the grandchild to read; it is capitalised before the lookup
    * @return
    *   The text of the first matching node wrapped in the F effect, or a failed effect if there is none
    */
  def childNodeFromEntity(entityResponse: Elem, nodeName: String, childNodeName: String): F[String] = {
    val childNodes = entityResponse \ nodeName \ childNodeName.capitalize
    childNodes.headOption match {
      case Some(child) => me.pure(child.text)
      case None =>
        me.raiseError(PreservicaClientException(s"Either $nodeName or $childNodeName does not exist on entity"))
    }
  }

  /** Looks up the `ApiId` of the first `elementName` descendant whose `Name` text equals `fileName`
    * @param res
    *   The XML response from Preservica
    * @param elementName
    *   The label of the elements to search (at any depth)
    * @param fileName
    *   The value the element's `Name` text must equal
    * @return
    *   The text of the matching element's `ApiId`, or `None` if no element matches
    */
  def existingApiId(res: Elem, elementName: String, fileName: String): Option[String] =
    (res \\ elementName).collectFirst {
      case element if (element \ "Name").text == fileName => (element \ "ApiId").text
    }

  /** Collects the text of every `AdditionalInformation` -> `Metadata` -> `Fragment` element
    * @param entityResponseElement
    *   The EntityResponse element to search
    * @return
    *   A `Seq` of `String` with the fragment URLs, wrapped in the F effect (empty if there are no fragments)
    */
  def fragmentUrls(entityResponseElement: Elem): F[Seq[String]] =
    me.pure {
      for (fragment <- entityResponseElement \ "AdditionalInformation" \ "Metadata" \ "Fragment")
        yield fragment.text
    }

  /** Extracts the `MetadataContainer` child of each metadata response
    * @param metadataResponseElements
    *   The MetadataResponse elements that contain the fragments
    * @return
    *   A `Seq` of [[scala.xml.Node]], one `MetadataContainer` per response element, wrapped in the F effect. An empty
    *   input yields an empty result; a non-empty input for which the containers cannot all be found yields a failed
    *   effect
    */
  def fragments(metadataResponseElements: Seq[Elem]): F[Seq[Node]] = {
    val containers = (metadataResponseElements \ "MetadataContainer").toList

    val noContainers = containers.isEmpty
    val hasEmptyContainer = containers.exists(_.isEmpty)
    val countMismatch = containers.size != metadataResponseElements.size
    val allContainersFound = !(noContainers || hasEmptyContainer || countMismatch)

    // Nothing was requested, or every response yielded a container: succeed with what was found
    if metadataResponseElements.isEmpty || allContainersFound then me.pure(containers)
    else
      me.raiseError(
        PreservicaClientException(
          s"Could not be retrieve all 'MetadataContainer' Nodes from:\n${metadataResponseElements.mkString("\n")}"
        )
      )
  }

  /** Reads the text of the first `AdditionalInformation` -> `Generations` element
    * @param contentObjectElement
    *   The Content Object element to search
    * @return
    *   The text of the first `Generations` element wrapped in the F effect, or a failed effect if there is none
    */
  def generationUrlFromEntity(contentObjectElement: Elem): F[String] = {
    val generationsNodes = contentObjectElement \ "AdditionalInformation" \ "Generations"
    generationsNodes.textOfFirstElement()
  }

  /** Collects the text of every `Generations` -> `Generation` element
    * @param generationsElement
    *   The element containing the `Generations` element
    * @param contentObjectRef
    *   The reference of the content object (only used in the error message)
    * @return
    *   A `Seq` of `String` with the text of every `Generation` element wrapped in the F effect, or a failed effect if
    *   there are none
    */
  def allGenerationUrls(generationsElement: Elem, contentObjectRef: UUID): F[Seq[String]] = {
    val generationUrls = (generationsElement \ "Generations" \ "Generation").map(_.text)
    if generationUrls.isEmpty then
      me.raiseError(PreservicaClientException(s"No generations found for entity ref: $contentObjectRef"))
    else me.pure(generationUrls)
  }

  /** Determines the [[EntityClient.GenerationType]] from the `original` attribute of the first `Generation` element
    * @param generationsElement
    *   The element whose direct `Generation` children are inspected
    * @param contentObjectRef
    *   The reference of the content object (only used in the error messages)
    * @return
    *   `Original` if the attribute is `true`, `Derived` if it is `false`, wrapped in the F effect. The effect fails if
    *   there is no `Generation` element, if the only `Generation` element has no attributes, or if the `original`
    *   attribute is missing or has any other value
    */
  def generationType(generationsElement: Elem, contentObjectRef: UUID): F[GenerationType] =
    // `Seq()` / `+:` patterns work for any Seq implementation, so the match does not depend on `map` returning a List
    (generationsElement \ "Generation").map(_.attributes) match {
      case Seq() | Seq(xml.Null) =>
        me.raiseError(PreservicaClientException(s"No attributes found for entity ref: $contentObjectRef"))
      case firstGenerationAttributes +: _ =>
        firstGenerationAttributes.get("original").map(_.text) match {
          case Some("true")  => me.pure(Original)
          case Some("false") => me.pure(Derived)
          case _ =>
            me.raiseError(
              PreservicaClientException(
                s"'original' attribute could not be found on Generation for entity ref: $contentObjectRef"
              )
            )
        }
    }

  /** Collects the text of every `Bitstreams` -> `Bitstream` element
    * @param generationElement
    *   The element containing the `Bitstreams` element
    * @return
    *   A `Seq` of `String` with the text of every `Bitstream` element, wrapped in the F effect
    */
  def allBitstreamUrls(generationElement: Elem): F[Seq[String]] = {
    val bitstreamNodes = generationElement \ "Bitstreams" \ "Bitstream"
    me.pure(bitstreamNodes.map(_.text))
  }

  /** Returns a list of [[Client.BitStreamInfo]] objects
    * @param bitstreamElements
    *   The elements containing the bitstream information
    * @param generationType
    *   The generation type to record on every returned `BitStreamInfo`
    * @param contentObject
    *   The content object whose `title` and `parent` are recorded on every returned `BitStreamInfo`
    * @return
    *   A `Seq` of `BitStreamInfo` objects parsed from the XML, wrapped in the F effect. The effect fails if a
    *   `FileSize` or generation version is not numeric, or if a `Self` URL has fewer than three `/`-separated segments
    */
  def allBitstreamInfo(
      bitstreamElements: Seq[Elem],
      generationType: GenerationType,
      contentObject: Entity
  ): F[Seq[BitStreamInfo]] =
    // catchNonFatal rather than pure: the numeric conversions and the array lookup below can throw, and the
    // exception should surface as a failed F instead of escaping at the call site
    me.catchNonFatal {
      bitstreamElements.map { be =>
        val filename = (be \\ "Bitstream" \\ "Filename").text
        val fileSize = (be \\ "Bitstream" \\ "FileSize").text.toLong
        val bitstreamInfoUrl = (be \\ "AdditionalInformation" \\ "Self").text

        // The generation version is the third segment from the end of the 'Self' URL
        val bitstreamInfoUrlReversed = bitstreamInfoUrl.split("/").reverse
        val generationVersion = bitstreamInfoUrlReversed(2).toInt

        val bitstreamUrl = (be \\ "AdditionalInformation" \\ "Content").text

        val fixities = (be \\ "Bitstream" \\ "Fixities" \\ "Fixity").map { eachFixity =>
          Fixity((eachFixity \ "FixityAlgorithmRef").text, (eachFixity \ "FixityValue").text)
        }.toList

        BitStreamInfo(
          filename,
          fileSize,
          bitstreamUrl,
          fixities,
          generationVersion,
          generationType,
          contentObject.title,
          contentObject.parent
        )
      }
    }

  /** Reads the text of the first `Paging` -> `Next` element
    * @param elem
    *   The element containing the pagination element
    * @return
    *   The text of the `Next` element, or `None` if there is no such element, wrapped in the F effect
    */
  def nextPage(elem: Elem): F[Option[String]] = {
    val nextPageUrl = for (next <- (elem \ "Paging" \ "Next").headOption) yield next.text
    me.pure(nextPageUrl)
  }

  /** Parses every `Entities` -> `Entity` element into an [[Entities.Entity]], using the element's attributes
    * @param elem
    *   The element to parse
    * @return
    *   A `Seq` of [[Entities.Entity]] wrapped in the F effect. The effect fails if any `ref` attribute is not a valid
    *   UUID
    */
  def getEntities(elem: Elem): F[Seq[Entity]] =
    (elem \ "Entities" \ "Entity").map { e =>
      val entityAttributes = e.attributes

      // `.text` rather than `.toString`: `toString` on an attribute value re-escapes characters such as `&`,
      // whereas `getEntity` returns the plain text of the title and description
      val title = entityAttributes.get("title").map(_.text)
      val description = entityAttributes.get("description").map(_.text)
      val entityType = attrToString(e, "type")
      // Any non-empty `deleted` attribute marks the entity as deleted
      val deleted = attrToString(e, "deleted").nonEmpty

      // UUID.fromString throws on a missing or malformed ref; capture that as a failed F
      me.flatMap(me.catchNonFatal(UUID.fromString(attrToString(e, "ref")))) { ref =>
        fromType[F](entityType, ref, title, description, deleted)
      }
    }.sequence

  /** Parses every `Identifiers` -> `Identifier` element into an [[Entities.IdentifierResponse]]
    * @param elem
    *   The element containing the identifiers
    * @return
    *   A `Seq` of `IdentifierResponse` built from the `ApiId`, `Type` and `Value` text of each identifier, wrapped in
    *   the F effect
    */
  def getIdentifiers(elem: Elem): F[Seq[IdentifierResponse]] =
    me.pure {
      for (identifier <- elem \ "Identifiers" \ "Identifier")
        yield IdentifierResponse(
          (identifier \ "ApiId").text,
          (identifier \ "Type").text,
          (identifier \ "Value").text
        )
    }

  /** Returns the `Identifiers` -> `Identifier` elements as XML nodes
    * @param elem
    *   The element containing the identifiers
    * @return
    *   A `Seq` of `Identifier` nodes wrapped in the F effect
    */
  def getIdentifiersXml(elem: Elem): F[Seq[Node]] = {
    val identifierNodes: Seq[Node] = elem \ "Identifiers" \ "Identifier"
    me.pure(identifierNodes)
  }

  /** Returns a list of [[DataProcessor.EventAction]] objects
    * @param elem
    *   The element containing the event actions
    * @return
    *   A `Seq` of `EventAction` objects parsed from the XML, wrapped in the F effect. The effect fails if an event
    *   `Ref` is not a valid UUID, if an `Event` element has no `type` attribute, or if `Date` cannot be parsed as a
    *   `ZonedDateTime`
    */
  def getEventActions(elem: Elem): F[Seq[EventAction]] =
    // catchNonFatal rather than pure, so that parsing failures surface as a failed F
    me.catchNonFatal {
      (elem \ "EventActions" \ "EventAction").map { e =>
        val eventRef = UUID.fromString((e \\ "Event" \\ "Ref").text)
        // `attribute` returns an Option, whereas `attributes("type")` returns null when the attribute is absent
        val eventType = (e \\ "Event").flatMap { event =>
          event
            .attribute("type")
            .getOrElse(throw PreservicaClientException("'type' attribute could not be found on an Event element"))
        }.text
        val dateOfEvent = ZonedDateTime.parse((e \ "Date").text)

        EventAction(eventRef, eventType, dateOfEvent)
      }
    }

  /** Reads the text of a direct child of a workflow instance response
    * @param workflowInstanceResponse
    *   The workflow instance response
    * @param nodeName
    *   The label of the child node to search for
    * @return
    *   The text of the first child with that label wrapped in the F effect, or a failed effect if there is none
    */
  def childNodeFromWorkflowInstance(workflowInstanceResponse: Elem, nodeName: String): F[String] =
    (workflowInstanceResponse \ nodeName).headOption match {
      case Some(node) => me.pure(node.text)
      case None =>
        me.raiseError(PreservicaClientException(s"'$nodeName' does not exist on the workflowInstance response."))
    }

  /** Collects the text of the `Representations` -> `Representation` elements, optionally filtered by type
    * @param elem
    *   The element containing the representations
    * @param representationType
    *   If defined, only representations whose `type` attribute equals its string form are returned; if `None`, all
    *   representations are returned
    * @return
    *   A `Seq` of `String` with the text of each selected representation, wrapped in the F effect
    */
  def getUrlsToEntityRepresentations(elem: Elem, representationType: Option[RepresentationType]): F[Seq[String]] = {
    def matchesRequestedType(representation: Node): Boolean =
      representationType.forall(requested => attrToString(representation, "type") == requested.toString)

    val representations = elem \ "Representations" \ "Representation"
    me.pure(representations.filter(matchesRequestedType).map(_.text))
  }

  /** Returns the string form of the attribute `key` on `node`, or an empty string if the attribute is absent */
  private def attrToString(node: Node, key: String): String =
    node.attributes.get(key).fold("")(_.toString())

  /** Builds a content object [[Entities.Entity]] for every `Representation` -> `ContentObjects` -> `ContentObject`
    * element
    * @param elem
    *   The element containing the representation
    * @param representationType
    *   The representation type; it is not currently used when building the entities
    * @param ioEntityRef
    *   The reference set as the parent of every returned entity
    * @return
    *   A `Seq` of [[Entities.Entity]] whose refs are the text of each `ContentObject` element, wrapped in the F effect.
    *   The effect fails if any of those texts is not a valid UUID
    */
  def getContentObjectsFromRepresentation(
      elem: Elem,
      representationType: RepresentationType,
      ioEntityRef: UUID
  ): F[Seq[Entity]] =
    // catchNonFatal rather than pure: UUID.fromString throws on malformed text
    me.catchNonFatal {
      (elem \ "Representation" \ "ContentObjects" \ "ContentObject").map { rep =>
        Entity(
          Some(ContentObject),
          UUID.fromString(rep.text),
          None,
          None,
          deleted = false,
          ContentObject.entityPath.some,
          None,
          Some(ioEntityRef)
        )
      }
    }

  /** Reads the Preservica version from the namespace of an element
    * @param elem
    *   The element containing the namespace with the version in it
    * @return
    *   A `Float` parsed from the last `/`-separated segment of the namespace (with any leading `v` removed), wrapped
    *   in the F effect. The effect fails if the element has no namespace or the segment is not a number
    */
  def getPreservicaNamespaceVersion(elem: Elem): F[Float] =
    me.flatMap(
      // `namespace` is null when the element's prefix is not bound to a URI
      me.fromOption(
        Option(elem.namespace),
        PreservicaClientException("No namespace could be found on the element to determine the Preservica version")
      )
    ) { namespaceUrl =>
      // `last` and `toFloat` both throw on unexpected input; capture that as a failed F
      me.catchNonFatal(namespaceUrl.split("/").last.stripPrefix("v").toFloat)
    }

  /** Returns a list of [[scala.xml.Node]] XML objects, one per `Links` -> `Link` element that has `linkType`,
    * `linkDirection` and `ref` attributes. Links missing any of those three attributes are skipped.
    *
    * NOTE(review): the body below looks incomplete. The `{...}` expressions were presumably wrapped in XML element
    * literals that have been lost (possibly stripped as markup when this file was extracted). As written, the yielded
    * value is not a `Node`. Restore the literals from the original source before relying on this method.
    *
    * @param ref
    *   The reference paired with each link's `ref` attribute; the order of the pair depends on whether
    *   `linkDirection` is `To`
    * @param elem
    *   The element containing the Entity's links
    * @return
    *   A `Seq` of `Node` containing the links
    */
  def getEntityLinksXml(ref: UUID, elem: Elem): F[Seq[Node]] = me.pure {
    val links = elem \ "Links" \ "Link"
    links.flatMap { link =>
      for {
        linkType <- link.attribute("linkType").flatMap(_.headOption)
        linkDirection <- link.attribute("linkDirection").flatMap(_.headOption)
        refLinks <- link.attribute("ref").flatMap(_.headOption.map(_.toString))
      } yield {
        // For direction "To" the linked ref comes first and this entity's ref second; otherwise the order is reversed
        val entities =
          if linkDirection.text == "To" then
            List(
              {refLinks}
          {ref.toString}
            )
          else
            List(
              {ref.toString}
            {refLinks}
            )

        
          {linkType.text}
          {entities}
        
      }
    }
  }

  /** Returns the `EventActions` -> `EventAction` elements as XML nodes
    *
    * @param eventActionsResponseElement
    *   The element containing the Event actions
    * @return
    *   A `Seq` of `Node`, one per `EventAction` element, wrapped in the F effect
    */
  def getEventActionElements(eventActionsResponseElement: Elem): F[Seq[Node]] = {
    val eventActionNodes: Seq[Node] = eventActionsResponseElement \ "EventActions" \ "EventAction"
    me.pure(eventActionNodes)
  }

  /** Returns the direct `Representation` children of the response as XML nodes
    *
    * @param representationResponseElement
    *   The element containing the information for the Representation
    * @return
    *   A `Seq` of `Node` with the `Representation` elements, wrapped in the F effect
    */
  def getRepresentationElement(representationResponseElement: Elem): F[Seq[Node]] = {
    val representationNodes: Seq[Node] = representationResponseElement \ "Representation"
    me.pure(representationNodes)
  }

  /** Returns the direct `Generation` children of the response as XML nodes
    *
    * @param generationResponseElement
    *   The element containing the information for the Generation
    * @return
    *   A `Seq` of `Node` with the `Generation` elements, wrapped in the F effect
    */
  def getGenerationElement(generationResponseElement: Elem): F[Seq[Node]] = {
    val generationNodes: Seq[Node] = generationResponseElement \ "Generation"
    me.pure(generationNodes)
  }
}

/** Companion of the `DataProcessor` class, providing a factory method and the `EventAction` case class
  */
object DataProcessor {

  /** Creates a `DataProcessor` for the effect type `F`
    * @param me
    *   An implicit MonadError instance
    * @tparam F
    *   The effect type
    * @return
    *   A new `DataProcessor`
    */
  def apply[F[_]]()(implicit me: MonadError[F, Throwable]): DataProcessor[F] =
    new DataProcessor[F]()(using me)

  /** Holds the reference, type and date of an event
    * @param eventRef
    *   The reference of the event
    * @param eventType
    *   The type of the event
    * @param dateOfEvent
    *   The date of the event
    */
  case class EventAction(eventRef: UUID, eventType: String, dateOfEvent: ZonedDateTime)
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy