from com.googlecode.fascinator.portal.report import SearchCriteriaListing
from com.googlecode.fascinator.api.indexer import SearchRequest
from java.io import ByteArrayInputStream, ByteArrayOutputStream
from com.googlecode.fascinator.common.solr import SolrResult
from java.net import URLEncoder
from java.util import TreeMap, ArrayList
from org.apache.commons.lang import StringEscapeUtils
from org.json.simple import JSONArray
from au.com.bytecode.opencsv import CSVParser
from au.com.bytecode.opencsv import CSVWriter
from java.io import StringWriter
from java.lang import String
import sys
class ReportResultData:
    """Page script that runs a saved report against the index and exposes the rows.

    The display template reads the results through the getters at the bottom
    of the class; when the request carries ``format=csv`` the matching rows
    are streamed back as a CSV attachment instead.

    NOTE(review): the copy of this file that was reviewed had lost its
    indentation and its control-flow lines (try/except/else/return/continue),
    plus a handful of statements whose absence left loops and branches empty.
    The structure below was rebuilt from the surviving statements and
    comments; every restored statement is marked with a NOTE(review) and
    should be confirmed against version control.
    """

    def __init__(self):
        # All state is set up per request in __activate__.
        pass

    def __activate__(self, context):
        """Initialise per-request state and run the report for admin users.

        context -- the scripting context mapping; the keys read here are
        "page", "request", "response", "log", "Services", "metadata" and
        "systemConfig".
        """
        self.__reportResult = None
        self.auth = context["page"].authentication
        self.request = context["request"]
        self.response = context["response"]
        self.log = context["log"]
        self.reportManager = context["Services"].getService("reportManager")
        self.indexer = context['Services'].getIndexer()
        self.metadata = context["metadata"]
        self.systemConfig = context["systemConfig"]
        self.__rowsFound = 0
        self.__rowsFoundSolr = 0
        self.processed_results_list = []
        self.errorMsg = ""

        if not self.auth.is_logged_in():
            self.errorMsg = "Please login."
            return
        if not self.auth.is_admin():
            self.errorMsg = "Requires Admin / Librarian / Reviewer access."
            return

        # NOTE(review): these two calls were missing from the reviewed copy;
        # without them nothing ever runs the report. Confirm.
        self.buildDashboard(context)
        self.__reportSearch()

    def __newReportRequest(self):
        """Build the search request for the report query, sized to return every hit."""
        req = SearchRequest(self.reportQuery)
        # NOTE(review): if setParam replaces rather than appends, the second
        # "fq" discards the first - verify against the SearchRequest API.
        req.setParam("fq", 'item_type:"object"')
        req.setParam("fq", 'workflow_id:"dataset"')
        req.setParam("rows", self.__rowsFoundSolr)
        return req

    def __reportSearch(self):
        """Run the report named by the "id" request parameter.

        Populates the report result and the processed results list for the
        display template. On any failure ``errorMsg`` is set, the problem is
        logged and the method returns early. When the "format" request
        parameter is "csv" the CSV export is produced afterwards.
        """
        self.reportId = self.request.getParameter("id")
        self.format = self.request.getParameter("format")
        self.report = self.reportManager.getReports().get(self.reportId)
        self.reportQuery = self.report.getQueryAsString()
        self.log.debug("Report query: " + self.reportQuery)

        # Get a total number of records so the main query can ask for all of them.
        # The bare excepts in this class are deliberate: the failures of
        # interest come from Java calls and are reported via sys.exc_info().
        try:
            out = ByteArrayOutputStream()
            recnumreq = SearchRequest(self.reportQuery)
            recnumreq.setParam("rows", "0")
            self.indexer.search(recnumreq, out)
            recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        # Now do the master search
        req = self.__newReportRequest()
        try:
            out = ByteArrayOutputStream()
            self.indexer.search(req, out)
            self.__reportResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
            # NOTE(review): restored call - the CSV branch relies on the
            # filtered list that __checkResults builds.
            self.__checkResults()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        # At this point the display template has enough to go with.
        # We just need to handle the CSV now
        if self.format == "csv":
            self.__exportCsv()

    def __exportCsv(self):
        """Re-query the index in CSV format and send the filtered rows as an attachment.

        Only rows whose first column (the id) belongs to a record that passed
        __checkResults are written. On failure ``errorMsg`` is set and the
        method returns so the normal template display is used instead.
        """
        # We need to requery to make sure we return only the required fields.
        req = self.__newReportRequest()

        # Filtering by an id:(a OR b OR ...) query was tried and abandoned
        # because it doesn't work for long queries - don't try it again.
        # Instead keep the matching IDs for reference when preparing the CSV.
        # NOTE(review): the loop body was missing; collecting item.get("id")
        # is inferred from the original comments.
        idQryList = set(item.get("id") for item in self.getProcessedResultsList())

        # Setup SOLR query with the required fields.
        # We must have an ID field and it must be the first field.
        self.fields = self.systemConfig.getArray("redbox-reports", "csv-output-fields")
        fieldNames = ["id"]
        if self.fields is not None:
            for field in self.fields:
                fieldNames.append(field.get("field-name"))
        fieldString = ",".join(fieldNames)
        # NOTE(review): restored - the field list was built but never applied.
        req.setParam("fl", fieldString)

        out = ByteArrayOutputStream()
        try:
            self.indexer.search(req, out, self.format)
        except:
            # We can't get the result back from SOLR so fail back to the template display
            self.errorMsg = "Query failure. Failed to load the data - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        try:
            csvResponseString = String(out.toByteArray(), "utf-8")
            csvResponseLines = csvResponseString.split("\n")
        except:
            # We can't get the result back from SOLR so fail back to the template display
            self.errorMsg = "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        fileName = self.urlEncode(self.report.getLabel())
        self.log.debug("Generating CSV report with file name: " + fileName)
        self.response.setHeader("Content-Disposition", "attachment; filename=%s.csv" % fileName)

        sw = StringWriter()
        parser = CSVParser()
        writer = CSVWriter(sw)
        headerPending = True
        prevLine = ""
        badRowFlag = False

        for line in csvResponseLines:
            if badRowFlag:
                # The previous line failed to parse; try once to repair it by
                # joining it with this one, otherwise emit an error row and
                # move on to the next row.
                joinedLine = prevLine + line
                # Reset first so a failed rescue cannot leak into later rows.
                badRowFlag = False
                prevLine = ""
                try:
                    self.log.debug("Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s" % joinedLine)
                    csvLine = parser.parseLine(joinedLine)
                    self.log.debug("Reporting - remedy appears to have worked. Line appears as: %s" % joinedLine)
                except:
                    # We tried to rescue the file but failed on the second run so give up
                    writer.writeNext(["Failed to transfer record to CSV - check logs"])
                    self.log.error("Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], joinedLine))
                    continue
            else:
                try:
                    csvLine = parser.parseLine(line)
                except:
                    # This can happen if there's a newline in the index data
                    # so we raise the badRowFlag and see if we can join this
                    # row to the next one to fix it
                    self.log.debug("Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], line))
                    badRowFlag = True
                    prevLine = line
                    continue

            # Nothing to write for a blank line (e.g. after the final newline).
            if csvLine is None or len(csvLine) == 0:
                continue

            if headerPending:
                # Header row: swap index field names for their display labels.
                headerPending = False
                for idx, csvValue in enumerate(csvLine):
                    csvLine[idx] = self.findDisplayLabel(csvValue)
            elif csvLine[0] not in idQryList:
                # This record did not survive the __checkResults filter.
                continue

            # NOTE(review): restored - no surviving statement wrote the row.
            writer.writeNext(csvLine)

        # Now send off the CSV
        self.out = self.response.getOutputStream("text/csv")
        # NOTE(review): the write/close calls were missing; this assumes the
        # returned object is a java.io.OutputStream - confirm.
        self.out.write(String(sw.toString()).getBytes("utf-8"))
        self.out.close()

    def __checkResults(self):
        """Filter the raw results down to those that satisfy the report criteria.

        This is a fix, required because our SOLR index doesn't support all of
        the required reporting criteria - specifically exact/contains.
        Criteria are evaluated left to right; the surviving records end up in
        ``processed_results_list`` and their count in the rows-found total.
        """
        self.processed_results_list = []
        if self.__reportResult is None:
            return

        # Get the report criteria
        criteria = self.report.getCriteria()
        opAnd = SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_AND
        opOr = SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_OR

        # For each result item we need to check that it matches the criteria
        for item in self.getReportResult():
            # Use last check to assist in the left-to-right check of operators
            lastCheck = True
            dropResultFlag = False

            for criteria_item in criteria.getCriteria():
                operator = criteria_item.getOperator()
                # If the last criteria item didn't check out and the AND op is
                # used, the record doesn't make it
                if not lastCheck and operator == opAnd:
                    dropResultFlag = True
                    # NOTE(review): 'break' inferred from the comment above.
                    break

                # sanitise solr field
                criteria_item.setSolr_field(String(criteria_item.getSolr_field()).replace("\\", ""))

                thisCheck = False
                if self.__checkResultsNull(criteria_item, item):
                    if self.__checkResultsMatch(criteria_item, item):
                        thisCheck = True

                # If this criteria item and the last one didn't check out and
                # the OR op is used, the record doesn't make it
                if (lastCheck or thisCheck) and operator == opOr:
                    dropResultFlag = False
                elif (lastCheck and thisCheck) and operator == opAnd:
                    dropResultFlag = False
                else:
                    # This row doesn't match
                    dropResultFlag = True
                lastCheck = thisCheck

            if not dropResultFlag:
                # Copy over to the new listing
                # NOTE(review): restored - the branch body was missing.
                self.processed_results_list.append(item)

        self.__rowsFound = len(self.processed_results_list)

    def __checkResultsNull(self, criteria_item, item):
        """Apply the null-handling part of a criterion to one result item.

        Returns True when the item passes the criterion's null rule.

        NOTE(review): the nesting was rebuilt as the only arrangement that
        keeps every surviving statement reachable. Under it an "include null"
        criterion passes only when the field is null - confirm that is the
        intended rule.
        """
        fieldIsNull = item.get(criteria_item.getSolr_field()) is None
        if criteria_item.getAllowNulls() == "field_include_null":
            # If the query criteria allows nulls and the field is null, true
            return fieldIsNull
        return not fieldIsNull

    def __checkResultsMatch(self, criteria_item, item):
        """Check one result item's field value(s) against a criterion's value.

        A "field_match" criterion needs a trimmed, case-insensitive exact
        match; anything else is treated as a case-insensitive contains
        search. Returns True as soon as any value matches.
        """
        fieldName = criteria_item.getSolr_field()
        if criteria_item.getAllowNulls() == "field_include_null":
            # If the query criteria allows nulls and the field is null, true
            if item.get(fieldName) is None:
                return True

        # Some fields are lists so just handle lists
        solrvallist = ArrayList()
        solrval = item.getString(None, fieldName)
        if solrval is None:
            solrvallist = item.getList(fieldName)
        else:
            # NOTE(review): restored - without this a single-valued field is
            # never compared and can never match.
            solrvallist.add(solrval)

        exactMatch = criteria_item.getMatchingOperator() == "field_match"
        wanted = criteria_item.getValue()
        for solrval in solrvallist:
            if exactMatch:
                # The criteria uses 'equals', so check that it's an exact match
                if String(String(solrval).trim()).equalsIgnoreCase(String(wanted).trim()):
                    return True
            else:
                # This is a contains search
                if solrval.lower().find(wanted.lower()) != -1:
                    return True
        return False

    def getProcessedResultsList(self):
        """Return the results that passed the criteria filter."""
        return self.processed_results_list

    def findDisplayLabel(self, csvValue):
        """Return the configured label for a CSV field name, or the name itself."""
        if self.fields is not None:
            for field in self.fields:
                if field.get("field-name") == csvValue:
                    return field.get("label")
        return csvValue

    def getErrorMsg(self):
        """Return the current error message ("" when there is none)."""
        return self.errorMsg

    def buildDashboard(self, context):
        """Keep a reference to the scripting context."""
        self.velocityContext = context

    def getReportResult(self):
        """Return the unfiltered result documents of the master search."""
        return self.__reportResult.getResults()

    def getReportName(self):
        """Return the name of the report being run."""
        return self.report.getReportName()

    def getReportLabel(self):
        """Return the label of the report being run."""
        return self.report.getLabel()

    def urlEncode(self, text):
        """URL-encode text as UTF-8."""
        return URLEncoder.encode(text, "utf-8")

    def escapeHtml(self, value):
        """HTML-escape value; empty or None input gives ""."""
        if value:
            return StringEscapeUtils.escapeHtml(value) or ""
        return ""

    def getRowsFound(self):
        """Return the number of results that passed the criteria filter."""
        return self.__rowsFound
