Commit 6e761c05 authored by Marko Kollo
Browse files

Fixed error in dashboard.

parent 7a37eb2b
......@@ -18,7 +18,7 @@ class MultiSearcherDashboard:
def conduct_query(self):
    """
    Run the dashboard query through a fresh MultiSearchConductor.

    Side effect: copies the conductor's ``field_counts`` onto
    ``self.field_counts`` after the query has been executed.

    :return: The value returned by ``MultiSearchConductor.query_conductor``
        for ``self.indices``.
    """
    conductor = MultiSearchConductor()
    # query_conductor names its connection parameter ``es``; the query is
    # issued exactly once with that keyword.
    result = conductor.query_conductor(
        self.indices,
        query_body=self.query_body,
        es=self.elasticsearch,
        es_url=self.es_url,
        excluded_fields=self.excluded_fields,
    )
    self.field_counts = conductor.field_counts
    return result
......
......@@ -39,14 +39,13 @@ class DashboardEsHelper:
:return:
"""
new_list = []
for field_dict in fields_and_types:
for nested_field_name in nested_fields:
if nested_field_name in field_dict[field_name_key]:
field_dict['is_nested'] = True
else:
field_dict['is_nested'] = False
new_list.append(field_dict)
for field in fields_and_types:
if field[field_name_key] in nested_fields:
field["is_nested"] = True
new_list.append(field)
else:
field["is_nested"] = False
new_list.append(field)
return new_list
......
from pprint import pprint
import logging
import elasticsearch_dsl
import elasticsearch
from elasticsearch_dsl import MultiSearch
from searcher.dashboard.es_helper import DashboardEsHelper
from texta.settings import ERROR_LOGGER
class MultiSearchConductor:
......@@ -11,7 +13,7 @@ class MultiSearchConductor:
self.field_counts = {}
self.multi_search = MultiSearch()
def query_conductor(self, indices, query_body, elasticsearch, es_url, excluded_fields):
def query_conductor(self, indices, query_body, es, es_url, excluded_fields):
result = {}
list_of_indices = indices.split(',')
......@@ -23,13 +25,18 @@ class MultiSearchConductor:
# Attach all the aggregations to Elasticsearch, depending on the fields.
# Text, keywords get term aggs etc.
self._normal_fields_handler(normal_fields, index=index, query_body=query_body, elasticsearch=elasticsearch)
self._texta_facts_agg_handler(index=index, query_body=query_body, elasticsearch=elasticsearch)
self._normal_fields_handler(normal_fields, index=index, query_body=query_body, elasticsearch=es)
self._texta_facts_agg_handler(index=index, query_body=query_body, elasticsearch=es)
# Send the query towards Elasticsearch and then save it into the result
# dict under its index's name.
responses = self.multi_search.using(elasticsearch).execute()
result[index] = [response.to_dict() for response in responses]
try:
responses = self.multi_search.using(es).execute()
result[index] = [response.to_dict() for response in responses]
except elasticsearch.exceptions.TransportError as e:
logging.getLogger(ERROR_LOGGER).exception(e.info)
raise elasticsearch.exceptions.TransportError
return result
......@@ -46,38 +53,38 @@ class MultiSearchConductor:
# TODO Find a better solution for this.
if field_type == "text":
if query_body is not None:
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("sigsterms#{0}#text_sigterms".format(field_name), 'significant_text', field=field_name, filter_duplicate_text=True)
self.multi_search = self.multi_search.add(search_dsl)
elif field_type == "keyword":
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("sterms#{0}#keyword_terms".format(field_name), 'terms', field=field_name)
self.multi_search = self.multi_search.add(search_dsl)
elif field_type == "date":
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("date_histogram#{0}_month#date_month".format(field_name), 'date_histogram', field=field_name, interval='month')
search_dsl.aggs.bucket("date_histogram#{0}_year#date_year".format(field_name), 'date_histogram', field=field_name, interval='year')
self.multi_search = self.multi_search.add(search_dsl)
elif field_type == "integer":
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("extended_stats#{0}#int_stats".format(field_name), 'extended_stats', field=field_name)
self.multi_search = self.multi_search.add(search_dsl)
elif field_type == "long":
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket('extended_stats#{0}#long_stats'.format(field_name), 'extended_stats', field=field_name)
self.multi_search = self.multi_search.add(search_dsl)
elif field_type == "float":
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("extended_stats#{0}#float_stats".format(field_name), 'extended_stats', field=field_name)
self.multi_search = self.multi_search.add(search_dsl)
def _texta_facts_agg_handler(self, query_body, index, elasticsearch):
search_dsl = self._create_search_object(query_body=query_body, index=index, elasticsearch=elasticsearch)
search_dsl = self._create_search_object(query_body=query_body, index=index, es=elasticsearch)
search_dsl.aggs.bucket("nested#texta_facts", 'nested', path='texta_facts') \
.bucket('sterms#fact_category', 'terms', field='texta_facts.fact', collect_mode="breadth_first") \
......@@ -104,9 +111,9 @@ class MultiSearchConductor:
else:
return field_name
def _create_search_object(self, query_body, index, elasticsearch):
def _create_search_object(self, query_body, index, es):
if query_body:
search = elasticsearch_dsl.Search.from_dict(query_body).index(index).using(elasticsearch).extra(size=0).source(False)
search = elasticsearch_dsl.Search.from_dict(query_body).index(index).using(es).extra(size=0).source(False)
return search
else:
search = elasticsearch_dsl.Search().index(index).extra(size=0).source(False)
......
......@@ -727,30 +727,6 @@ class ES_Manager:
without_duplicates = list(set(nested_field_names))
return without_duplicates
def get_field_types(self, filtered_field_mapping, remove_duplicate_keys=False) -> List[Dict[str, str]]:
    """
    Parses the results of the _mapping endpoint for fields to extract only the
    full path name of the field and its type. Nested fields are not included,
    multi-fields are by dot notation.

    :param filtered_field_mapping: Dict whose values each carry a 'full_name'
        and a 'mapping' dict; entries with an empty 'mapping' are skipped.
    :param remove_duplicate_keys: When True, identical entries are collapsed
        so that only the last occurrence of each is kept.
    :return: List of {'full_path': ..., 'type': ...} dicts.
    """
    all_fields = []
    for field_dict in filtered_field_mapping.values():
        mapping = field_dict['mapping']
        if not mapping:  # Empty dicts evaluate to False.
            continue

        # The type is read from the first key of the 'mapping' dict.
        mapping_key = next(iter(mapping))
        all_fields.append({
            'full_path': field_dict['full_name'],
            'type': mapping[mapping_key]['type'],
        })

    if remove_duplicate_keys:
        return [
            entry for position, entry in enumerate(all_fields)
            if entry not in all_fields[position + 1:]
        ]
    return all_fields
def get_filtered_field_mappings(self, es_field_mappings: dict) -> dict:
"""
Given the results of the _mapping endpoint for fields,
......@@ -781,27 +757,6 @@ class ES_Manager:
return normal_fields, nested_fields
def get_aggregation_field_data(self):
    """
    Chains the field helper methods together to produce the field data
    that the aggregations need.

    Every field that ends up in the nested group additionally receives a
    'parent' key holding the part of its 'full_path' before the first dot.

    :return: Tuple of (normal_fields, nested_fields) as produced by
        split_nested_fields.
    """
    nested_names = self.get_nested_field_names()
    filtered_mappings = self.get_filtered_field_mappings(self.get_field_mappings())
    typed_fields = self.get_field_types(filtered_mappings)
    flagged_fields = self.add_is_nested_to_fields(nested_names, typed_fields)
    normal_fields, nested_fields = self.split_nested_fields(flagged_fields)

    for entry in nested_fields:
        # By ES dot notation, "field.data" -> parent is "field".
        parent_name, _, _ = entry['full_path'].partition('.')
        entry['parent'] = parent_name

    return normal_fields, nested_fields
@staticmethod
def is_field_text_field(field_name, index_name):
text_types = ["text", "keyword"]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment