Commit ea6b7b02 authored by Marko Kollo's avatar Marko Kollo
Browse files

Ignore and log on fail, Dashboard fix for wildcard.

parent cda8f021
......@@ -165,7 +165,7 @@ def dashboard_visualize(request):
indices = es_params.get("chosen_index", None).split(',')
for i in range(len(indices)):
indices[i] = indices[i].replace('.', '-')
indices[i] = indices[i].replace('.', '-').replace("*", "WILDCARD")
color_setting = request.POST['dashboard-color']
color_max = request.POST['dashboard-color-maximum']
......
......@@ -45,7 +45,7 @@ function removeLoader() {
/**
 * Append one Index object per entry of `data.indices` to `indicesArray`.
 *
 * Each index name is sanitised before being handed to the Index constructor:
 * every '.' becomes '-' and every '*' becomes 'WILDCARD'. This mirrors the
 * server-side replacement done with Python's str.replace, which replaces all
 * occurrences (presumably so both sides produce the same identifier -- verify
 * against the consumer of these names).
 *
 * @param {Array} indicesArray - Array that receives the new Index objects (mutated in place).
 * @param {Object} data - Object whose `indices` array holds entries with
 *     `aggregations`, `index_name` and `total_documents` properties.
 * @returns {Array} The same `indicesArray` instance, for convenience.
 */
function createIndices(indicesArray, data) {
    data.indices.forEach((element) => {
        // String.prototype.replace with a string pattern only swaps the FIRST
        // match, so global regexes are used to sanitise every occurrence.
        const safeName = element.index_name
            .replace(/\./g, '-')
            .replace(/\*/g, 'WILDCARD');
        // Exactly one Index per entry; pushing both the old and the new
        // variant would duplicate every index.
        indicesArray.push(new Index(element.aggregations, safeName, element.total_documents));
    });
    return indicesArray;
}
......
......@@ -61,28 +61,30 @@ class MlpPreprocessor(object):
# This part is wrapped in a try/except because it is a notorious troublemaker.
try:
analyzation_datum = analyzation_datum[0]
except Exception as e:
logging.getLogger(ERROR_LOGGER).exception(analyzation_datum)
input_feature_path = input_feature.split(".")
if len(input_feature) == 1:
documents[analyzation_idx][input_feature + '_mlp'] = analyzation_datum['text']
documents[analyzation_idx][input_feature + '_mlp']['lang'] = analyzation_datum['text']['lang']
if 'texta_facts' not in documents[analyzation_idx]:
documents[analyzation_idx]['texta_facts'] = []
documents[analyzation_idx]['texta_facts'].extend(analyzation_datum['texta_facts'])
input_feature_path = input_feature.split(".")
if len(input_feature) == 1:
documents[analyzation_idx][input_feature + '_mlp'] = analyzation_datum['text']
documents[analyzation_idx][input_feature + '_mlp']['lang'] = analyzation_datum['text']['lang']
if 'texta_facts' not in documents[analyzation_idx]:
documents[analyzation_idx]['texta_facts'] = []
documents[analyzation_idx]['texta_facts'].extend(analyzation_datum['texta_facts'])
else:
# Make sure the last field is used as the path.
mlp_field_path = input_feature_path[:-1] + [input_feature_path[-1] + "_mlp"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_field_path, analyzation_datum['text'])
else:
# Make sure the last field is used as the path.
mlp_field_path = input_feature_path[:-1] + [input_feature_path[-1] + "_mlp"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_field_path, analyzation_datum['text'])
lang_path = mlp_field_path + ["lang"]
Helpers.set_in_dict(documents[analyzation_idx], lang_path, analyzation_datum['text']['lang'])
lang_path = mlp_field_path + ["lang"]
Helpers.set_in_dict(documents[analyzation_idx], lang_path, analyzation_datum['text']['lang'])
if 'texta_facts' not in documents[analyzation_idx]:
documents[analyzation_idx]["texta_facts"] = []
if 'texta_facts' not in documents[analyzation_idx]:
documents[analyzation_idx]["texta_facts"] = []
documents[analyzation_idx]["texta_facts"].extend(analyzation_datum["texta_facts"])
documents[analyzation_idx]["texta_facts"].extend(analyzation_datum["texta_facts"])
except Exception as e:
logging.getLogger(ERROR_LOGGER).exception("Error: {}, Document ID: {}".format(e, documents[analyzation_idx]))
continue
return {'documents': documents, 'meta': {}, 'errors': errors}
import json
import logging
from texta.settings import ERROR_LOGGER
from utils.mlp_task_adapter import MLPTaskAdapter
from utils.mlp_task_adapter import Helpers
......@@ -78,24 +80,29 @@ class MLPLitePreprocessor(object):
analyzation_data, errors = MLPTaskAdapter(self.mlp_url, mlp_type='mlp_lite').process(data)
for analyzation_idx, analyzation_datum in enumerate(analyzation_data):
# For whatever reason, this will at times be None.
# If that happens, log it, ignore it, and move on with life.
try:
# Is it a nested field or a normal field?
if len(input_feature_path) > 1:
# Make sure the last field is used as the path.
mlp_field_path = input_feature_path[:-1] + [input_feature_path[-1] + "_mlp-lite"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_field_path, {})
mlp_text_path = mlp_field_path + ["text"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_text_path, analyzation_datum['text'])
if output_type == 'full':
mlp_stats_path = mlp_field_path + ["stats"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_stats_path, self._process_stats(analyzation_datum["stats"]))
# Is it a nested field or a normal field?
if len(input_feature_path) > 1:
# Make sure the last field is used as the path.
mlp_field_path = input_feature_path[:-1] + [input_feature_path[-1] + "_mlp-lite"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_field_path, {})
mlp_text_path = mlp_field_path + ["text"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_text_path, analyzation_datum['text'])
if output_type == 'full':
mlp_stats_path = mlp_field_path + ["stats"]
Helpers.set_in_dict(documents[analyzation_idx], mlp_stats_path, self._process_stats(analyzation_datum["stats"]))
else:
documents[analyzation_idx][input_feature + '_mlp-lite'] = {}
documents[analyzation_idx][input_feature + '_mlp-lite']['text'] = analyzation_datum['text']
if output_type == 'full':
documents[analyzation_idx][input_feature + '_mlp-lite']['stats'] = self._process_stats(analyzation_datum['stats'])
else:
documents[analyzation_idx][input_feature + '_mlp-lite'] = {}
documents[analyzation_idx][input_feature + '_mlp-lite']['text'] = analyzation_datum['text']
if output_type == 'full':
documents[analyzation_idx][input_feature + '_mlp-lite']['stats'] = self._process_stats(analyzation_datum['stats'])
except Exception as e:
logging.getLogger(ERROR_LOGGER).exception("Error Message: {}, Document: {}".format(e, documents[analyzation_idx]))
continue
return {'documents': documents, 'meta': {}, 'erros': errors}
......@@ -62,7 +62,7 @@ class Helpers:
class MLPTaskAdapter(object):
CELERY_CHUNK_SIZE = 10
MAX_NETWORK_RETRY_COUNT = 5
MAX_TASK_RETRY_COUNT = 1000
MAX_TASK_RETRY_COUNT = 200
def __init__(self, mlp_url, mlp_type='mlp'):
self.mlp_url = mlp_url
......@@ -169,6 +169,8 @@ class MLPTaskAdapter(object):
# Remove all the tasks that have either finished their jobs or failed while doing so.
self.tasks = [task for task in self.tasks if task["task"] not in self.finished_task_ids and task["task"] not in self.failed_task_ids]
self.tasks = [task for task in self.tasks if task["retry_count"] < MLPTaskAdapter.MAX_TASK_RETRY_COUNT]
sleep(5) # Wait a small amount of time until checking wheter the task has finished.
if not self.tasks: # Avoid waiting without reason if the next batch is up.
sleep(5) # Wait a small amount of time until checking wheter the task has finished.
return self.analyzation_data, self.errors
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment