<?php

/**
 * Elasticsearch tools built on top of the official Elasticsearch PHP client library.
 * The main idea is to keep one index with several types (tables). Each table has localized versions named "table_".$lang
 * There are two tables for a fulltext, google-like document search over rendered content, which share the same mapping.
 *  - one of these tables is for pimcore \Pimcore\Model\Document\Page(s) and the other is for dynamic pages created from Objects (one table for each object class)
 *  - we do not use a single table because there would be ID conflicts, since document and object IDs are separate
 *  - an object extension (via classmapping) should implement \Website\Model\IElasticObjectFulltext to be indexed into elasticsearch
 * And then there are custom tables for custom searches - with custom mappings.
 *  - an object extension (via classmapping) should implement \Website\Model\IElasticObject to be indexed into elasticsearch
 * 
 * @author Martin Kuric <martin.kuric@portadesign.cz>
 */

namespace Website\Tool;

class ElasticSearch
{

	// \Zend_Registry key under which the name of the index currently being (re)built is stored during an update run
	const MAIN_INDEX_KEY = 'main_elastic_search_index';
	// type-name prefix for rendered pimcore documents; the language is appended as '_' . $lang
	const DOCUMENT_TYPE_KEY = 'document';
	// type-name prefix for objects indexed for the fulltext search; the language is appended as '_' . $lang
	const OBJECT_TYPE_KEY = 'object';
	// not referenced in this file - presumably the type prefix of a custom product search (TODO confirm against callers)
	const PRODUCT_TYPE_KEY = 'product';

	// Elasticsearch client instance, created on first use by getClient() and reused afterwards
	static $client = null;

	/**
	 * Rebuilds the Elasticsearch index from scratch and swaps it in behind the main alias.
	 *
	 * Steps: make sure the alias exists (creating a throw-away index if it does not), create a fresh
	 * timestamped index with the analysis settings, put the fulltext and object mappings into it,
	 * optionally index objects and rendered documents, point the alias at the new index and delete
	 * every index that was behind the alias before.
	 *
	 * While the method runs, the name of the index being built is kept in \Zend_Registry under
	 * self::MAIN_INDEX_KEY (getMainIndexName() reads it from there) and the 'elasticSearchUpdateRequest'
	 * entry is set to true. Both entries are reset to null when the method finishes - also when it fails,
	 * so that a failed run does not leave later calls pointing at a half-built index.
	 *
	 * @param bool $buildFulltextIndex when true, the output cache is cleared and the fulltext data is indexed
	 * @param bool $buildObjectIndex when true, the object data is indexed
	 * @throws \Exception anything thrown by the client or the indexing code is rethrown after the registry was reset
	 */
	public static function updateElasticSearchIndexesAndMappings($buildFulltextIndex = false, $buildObjectIndex = false)
	{
		$elasticClient = self::getClient();

		$alias = self::getMainIndexName();

		try {
			//first check if main index exists and if not create a dummy one which will then be deleted...
			//otherwise a non-existing index would cause problems when indexing some documents (? -> research)
			if (!$elasticClient->indices()->existsAlias(array('name' => $alias))) {
				$dummyIndexName = $alias . time() . 'dummy';
				\Zend_Registry::set(self::MAIN_INDEX_KEY, $dummyIndexName);
				$elasticClient->indices()->create(array('index' => $dummyIndexName));
				$elasticClient->indices()->putAlias(array('index' => $dummyIndexName, 'name' => $alias));
			}

			//create the new index
			$newIndex = $alias . '_' . time();
			$elasticClient->indices()->create(array('index' => $newIndex, 'body' => array('analysis' => self::getAnalysisSettings())));
			//we use \Zend_Registry for temporary main index key storage instead of passing it through params
			//the main index key is used across several methods which handle the reindexing process
			\Zend_Registry::set(self::MAIN_INDEX_KEY, $newIndex);
			\Zend_Registry::set('elasticSearchUpdateRequest', true);

			//fulltext mapping
			self::createFulltexttMappings();
			//object mappings
			self::createObjectMappings();
			//objects data
			if ($buildObjectIndex) {
				self::indexObjects();
			}
			//fulltext data
			if ($buildFulltextIndex) {
				//clear output cache, because of cached thumbnail links (they may not exist anymore)
				\Pimcore\Cache::clearTags(array('output'));
				self::indexFulltext();
			}

			//switch aliases and remove the old index(es)
			if ($elasticClient->indices()->existsAlias(array('name' => $alias))) {
				//the response is keyed by index name; an earlier aborted run may have left more than one index behind the alias
				$oldIndexes = array_keys((array) $elasticClient->indices()->getAlias(array('name' => $alias)));
				$elasticClient->indices()->putAlias(array('index' => $newIndex, 'name' => $alias));
				foreach ($oldIndexes as $oldIndex) {
					if ((string) $oldIndex !== $newIndex) {
						$elasticClient->indices()->delete(array('index' => $oldIndex));
					}
				}
			}

			//refresh index
			$elasticClient->indices()->refresh(array('index' => $newIndex));
		} catch (\Exception $e) {
			//do not leave the temporary index name / update flag behind for the rest of the request
			\Zend_Registry::set(self::MAIN_INDEX_KEY, null);
			\Zend_Registry::set('elasticSearchUpdateRequest', null);
			throw $e;
		}

		\Zend_Registry::set(self::MAIN_INDEX_KEY, null);
		\Zend_Registry::set('elasticSearchUpdateRequest', null);
	}

	/**
	 * Returns the analysis settings (analyzers, token filters, tokenizers) every newly created index gets.
	 *
	 * @return array value for the 'analysis' key of the index-creation request body
	 */
	private static function getAnalysisSettings()
	{
		return array(
			'analyzer' => array(
				'pathIndexAnalyzer' => array(
					'type' => 'custom',
					'tokenizer' => 'pathIndexTokenizer'
				),
				'cs_hunspell' => array(
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => array('stopwords_CZ', 'cs_CZ', 'lowercase', 'stopwords_CZ', 'remove_duplicities')
				),
				'sk_hunspell' => array(
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => array('stopwords_SK', 'sk_SK', 'lowercase', 'stopwords_SK', 'remove_duplicities')
				),
				'cs_icu_analyzer' => array(
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => array('stopwords_CZ', 'icu_folding', 'remove_duplicities')
				),
				'sk_icu_analyzer' => array(
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => array('stopwords_SK', 'icu_folding', 'remove_duplicities')
				)
			),
			'filter' => array(
				'stopwords_CZ' => array(
					'type' => 'stop',
					'stopwords' => array('právě', 'že', '_czech_'),
					'ignore_case' => true
				),
				'stopwords_SK' => array(
					'type' => 'stop',
					'stopwords' => array('a', 'aby', 'aj', 'ako', 'ale', 'alebo', 'ani', 'áno', 'asi', 'až', 'bez', 'buď', 'by', 'cez', 'či', 'čo', 'ešte', 'ho', 'i', 'iba', 'ich', 'ja', 'je', 'jeho', 'jej', 'ju', 'k', 'kam', 'kde', 'keď', 'kto', 'ku', 'menej', 'mi', 'moja', 'moje', 'môj', 'my', 'nad', 'nám', 'než', 'nič', 'nie', 'o', 'od', 'on', 'on', 'ona', 'ona', 'oni', 'ono', 'po', 'pod', 'podľa', 'pokiaľ', 'potom', 'práve', 'prečo', 'pred', 'preto', 'pretože', 'pri', 's', 'sa', 'si', 'sme', 'so', 'som', 'späť', 'ste', 'sú', 'sú', 'ta', 'tá', 'tak', 'tak', 'takže', 'tam', 'tam', 'táto', 'teda', 'ten', 'tento', 'tieto', 'tiež', 'to', 'to', 'toho', 'tom', 'tomto', 'toto', 'tu', 'túto', 'ty', 'tým', 'týmto', 'už', 'v', 'vám', 'viac', 'vo', 'však', 'vy', 'z', 'za', 'zo'),
					'ignore_case' => true
				),
				'cs_CZ' => array(
					'type' => 'hunspell',
					'locale' => 'cs_CZ',
					'dedup' => true,
					'recursion_level' => 0
				),
				'sk_SK' => array(
					'type' => 'hunspell',
					'locale' => 'sk_SK',
					'dedup' => true,
					'recursion_level' => 0
				),
				'remove_duplicities' => array(
					'type' => 'unique',
					'only_on_same_position' => true
				)
			),
			'tokenizer' => array(
				'pathIndexTokenizer' => array(
					'type' => 'path_hierarchy',
					'delimiter' => '/'
				)
			)
		);
	}

	/**
	 * Resolves the name of the index (or alias) to work with.
	 *
	 * During a reindexing run the name of the index being built is stored in \Zend_Registry under
	 * self::MAIN_INDEX_KEY; a non-empty value found there takes precedence. Otherwise the database name
	 * from the pimcore system configuration is used.
	 *
	 * @param boolean $searchRegistry whether to look into the registry for an index name set while reindexing
	 * @return string
	 */
	public static function getMainIndexName($searchRegistry = true)
	{
		if ($searchRegistry && \Zend_Registry::isRegistered(self::MAIN_INDEX_KEY)) {
			$registeredName = \Zend_Registry::get(self::MAIN_INDEX_KEY);
			//the entry is reset to null after an update run, so an empty value means "not reindexing"
			if ($registeredName) {
				return $registeredName;
			}
		}

		return \Pimcore\Config::getSystemConfig()->database->params->dbname;
	}

	/**
	 * Puts the fulltext mappings into the current main index (see getMainIndexName()).
	 *
	 * For every valid system language two types with the same mapping are created: one for pimcore
	 * documents (self::DOCUMENT_TYPE_KEY . '_' . $lang) and one for objects searched like documents
	 * (self::OBJECT_TYPE_KEY . '_' . $lang). The text fields use the hunspell analyzer of the language
	 * for 'cs' and 'sk' and the standard analyzer otherwise; each of them also has a 'folded' sub-field
	 * which uses the ICU analyzer of the language (again only for 'cs' and 'sk').
	 */
	private static function createFulltexttMappings()
	{
		$index = self::getMainIndexName();
		$client = self::getClient();

		$textFields = array('title', 'keywords', 'description', 'content');

		$properties = array(
			'id' => array('type' => 'integer', 'include_in_all' => false),
			'path' => array('type' => 'string', 'include_in_all' => false),
			'published' => array('type' => 'boolean', 'include_in_all' => false),
		);
		foreach ($textFields as $fieldName) {
			$properties[$fieldName] = array(
				'type' => 'string',
				'include_in_all' => false,
				'fields' => array(
					'folded' => array('type' => 'string'),
				),
			);
		}

		$mapping = array(
			'_source' => array('enabled' => true),
			'dynamic' => false,
			'properties' => $properties,
		);

		foreach (\Pimcore\Tool::getValidLanguages() as $lang) {
			//the analyzers depend on the language only, so they are resolved once per language
			$hasCustomAnalyzers = in_array($lang, array('cs', 'sk'));
			$mainAnalyzer = $hasCustomAnalyzers ? $lang . '_hunspell' : 'standard';
			$foldedAnalyzer = $hasCustomAnalyzers ? $lang . '_icu_analyzer' : 'standard';

			foreach ($textFields as $fieldName) {
				$mapping['properties'][$fieldName]['index_analyzer'] = $mainAnalyzer;
				$mapping['properties'][$fieldName]['search_analyzer'] = $mainAnalyzer;
				$mapping['properties'][$fieldName]['fields']['folded']['index_analyzer'] = $foldedAnalyzer;
				$mapping['properties'][$fieldName]['fields']['folded']['search_analyzer'] = $foldedAnalyzer;
			}

			//documents first, then objects - both share the very same mapping
			foreach (array(self::DOCUMENT_TYPE_KEY, self::OBJECT_TYPE_KEY) as $typePrefix) {
				$typeName = $typePrefix . '_' . $lang;
				$client->indices()->putMapping(array(
					'index' => $index,
					'type' => $typeName,
					'body' => array($typeName => $mapping)
				));
			}
		}
	}

	/**
	 * Indexes one document for the fulltext search; the content is rendered with
	 * \Pimcore\Model\Document\Service::render (only for published documents, otherwise it is empty).
	 *
	 * Documents with the 'elastic_search_exclude' property are skipped. The document goes into the type
	 * self::DOCUMENT_TYPE_KEY . '_' . <language property> of the current main index. The index is
	 * refreshed right away unless a full update run is in progress (see isUpdateRequest()).
	 *
	 * @param \Pimcore\Model\Document\Page $document
	 * @throws \Exception whatever rendering or the client throws; the redirector's exit flag is restored first
	 */
	public static function indexDocument(\Pimcore\Model\Document\Page $document)
	{
		if ($document->getProperty('elastic_search_exclude')) {
			return;
		}

		//set frontend translator if needed
		if (!\Zend_Registry::isRegistered('Zend_Translate') || 
				\Zend_Registry::get("Zend_Translate")->getLocale() != $document->getProperty('language') ||
				\Zend_Registry::get("Zend_Translate")->_('label_email') == 'label_email') # hack, tests for frontend translation
		{
			$translate = new \Pimcore\Translate\Website($document->getProperty('language'));
			\Zend_Registry::set("Zend_Translate", $translate);
		}

		$indexName = self::getMainIndexName();
		$elasticClient = self::getClient();

		//disable redirecting in the dispatch process which would break the indexing
		$redirector = \Zend_Controller_Action_HelperBroker::getStaticHelper('redirector');
		$exitBackup = $redirector->getExit();
		$redirector->setExit(false);
		try {
			$data = array(
				'id' => $document->getId(),
				'path' => $document->getPath() . $document->getKey(),
				'published' => $document->getPublished(),
				'title' => $document->getTitle(),
				'keywords' => $document->getKeywords(),
				'description' => $document->getDescription(),
				'content' => ($document->getPublished()) ? self::normalizeText(\Pimcore\Model\Document\Service::render($document, array('update_elastic_index' => 0, 'elastic_document_indexing' => true), false)) : ''
			);
		} catch (\Exception $e) {
			//a failing render must not leave the redirector's exit flag disabled for the rest of the request
			$redirector->setExit($exitBackup);
			throw $e;
		}
		$redirector->setExit($exitBackup);

		$elasticClient->index(array(
			'index' => $indexName,
			'type' => self::DOCUMENT_TYPE_KEY . '_' . $document->getProperty('language'),
			'id' => $data['id'],
			'body' => $data
		));
		//during a full update run the index is refreshed once at the end instead
		if (!self::isUpdateRequest()) {
			$elasticClient->indices()->refresh(array('index' => $indexName));
		}
	}

	/**
	 * Fills the fulltext part of the current main index (during an update run that is the new index
	 * whose name is stored in \Zend_Registry under self::MAIN_INDEX_KEY).
	 *
	 * 1) walks the document tree breadth-first from the root (ID 1) through pages and folders, published
	 *    or not, and passes every \Website\Model\Document\Page except the root to indexDocument()
	 * 2) for every object class whose \Website\Model\<ClassName> implements
	 *    \Website\Model\IElasticObjectFulltext, calls elasticSearchUpdateFulltext() on each listed object
	 */
	private static function indexFulltext()
	{
		//DOCUMENTS
		$pending = array(\Pimcore\Model\Document::getById(1));
		$children = new \Pimcore\Model\Document\Listing();
		$children->setUnpublished(true);
		//@TODO other types of documents (hardlinks, link...)
		while ($pending) {
			$current = array_shift($pending);
			$currentId = $current->getId();

			$isRoot = ($currentId == 1);
			if (!$isRoot && $current instanceof \Website\Model\Document\Page) {
				self::indexDocument($current);
			}

			//the same listing object is reused for every node, only the parent condition changes
			$children->setCondition("(type = 'page' OR type = 'folder') AND parentId = " . $currentId);
			$children->load();
			foreach ($children->getItems(0, 0) as $childDocument) {
				array_push($pending, $childDocument);
			}
		}

		//OBJECTS
		$definitions = new \Pimcore\Model\Object\ClassDefinition\Listing();
		$definitions->load();
		foreach ($definitions->getClasses() as $definition) {
			$modelClass = '\\Website\\Model\\' . $definition->getName();
			if (!@class_exists($modelClass)) {
				continue;
			}

			$interfaces = class_implements($modelClass);
			if (!$interfaces || !in_array('Website\\Model\\IElasticObjectFulltext', $interfaces)) {
				continue;
			}

			$listingClass = $modelClass . '\\Listing';
			$objects = new $listingClass();
			foreach ($objects as $object) {
				$object->elasticSearchUpdateFulltext();
			}
		}
	}

	/**
	 * Lets every object class whose \Website\Model\<ClassName> implements \Website\Model\IElasticObject
	 * create its own mappings by calling its static elasticSearchCreateMappings() method.
	 * During an update run the mappings are meant to go into the new index whose name is stored in
	 * \Zend_Registry under self::MAIN_INDEX_KEY.
	 */
	private static function createObjectMappings()
	{
		$definitions = new \Pimcore\Model\Object\ClassDefinition\Listing();
		$definitions->load();

		foreach ($definitions->getClasses() as $definition) {
			$modelClass = '\\Website\\Model\\' . $definition->getName();
			//object classes without an extension class in \Website\Model are skipped
			if (!@class_exists($modelClass)) {
				continue;
			}

			$interfaces = (array)class_implements($modelClass);
			if (empty($interfaces) || !in_array('Website\\Model\\IElasticObject', $interfaces)) {
				continue;
			}

			$modelClass::elasticSearchCreateMappings();
		}
	}

	/**
	 * Reindexes the objects of every object class whose \Website\Model\<ClassName> implements
	 * \Website\Model\IElasticObject by calling its static elasticsearchReindexObjects() method.
	 */
	private static function indexObjects()
	{
		$definitions = new \Pimcore\Model\Object\ClassDefinition\Listing();
		$definitions->load();

		foreach ($definitions->getClasses() as $definition) {
			$modelClass = '\\Website\\Model\\' . $definition->getName();
			//object classes without an extension class in \Website\Model are skipped
			if (!@class_exists($modelClass)) {
				continue;
			}

			$interfaces = class_implements($modelClass);
			if (!$interfaces || !in_array('Website\\Model\\IElasticObject', $interfaces)) {
				continue;
			}

			$modelClass::elasticsearchReindexObjects();
		}
	}

	/**
	 * Removes a document from the main index - from one language version of the type, or from the
	 * versions of all valid system languages when no language is given.
	 *
	 * Failures of single delete requests are only logged (best effort). The index is refreshed
	 * afterwards unless a full update run is in progress.
	 *
	 * @param mixed $id ID of the indexed document
	 * @param string $type type name without the language suffix
	 * @param string|null $language language to delete from; all valid languages when empty
	 */
	public static function delete($id, $type, $language = null)
	{
		$client = self::getClient();

		$languages = $language ? array($language) : \Pimcore\Tool::getValidLanguages();

		$index = self::getMainIndexName();
		foreach ($languages as $lang) {
			try {
				$request = array('index' => $index, 'type' => $type . '_' . $lang, 'id' => $id);
				$client->delete($request);
			} catch (\Exception $e) {
				$message = 'Could not delete document with ID: ' . $id . ' and type: ' . $type . '. Exception: ' . $e->getMessage() . "\n" . $e->getTraceAsString();
				\Pimcore\Log\Simple::log('elasticsearch', $message);
			}
		}

		if (self::isUpdateRequest()) {
			return;
		}
		$client->indices()->refresh(array('index' => $index));
	}

	/**
	 * Returns the shared Elasticsearch client; it is created on the first call (logging into
	 * elasticsearch_client.log in the pimcore log directory) and reused afterwards.
	 *
	 * @return \Elasticsearch\Client
	 */
	public static function getClient()
	{
		if (self::$client) {
			return self::$client;
		}

		$options = array(
			'logPath' => PIMCORE_LOG_DIRECTORY . '/elasticsearch_client.log'
		);
		self::$client = new \Elasticsearch\Client($options);

		return self::$client;
	}

	/**
	 * Builds a suggestion entry for prefix suggestions: the string is passed through
	 * \Website\Tool\Utils::webalize() and split on '-' so that each part becomes a separate input,
	 * while the original string is kept as the output.
	 *
	 * @param string $string text to suggest
	 * @param mixed $payload optional payload; only added when it is not empty
	 * @return array suggestion with the keys 'input', 'output', 'weight' and optionally 'payload'
	 */
	public static function createSuggestion($string, $payload = null)
	{
		$inputs = explode('-', \Website\Tool\Utils::webalize($string));

		$suggestion = array(
			'input' => $inputs,
			'output' => $string,
			'weight' => 1
		);

		if (!$payload) {
			return $suggestion;
		}

		$suggestion['payload'] = $payload;

		return $suggestion;
	}

	/**
	 * Tells whether a full index update run is in progress in the current request.
	 *
	 * The update routine sets the 'elasticSearchUpdateRequest' registry entry to true while it runs and
	 * back to null when it is done. A registry entry holding null still counts as registered, so the
	 * stored value has to be checked as well - otherwise this would keep returning true after the run.
	 *
	 * @return bool
	 */
	public static function isUpdateRequest()
	{
		return \Zend_Registry::isRegistered('elasticSearchUpdateRequest')
			&& (bool) \Zend_Registry::get('elasticSearchUpdateRequest');
	}

	/**
	 * Turns rendered HTML into plain text for indexing: removes the tags, decodes HTML entities and
	 * collapses every run of spaces, tabs and line breaks into a single space.
	 *
	 * The tags are stripped BEFORE the entities are decoded. Otherwise escaped markup that is part of
	 * the visible text (e.g. "&lt;b&gt;") would be turned into real tags and removed with them.
	 *
	 * @param string $text HTML
	 * @return string plain text
	 */
	public static function normalizeText($text)
	{
		$plainText = html_entity_decode(strip_tags($text), ENT_QUOTES, 'UTF-8');

		return preg_replace('/[ \t\r\n]+/', ' ', $plainText);
	}

}
