<?php

/**
 * Elastic Search tools built on top of the official Elasticsearch PHP client library
 * The main idea is to keep one index with more types (tables). Each table has more localized versions named "table_".$lang
 * There are two tables for a fulltext google-like document search for rendered content, which have the same mapping.
 *  - one of these tables is for pimcore \Pimcore\Model\Document\Page(s) and the other is for dynamic pages created from Objects (one table for each object class)
 *  - we do not use only one table because there would be ID conflicts since document and object IDs are separated
 *  - object extension (via classmapping) should implement \Website\Model\IElasticObjectFulltext to be indexed into elasticsearch
 * And then there are custom tables for custom searches - with custom mappings.
 *  - object extension (via classmapping) should implement \Website\Model\IElasticObject to be indexed into elasticsearch
 *
 * @author Martin Kuric <martin.kuric@portadesign.cz>
 */

namespace Website\Tool;

/**
 * Static helper around the Elasticsearch PHP client: builds the index analysis and type mappings,
 * indexes documents and objects, and swaps the main alias to a freshly built index.
 */
class ElasticSearch
{
	/** \Zend_Registry key holding the name of the index currently being built */
	const MAIN_INDEX_KEY = 'main_elastic_search_index';
	/** \Zend_Registry key flagging that a reindex (update request) is in progress */
	const UPDATE_REQUEST_KEY = 'elasticSearchUpdateRequest';
	const DOCUMENT_FULLTEXT_TYPE_KEY = 'document';
	const OBJECT_FULLTEXT_TYPE_KEY = 'object';

	public static $client = null;
	public static $mappings = null;
	public static $fromCli = false;

	/**
	 * Returns the shared client instance, creating it on first use.
	 *
	 * @return \Elasticsearch\Client
	 */
	public static function getClient()
	{
		if (!self::$client) {
			self::$client = new \Elasticsearch\Client([
				'logPath' => PIMCORE_LOG_DIRECTORY . '/elasticsearch_client.log'
			]);
		}

		return self::$client;
	}

	/**
	 * Returns the main index name: the one stored in the registry during update requests,
	 * otherwise the database name from the Pimcore system config.
	 *
	 * @param boolean $searchRegistry Checks the registry for a possible new index name used while reindexing
	 * @return string
	 */
	public static function getMainIndexName($searchRegistry = true)
	{
		$index = null;

		if ($searchRegistry && \Zend_Registry::isRegistered(self::MAIN_INDEX_KEY)) {
			$index = \Zend_Registry::get(self::MAIN_INDEX_KEY);
		}

		// the registry value is reset to null after a reindex, so fall back on any empty value
		if (!$index) {
			$index = \Pimcore\Config::getSystemConfig()->database->params->dbname;
		}

		return $index;
	}

	/**
	 * Tells whether a reindex started by updateMappingsAndReindex() is currently running.
	 *
	 * The flag is reset by storing null in the registry, and a key holding null still counts
	 * as registered, so the stored value has to be checked as well.
	 *
	 * @return boolean
	 */
	public static function isUpdateRequest()
	{
		return \Zend_Registry::isRegistered(self::UPDATE_REQUEST_KEY)
			&& (bool) \Zend_Registry::get(self::UPDATE_REQUEST_KEY);
	}

	/**
	 * Writes to STDOUT when the tool was started from the command line (self::$fromCli).
	 *
	 * @param string $message
	 */
	private static function cliOutput($message)
	{
		if (self::$fromCli) {
			fputs(STDOUT, $message);
		}
	}

	/**
	 * Clears the registry entries which mark a running reindex.
	 */
	private static function resetUpdateState()
	{
		\Zend_Registry::set(self::MAIN_INDEX_KEY, null);
		\Zend_Registry::set(self::UPDATE_REQUEST_KEY, null);
	}

	/**
	 * Puts a type mapping into the current main index (see getMainIndexName()).
	 *
	 * @param string $type
	 * @param array $mapping
	 */
	private static function putTypeMapping($type, array $mapping)
	{
		self::getClient()->indices()->putMapping([
			'index' => self::getMainIndexName(),
			'type' => $type,
			'body' => [$type => $mapping]
		]);
	}

	/**
	 * Builds a complete type mapping for one language.
	 *
	 * @param array $properties base field definitions
	 * @param array $fulltextFields names of fields which get language analyzers
	 * @param string $lang
	 * @return array
	 */
	private static function buildMapping($properties, $fulltextFields, $lang)
	{
		$mapping = [
			'_source' => ['enabled' => true],
			'dynamic' => false,
			'properties' => $properties
		];
		self::addFulltextFieldsToMapping($mapping, $fulltextFields, $lang);

		return $mapping;
	}

	/**
	 * Creates the fulltext mappings for pimcore documents and for dynamic pages (objects) searched
	 * like documents. One document type and one object type is created per configured language,
	 * both in the index returned by getMainIndexName().
	 */
	private static function createFulltextMappings()
	{
		$fulltextMapping = self::getObjectMapping('fulltext_mapping');
		$fulltextFields = isset($fulltextMapping['meta']['fulltextFields'])
			? $fulltextMapping['meta']['fulltextFields']
			: [];

		foreach ($fulltextMapping['meta']['languages'] as $lang) {
			$mapping = self::buildMapping($fulltextMapping['properties'], $fulltextFields, $lang);
			self::putTypeMapping(self::DOCUMENT_FULLTEXT_TYPE_KEY . '_' . $lang, $mapping);
			self::putTypeMapping(self::OBJECT_FULLTEXT_TYPE_KEY . '_' . $lang, $mapping);
		}
	}

	/**
	 * Returns the mapping definition for a class from tools/ElasticSearchMappings.php.
	 * The lookup key is the lowercased class name; unknown classes get the 'default_mapping' entry.
	 *
	 * @param string $className
	 * @return array
	 * @throws \RuntimeException when the mappings file does not return an array
	 */
	public static function getObjectMapping($className)
	{
		if (!self::$mappings) {
			$mappings = include(PIMCORE_WEBSITE_PATH . '/tools/ElasticSearchMappings.php');
			if (!is_array($mappings)) {
				throw new \RuntimeException('Elasticsearch mappings file is missing or does not return an array.');
			}
			self::$mappings = $mappings;
		}

		$mappingKey = strtolower($className);
		if (!isset(self::$mappings[$mappingKey])) {
			$mappingKey = 'default_mapping';
		}

		return self::$mappings[$mappingKey];
	}

	/**
	 * Makes sure every fulltext field (and its "folded" sub-field) exists in the mapping
	 * and assigns the analyzers for the given language.
	 *
	 * @param array $mapping mapping to modify (by reference)
	 * @param array $fields fulltext field names
	 * @param string $lang
	 */
	private static function addFulltextFieldsToMapping(&$mapping, $fields, $lang)
	{
		// only Czech and Slovak have dedicated analyzers, everything else uses the standard one
		$hasCustomAnalyzers = in_array($lang, ['cs', 'sk'], true);
		$analyzer = $hasCustomAnalyzers ? $lang . '_hunspell' : 'standard';
		$icuAnalyzer = $hasCustomAnalyzers ? $lang . '_icu_analyzer' : 'standard';

		foreach ($fields as $field) {
			//add field
			if (!isset($mapping['properties'][$field])) {
				$mapping['properties'][$field] = [
					'type' => 'string',
					'include_in_all' => false,
					'term_vector' => 'with_positions_offsets'
				];
			}
			//add folded field
			if (!isset($mapping['properties'][$field]['fields']['folded'])) {
				$mapping['properties'][$field]['fields']['folded'] = [
					'type' => 'string',
					'include_in_all' => false,
					'term_vector' => 'with_positions_offsets'
				];
			}
			//apply analyzers
			$mapping['properties'][$field]['index_analyzer'] = $analyzer;
			$mapping['properties'][$field]['search_analyzer'] = $analyzer;
			$mapping['properties'][$field]['fields']['folded']['index_analyzer'] = $icuAnalyzer;
			$mapping['properties'][$field]['fields']['folded']['search_analyzer'] = $icuAnalyzer;
		}
	}

	/**
	 * Returns the pimcore class definitions whose \Website\Model\<Name> class exists
	 * and implements the given interface.
	 *
	 * @param string $interface fully qualified interface name without the leading backslash
	 * @param boolean $requireListing also require the \Website\Model\<Name>\Listing class to exist
	 * @return array class definitions as returned by the class definition listing
	 */
	private static function getClassDefinitionsImplementing($interface, $requireListing = false)
	{
		$listing = new \Pimcore\Model\Object\ClassDefinition\Listing();
		$listing->load();

		$matching = [];
		foreach ($listing->getClasses() as $class) {
			$className = '\\Website\\Model\\' . $class->getName();
			// "@" kept from the original code, it silences warnings raised while autoloading
			if (!@class_exists($className)) {
				continue;
			}
			if ($requireListing && !@class_exists($className . '\\Listing')) {
				continue;
			}
			if (in_array($interface, (array) class_implements($className), true)) {
				$matching[] = $class;
			}
		}

		return $matching;
	}

	/**
	 * Loads all objects (including unpublished) of a class and calls the given update method on each.
	 *
	 * @param object $class class definition providing getName()
	 * @param string $updateMethod name of the method to call on every object
	 */
	private static function indexClassObjects($class, $updateMethod)
	{
		self::cliOutput(sprintf("%s.. ", $class->getName()));

		$classListName = '\\Website\\Model\\' . $class->getName() . '\\Listing';
		$list = new $classListName();
		$list->setUnpublished(true);
		$list->load();
		foreach ($list->getItems(0, 0) as $object) {
			$object->$updateMethod();
		}
	}

	/**
	 * Creates mappings for all objects which implement \Website\Model\IElasticObject
	 * mappings are put into a new index which should be set in \Zend_Registry for the update request on key self::MAIN_INDEX_KEY
	 */
	private static function createObjectMappings()
	{
		foreach (self::getClassDefinitionsImplementing('Website\\Model\\IElasticObject') as $class) {
			$objectMapping = self::getObjectMapping($class->getName());
			$fulltextFields = isset($objectMapping['meta']['fulltextFields'])
				? $objectMapping['meta']['fulltextFields']
				: [];

			foreach ($objectMapping['meta']['languages'] as $lang) {
				$mapping = self::buildMapping($objectMapping['properties'], $fulltextFields, $lang);
				self::putTypeMapping(strtolower($class->getName()) . '_' . $lang, $mapping);
			}
		}
	}

	/**
	 * Walks the document tree from the root (ID 1) breadth-first and updates the fulltext
	 * index for every \Website\Model\Document\Page found. Only pages and folders are traversed.
	 */
	private static function indexDocumentsFulltext()
	{
		// TODO other types of documents (hardlinks, link...)
		self::cliOutput("Page.. ");

		$rootDocument = \Pimcore\Model\Document::getById(1);
		if (!$rootDocument) {
			return;
		}

		$childList = new \Pimcore\Model\Document\Listing();
		$childList->setUnpublished(true);
		$queue = [$rootDocument];

		while (!empty($queue)) {
			$document = array_shift($queue);
			$documentId = (int) $document->getId();

			if ($document instanceof \Website\Model\Document\Page && $documentId !== 1) {
				self::cliOutput(sprintf("%s ", $document->getId()));
				$document->elasticSearchUpdateFulltext();
			}

			// the ID is cast to int because it is concatenated into the condition
			$childList->setCondition("(type = 'page' OR type = 'folder') AND parentId = " . $documentId);
			$childList->load();
			foreach ($childList->getItems(0, 0) as $child) {
				$queue[] = $child;
			}
		}
	}

	/**
	 * indexes all \Website\Model\Document\Page(s) found under the root document
	 * AND
	 * indexes all objects whose class implements \Website\Model\IElasticObjectFulltext
	 * AND
	 * puts the content into the index returned by getMainIndexName() (the new index during an update request)
	 *
	 * @param bool $documents index documents
	 * @param bool $objects index objects
	 */
	public static function indexFulltext($documents = true, $objects = true)
	{
		//DOCUMENTS
		if ($documents) {
			self::indexDocumentsFulltext();
		}

		//OBJECTS
		if ($objects) {
			$classes = self::getClassDefinitionsImplementing('Website\\Model\\IElasticObjectFulltext', true);
			foreach ($classes as $class) {
				self::indexClassObjects($class, 'elasticSearchUpdateFulltext');
			}
		}
	}

	/**
	 * Indexes all objects whose class implements \Website\Model\IElasticObject
	 */
	private static function indexObjects()
	{
		$classes = self::getClassDefinitionsImplementing('Website\\Model\\IElasticObject', true);
		foreach ($classes as $class) {
			self::indexClassObjects($class, 'elasticSearchUpdate');
		}
	}

	/**
	 * Returns the analysis settings (analyzers, token filters, tokenizers) used when creating a new index.
	 *
	 * @return array
	 */
	private static function getIndexAnalysis()
	{
		return [
			'analyzer' => [
				'pathIndexAnalyzer' => [
					'type' => 'custom',
					'tokenizer' => 'pathIndexTokenizer'
				],
				'cs_hunspell' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['lowercase', 'stopwords_CZ', 'cs_CZ', 'remove_duplicities']
				],
				'sk_hunspell' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['lowercase', 'stopwords_SK', 'sk_SK', 'remove_duplicities']
				],
				'cs_icu_analyzer' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['stopwords_CZ', 'icu_folding', 'remove_duplicities']
				],
				'cs_icu_analyzer_sort' => [
					'type' => 'custom',
					'tokenizer' => 'keyword',
					'filter' => ['lowercase', 'cs_icu_collation']
				],
				'sk_icu_analyzer' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['stopwords_SK', 'icu_folding', 'remove_duplicities']
				]
			],
			'filter' => [
				'stopwords_CZ' => [
					'type' => 'stop',
					'stopwords' => ['právě', 'že', '_czech_'],
					'ignore_case' => true
				],
				'stopwords_SK' => [
					'type' => 'stop',
					'stopwords' => ['a', 'aby', 'aj', 'ako', 'ale', 'alebo', 'ani', 'áno', 'asi', 'až', 'bez', 'buď', 'by', 'cez', 'či', 'čo', 'ešte', 'ho', 'i', 'iba', 'ich', 'ja', 'je', 'jeho', 'jej', 'ju', 'k', 'kam', 'kde', 'keď', 'kto', 'ku', 'menej', 'mi', 'moja', 'moje', 'môj', 'my', 'nad', 'nám', 'než', 'nič', 'nie', 'o', 'od', 'on', 'on', 'ona', 'ona', 'oni', 'ono', 'po', 'pod', 'podľa', 'pokiaľ', 'potom', 'práve', 'prečo', 'pred', 'preto', 'pretože', 'pri', 's', 'sa', 'si', 'sme', 'so', 'som', 'späť', 'ste', 'sú', 'sú', 'ta', 'tá', 'tak', 'tak', 'takže', 'tam', 'tam', 'táto', 'teda', 'ten', 'tento', 'tieto', 'tiež', 'to', 'to', 'toho', 'tom', 'tomto', 'toto', 'tu', 'túto', 'ty', 'tým', 'týmto', 'už', 'v', 'vám', 'viac', 'vo', 'však', 'vy', 'z', 'za', 'zo'],
					'ignore_case' => true
				],
				'cs_CZ' => [
					'type' => 'hunspell',
					'locale' => 'cs_CZ',
					'dedup' => true,
					'recursion_level' => 0
				],
				'cs_icu_collation' => [
					'type' => 'icu_collation',
					'language' => 'cs'
				],
				'sk_SK' => [
					'type' => 'hunspell',
					'locale' => 'sk_SK',
					'dedup' => true,
					'recursion_level' => 0
				],
				'remove_duplicities' => [
					'type' => 'unique',
					'only_on_same_position' => true
				]
			],
			'tokenizer' => [
				'pathIndexTokenizer' => [
					'type' => 'path_hierarchy',
					'delimiter' => '/'
				]
			]
		];
	}

	/**
	 * Points the alias at the new index and deletes the indices it pointed to before.
	 *
	 * The alias is moved with a single updateAliases call so it never resolves to the old and
	 * the new index at the same time. Every index the alias pointed to is removed, not only the first one.
	 *
	 * @param string $alias
	 * @param string $newIndex
	 */
	private static function switchAlias($alias, $newIndex)
	{
		$indices = self::getClient()->indices();

		$oldIndices = [];
		if ($indices->existsAlias(['name' => $alias])) {
			// the response is keyed by index name (the original code read the first key only)
			foreach (array_keys($indices->getAlias(['name' => $alias])) as $indexName) {
				if ((string) $indexName !== $newIndex) {
					$oldIndices[] = (string) $indexName;
				}
			}
		}

		$actions = [];
		foreach ($oldIndices as $oldIndex) {
			$actions[] = ['remove' => ['index' => $oldIndex, 'alias' => $alias]];
		}
		$actions[] = ['add' => ['index' => $newIndex, 'alias' => $alias]];
		$indices->updateAliases(['body' => ['actions' => $actions]]);

		foreach ($oldIndices as $oldIndex) {
			$indices->delete(['index' => $oldIndex]);
		}
	}

	/**
	 * updates elastic search mapping and index by computing a new index, switching the main alias to it and removing the old one
	 *
	 * The mappings are always recreated in the new index; data is indexed only for the enabled parts.
	 * The registry entries used during the update are reset even when a step throws.
	 *
	 * @param bool $fulltext index fulltext data (documents, and object fulltext when $objects is true)
	 * @param bool $objects index object data
	 * @throws \Exception rethrows any exception raised while reindexing
	 */
	public static function updateMappingsAndReindex($fulltext = false, $objects = false)
	{
		self::cliOutput("Creating new index analysis... ");
		$elasticClient = self::getClient();

		$alias = self::getMainIndexName();

		try {
			//first check if main index exists and if not create a dummy one which will then be deleted...
			//otherwise a non-existing index would cause problems when indexing some documents (? -> research)
			if (!$elasticClient->indices()->existsAlias(['name' => $alias])) {
				$dummyIndexName = $alias . time() . 'dummy';
				\Zend_Registry::set(self::MAIN_INDEX_KEY, $dummyIndexName);
				$elasticClient->indices()->create(['index' => $dummyIndexName]);
				$elasticClient->indices()->putAlias(['index' => $dummyIndexName, 'name' => $alias]);
			}

			//create the new index
			$newIndex = $alias . '_' . time();
			$elasticClient->indices()->create([
				'index' => $newIndex,
				'body' => ['analysis' => self::getIndexAnalysis()]
			]);
			//we use \Zend_Registry for temporary main index key storage instead of passing it through params
			//the main index key is used across several methods which handle the reindexing process
			\Zend_Registry::set(self::MAIN_INDEX_KEY, $newIndex);
			\Zend_Registry::set(self::UPDATE_REQUEST_KEY, true);

			//fulltext mapping
			self::cliOutput(sprintf("[%s] done.\n\nCreating mappings... ", $newIndex));
			self::createFulltextMappings();
			//object mappings
			self::createObjectMappings();
			//objects data
			if ($objects) {
				self::cliOutput("done.\n\nIndexing objects... ");
				self::indexObjects();
			}
			//fulltext data
			if ($fulltext) {
				self::cliOutput("done.\n\nIndexing fulltext... ");
				self::indexFulltext(true, $objects);
			}

			//switch aliases and remove old index
			self::cliOutput("done.\n\nSwitching indexes... ");
			self::switchAlias($alias, $newIndex);

			//refresh index
			$elasticClient->indices()->refresh(['index' => $newIndex]);
		} catch (\Exception $e) {
			// do not leave the process in "update request" state pointing at a half-built index
			self::resetUpdateState();
			throw $e;
		}

		self::resetUpdateState();
		self::cliOutput("done.\n\ndone");
	}
}
