<?php

/**
 * Elasticsearch tools built on top of the official Elasticsearch PHP client library.
 * The main idea is to keep one index with several types (tables). Each table has localized versions named "table_".$lang
 * There are two tables for a fulltext, Google-like document search over rendered content, which have the same mapping.
 *  - one of these tables is for pimcore \Pimcore\Model\Document\Page(s) and the other is for dynamic pages created from Objects (one table for each object class)
 *  - we do not use a single table because there would be ID conflicts, since document and object IDs are separated
 *  - an object extension (via classmapping) should implement \AppBundle\Model\IElasticObjectFulltext to be indexed into elasticsearch
 * And then there are custom tables for custom searches - with custom mappings.
 *  - an object extension (via classmapping) should implement \AppBundle\Model\IElasticObject to be indexed into elasticsearch.
 */

namespace AppBundle\Tool;

use Elasticsearch\ClientBuilder;
use Pimcore\Cache;
use Pimcore\Model\Document;
use Pimcore\Model\DataObject\AbstractObject;
use Pimcore\Model\DataObject\ClassDefinition;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Output\OutputInterface;

class ElasticSearch
{
	/** Cache\Runtime key holding the name of the index currently being built. */
	const MAIN_INDEX_KEY = 'main_elastic_search_index';
	/** Type-name prefix for rendered pimcore document pages. */
	const DOCUMENT_FULLTEXT_TYPE_KEY = 'document';
	/** Type-name prefix for rendered dynamic (object) pages. */
	const OBJECT_FULLTEXT_TYPE_KEY = 'object';
	/** Cache\Runtime key flagging that a reindex is in progress. */
	const UPDATE_REQUEST_KEY = 'elasticSearchUpdateRequest';

	/** @var \Elasticsearch\Client|null lazily built client, shared by all calls */
	private static $client = null;
	/** @var array|null mappings loaded from ElasticSearchMappings.php on first use */
	private static $mappings = null;
	/**
	 * @var OutputInterface|null console output; when null all progress output is skipped
	 */
	private static $outputInterface = null;
	/**
	 * @var ProgressBar|null progress bar of the step currently running
	 */
	private static $progressBar = null;

	/**
	 * Returns the shared client, building it on first use.
	 *
	 * @return \Elasticsearch\Client
	 */
	public static function getClient()
	{
		if (!self::$client) {
			self::$client = ClientBuilder::create()
				->setHosts(['http://localhost:9200/'])
				->build();
		}

		return self::$client;
	}

	/**
	 * Sets (or, with null, removes) the console output used for progress reporting.
	 *
	 * @param OutputInterface $outputInterface
	 */
	public static function setOutputInterface(OutputInterface $outputInterface = null)
	{
		self::$outputInterface = $outputInterface;
	}

	/**
	 * Writes a message without a trailing newline if an output is set.
	 *
	 * @param string $msg
	 */
	private static function write($msg)
	{
		if (self::$outputInterface) {
			self::$outputInterface->write($msg);
		}
	}

	/**
	 * Writes a message followed by a newline if an output is set.
	 *
	 * @param string $msg
	 */
	private static function writeln($msg)
	{
		if (self::$outputInterface) {
			self::$outputInterface->writeln($msg);
		}
	}

	/**
	 * Prepares a progress bar for one indexing step; without an output or
	 * with nothing to process no bar is created.
	 *
	 * @param string $type label shown in front of the bar
	 * @param int    $max  number of expected steps
	 */
	private static function progressStart($type, $max)
	{
		if (!self::$outputInterface || !$max) {
			self::$progressBar = null;

			return;
		}

		self::$progressBar = new ProgressBar(self::$outputInterface, $max);
		self::$progressBar->setFormat(str_pad($type, 20).'  [%bar% %percent:3s%%] %current%/%max% %remaining:6s%');
		// redraw roughly once per percent; ceil() yields a float, the bar expects an int
		self::$progressBar->setRedrawFrequency((int) ceil($max / 100));
	}

	/**
	 * @param int $step
	 */
	private static function progressAdvance($step = 1)
	{
		if (self::$progressBar) {
			self::$progressBar->advance($step);
		}
	}

	/**
	 * Finishes the current progress bar (if any) and prints the check mark.
	 */
	private static function progressEnd()
	{
		if (self::$progressBar) {
			self::$progressBar->finish();
			self::writeln(' <info>✔</info>');
		}
	}

	/**
	 * Returns the main index name: the one stored in the runtime registry during
	 * update requests, otherwise the configured database name.
	 *
	 * @param bool $searchRegistry Checks the registry for a possible new index name used while reindexing
	 *
	 * @return string
	 */
	public static function getMainIndexName($searchRegistry = true)
	{
		$index = null;

		if ($searchRegistry && Cache\Runtime::isRegistered(self::MAIN_INDEX_KEY)) {
			$index = Cache\Runtime::get(self::MAIN_INDEX_KEY);
		}

		// the registry entry is reset to null after a reindex, so fall back on any empty value
		if (!$index) {
			$index = \Pimcore\Config::getSystemConfig()->database->params->dbname;
		}

		return $index;
	}

	/**
	 * Tells whether a reindex started by updateMappingsAndReindex() is running.
	 *
	 * @return bool
	 */
	public static function isUpdateRequest()
	{
		// The flag is cleared by writing null, which presumably leaves the key
		// registered (ArrayObject semantics - verify against Cache\Runtime), so the
		// stored value has to be checked as well as the registration.
		return Cache\Runtime::isRegistered(self::UPDATE_REQUEST_KEY)
			&& (bool) Cache\Runtime::get(self::UPDATE_REQUEST_KEY);
	}

	/**
	 * Puts the fulltext mapping for pimcore documents and for dynamic pages (objects)
	 * searched like documents into the current main index, once per language.
	 */
	private static function createFulltexttMappings()
	{
		$fulltextMapping = self::getObjectMapping('fulltext_mapping');
		$mapping = self::buildBaseMapping($fulltextMapping);

		foreach ($fulltextMapping['meta']['languages'] as $lang) {
			// analyzers are language specific and get overwritten on every pass
			self::addFulltextFieldsToMapping($mapping, $fulltextMapping['meta']['fulltextFields'], $lang);
			self::putTypeMapping(self::DOCUMENT_FULLTEXT_TYPE_KEY.'_'.$lang, $mapping);
			self::putTypeMapping(self::OBJECT_FULLTEXT_TYPE_KEY.'_'.$lang, $mapping);
		}
	}

	/**
	 * Returns the mapping definition registered under the lower-cased class name,
	 * or the 'default_mapping' entry when there is none.
	 *
	 * @param string $className
	 *
	 * @return array
	 */
	public static function getObjectMapping($className)
	{
		if (!self::$mappings) {
			self::$mappings = include PIMCORE_PROJECT_ROOT.'/src/AppBundle/Tool/ElasticSearchMappings.php';
		}

		$mappingKey = strtolower($className);
		if (!isset(self::$mappings[$mappingKey])) {
			$mappingKey = 'default_mapping';
		}

		return self::$mappings[$mappingKey];
	}

	/**
	 * Builds the type mapping skeleton shared by all types.
	 *
	 * @param array $objectMapping mapping definition as returned by getObjectMapping()
	 *
	 * @return array
	 */
	private static function buildBaseMapping(array $objectMapping)
	{
		return [
			'_source' => ['enabled' => true],
			'dynamic' => false,
			'properties' => $objectMapping['properties'],
		];
	}

	/**
	 * Puts one type mapping into the current main index.
	 *
	 * @param string $type
	 * @param array  $mapping
	 */
	private static function putTypeMapping($type, array $mapping)
	{
		self::getClient()->indices()->putMapping([
			'index' => self::getMainIndexName(),
			'type' => $type,
			'body' => [$type => $mapping],
		]);
	}

	/**
	 * Makes sure every fulltext field and its "folded" sub-field exist in the
	 * mapping and sets their analyzers for the given language.
	 *
	 * @param array  $mapping mapping to modify (by reference)
	 * @param array  $fields  names of the fulltext fields
	 * @param string $lang
	 */
	private static function addFulltextFieldsToMapping(&$mapping, $fields, $lang)
	{
		$textField = [
			'type' => 'string',
			'include_in_all' => false,
			'term_vector' => 'with_positions_offsets',
		];

		// dedicated analyzers are only referenced for Czech and Slovak
		$hasCustomAnalyzers = in_array($lang, ['cs', 'sk'], true);
		$analyzer = $hasCustomAnalyzers ? $lang.'_hunspell' : 'standard';
		$icuAnalyzer = $hasCustomAnalyzers ? $lang.'_icu_analyzer' : 'standard';

		foreach ($fields as $field) {
			//add field
			if (!isset($mapping['properties'][$field])) {
				$mapping['properties'][$field] = $textField;
			}
			//add folded field
			if (!isset($mapping['properties'][$field]['fields']['folded'])) {
				$mapping['properties'][$field]['fields']['folded'] = $textField;
			}
			//apply hunspell
			$mapping['properties'][$field]['index_analyzer'] = $analyzer;
			$mapping['properties'][$field]['search_analyzer'] = $analyzer;
			$mapping['properties'][$field]['fields']['folded']['index_analyzer'] = $icuAnalyzer;
			$mapping['properties'][$field]['fields']['folded']['search_analyzer'] = $icuAnalyzer;
		}
	}

	/**
	 * Lists the names of pimcore classes whose \AppBundle\Model extension exists
	 * and implements the given interface.
	 *
	 * @param string $interface      fully qualified interface name without leading backslash
	 * @param bool   $requireListing also require the matching \Listing class to exist
	 *
	 * @return string[]
	 */
	private static function getElasticClassNames($interface, $requireListing = true)
	{
		$classes = new ClassDefinition\Listing();
		$classes->load();

		$names = [];
		foreach ($classes->getClasses() as $class) {
			$className = '\\AppBundle\\Model\\'.$class->getName();
			if (!@class_exists($className)) {
				continue;
			}
			if ($requireListing && !@class_exists($className.'\\Listing')) {
				continue;
			}
			if (in_array($interface, (array) class_implements($className), true)) {
				$names[] = $class->getName();
			}
		}

		return $names;
	}

	/**
	 * Creates mappings for all objects which implement \AppBundle\Model\IElasticObject.
	 * Mappings are put into the index returned by getMainIndexName(), i.e. the new
	 * index registered in Cache\Runtime under self::MAIN_INDEX_KEY during a reindex.
	 */
	private static function createObjectMappings()
	{
		foreach (self::getElasticClassNames('AppBundle\\Model\\IElasticObject', false) as $name) {
			$objectMapping = self::getObjectMapping($name);
			$mapping = self::buildBaseMapping($objectMapping);

			foreach ($objectMapping['meta']['languages'] as $lang) {
				//apply hunspell on fulltext fields
				if (isset($objectMapping['meta']['fulltextFields'])) {
					self::addFulltextFieldsToMapping($mapping, $objectMapping['meta']['fulltextFields'], $lang);
				}
				self::putTypeMapping(strtolower($name).'_'.$lang, $mapping);
			}
		}
	}

	/**
	 * Walks the document tree from the root through pages and folders and calls
	 * elasticSearchUpdateFulltext() on every \Pimcore\Model\Document\Page found.
	 */
	private static function indexDocumentPages()
	{
		// TODO other types of documents (hardlinks, link...)
		$childList = new Document\Listing();
		$childList->setUnpublished(true);
		$childList->setCondition('type = ?', ['page']);
		self::progressStart('  Document Page', $childList->count());

		$rootDocument = Document::getById(1);
		// without a root document there is nothing to walk
		$queue = $rootDocument ? [$rootDocument] : [];
		while (!empty($queue)) {
			$document = array_shift($queue);
			if ($document instanceof Document\Page) {
				$document->elasticSearchUpdateFulltext();
				self::progressAdvance();
			}
			// the listing is reused to fetch the children of the current node
			$childList->setCondition('(type = ? OR type = ?) AND parentId = ?', ['page', 'folder', $document->getId()]);
			$childList->load();
			foreach ($childList->getItems(0, 0) as $child) {
				$queue[] = $child;
			}
		}
		self::progressEnd();
	}

	/**
	 * Loads all objects and variants (published or not) of one class and calls
	 * the given indexing method on each of them.
	 *
	 * @param string $name   pimcore class name
	 * @param string $method name of the object method doing the indexing
	 */
	private static function indexClassObjects($name, $method)
	{
		$classListName = '\\AppBundle\\Model\\'.$name.'\\Listing';
		$list = new $classListName();
		$list->setUnpublished(true);
		$list->setObjectTypes([
			AbstractObject::OBJECT_TYPE_OBJECT,
			AbstractObject::OBJECT_TYPE_VARIANT,
		]);
		$list->load();

		self::progressStart(sprintf('  %s', $name), $list->count());
		foreach ($list->getItems(0, 0) as $object) {
			$object->$method();
			self::progressAdvance();
		}
		self::progressEnd();
	}

	/**
	 * Indexes all \Pimcore\Model\Document\Page(s) reachable from the root document
	 * AND
	 * indexes all objects whose class implements \AppBundle\Model\IElasticObjectFulltext.
	 * Content goes into the index returned by getMainIndexName(); types which are
	 * excluded by $allowedOnlyClasses are optionally copied over from the live index.
	 *
	 * @param array $allowedOnlyClasses class names to reindex ('Page' for documents); empty = everything
	 * @param bool  $copyNotIndexed     copy the types which are not reindexed
	 */
	public static function indexFulltext($allowedOnlyClasses = [], $copyNotIndexed = true)
	{
		//DOCUMENTS
		if (!$allowedOnlyClasses || in_array('Page', $allowedOnlyClasses)) {
			self::indexDocumentPages();
		} elseif ($copyNotIndexed) {
			$fulltextMapping = self::getObjectMapping('fulltext_mapping');
			self::write('  '.str_pad('Document Page', 20).'<comment>[copy]</comment>');
			foreach ($fulltextMapping['meta']['languages'] as $lang) {
				self::copyType(
					self::getMainIndexName(false),
					self::getMainIndexName(),
					self::DOCUMENT_FULLTEXT_TYPE_KEY.'_'.$lang
				);
			}
			self::writeln(' <info>✔</info>');
		}

		//OBJECTS
		foreach (self::getElasticClassNames('AppBundle\\Model\\IElasticObjectFulltext') as $name) {
			if ($allowedOnlyClasses && !in_array($name, $allowedOnlyClasses)) {
				if ($copyNotIndexed) {
					self::write(sprintf('  %s<comment>[copy]</comment>', str_pad($name, 20)));
					$fulltextMapping = self::getObjectMapping('fulltext_mapping');
					foreach ($fulltextMapping['meta']['languages'] as $lang) {
						// all classes share one type per language, so copy by the "type" field
						self::copyType(
							self::getMainIndexName(false),
							self::getMainIndexName(),
							self::OBJECT_FULLTEXT_TYPE_KEY.'_'.$lang,
							$name
						);
					}
					self::writeln(' <info>✔</info>');
				}
				continue;
			}
			self::indexClassObjects($name, 'elasticSearchUpdateFulltext');
		}
	}

	/**
	 * Indexes objects of all classes implementing \AppBundle\Model\IElasticObject
	 * into their custom types.
	 *
	 * @param array $allowedOnlyClasses class names to reindex; empty = everything
	 * @param bool  $copyNotIndexed     copy the types which are not reindexed
	 */
	private static function indexObjects($allowedOnlyClasses = [], $copyNotIndexed = true)
	{
		foreach (self::getElasticClassNames('AppBundle\\Model\\IElasticObject') as $name) {
			if ($allowedOnlyClasses && !in_array($name, $allowedOnlyClasses)) {
				if ($copyNotIndexed) {
					self::write(sprintf('  %s<comment>[copy]</comment>', str_pad($name, 20)));
					$objectMapping = self::getObjectMapping($name);
					foreach ($objectMapping['meta']['languages'] as $lang) {
						self::copyType(
							self::getMainIndexName(false),
							self::getMainIndexName(),
							strtolower($name).'_'.$lang
						);
					}
					self::writeln(' <info>✔</info>');
				}
				continue;
			}
			self::indexClassObjects($name, 'elasticSearchUpdate');
		}
	}

	/**
	 * Copy type from one index to another (the indexes should have same mappings).
	 * Documents are read with a scan/scroll search and written with one bulk
	 * request per scroll page, so memory use does not grow with the type size.
	 *
	 * @param string $oldIndex
	 * @param string $newIndex
	 * @param string $type
	 * @param string $subType  - "type" field in document data
	 */
	public static function copyType($oldIndex, $newIndex, $type, $subType = null)
	{
		$client = self::getClient();
		$query = ($subType)
			? ['match' => ['type' => $subType]]
			: ['match_all' => []];
		$scrollParams = [
			'search_type' => 'scan',
			'scroll' => '1m',
			'size' => 50,
			'index' => $oldIndex,
			'type' => $type,
			'body' => [
				'query' => $query,
			],
		];
		// the initial response is only used for its scroll id, hits are read from scroll()
		$search = $client->search($scrollParams);
		$scrollId = $search['_scroll_id'];

		while (true) {
			$response = $client->scroll([
				'scroll_id' => $scrollId,
				'scroll' => $scrollParams['scroll'],
			]);
			if (empty($response['hits']['hits'])) {
				break;
			}

			$bulkParams = ['body' => []];
			foreach ($response['hits']['hits'] as $row) {
				// bulk body alternates an action line and the document source
				$bulkParams['body'][] = [
					'index' => [
						'_id' => $row['_id'],
						'_index' => $newIndex,
						'_type' => $type,
					],
				];
				$bulkParams['body'][] = $row['_source'];
			}
			$client->bulk($bulkParams);

			$scrollId = $response['_scroll_id'];
		}
	}

	/**
	 * Turns a reindex selection into a list of class names.
	 *
	 * @param mixed $selection true, a comma separated string or an array of class names
	 *
	 * @return string[] empty list means "no restriction"
	 */
	private static function normalizeClassFilter($selection)
	{
		if (is_string($selection)) {
			$selection = explode(',', $selection);
		}
		if (!is_array($selection)) {
			return [];
		}

		return array_values(array_map('trim', $selection));
	}

	/**
	 * Index settings (analyzers, token filters, tokenizers) used for every new index.
	 *
	 * @return array
	 */
	private static function getAnalysisSettings()
	{
		return [
			'analyzer' => [
				'pathIndexAnalyzer' => [
					'type' => 'custom',
					'tokenizer' => 'pathIndexTokenizer',
				],
				'cs_hunspell' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['lowercase', 'stopwords_CZ', 'cs_CZ', 'remove_duplicities'],
				],
				'sk_hunspell' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['lowercase', 'stopwords_SK', 'sk_SK', 'remove_duplicities'],
				],
				'cs_icu_analyzer' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['stopwords_CZ', 'icu_folding', 'remove_duplicities'],
				],
				'cs_icu_analyzer_sort' => [
					'type' => 'custom',
					'tokenizer' => 'keyword',
					'filter' => ['lowercase', 'cs_icu_collation'],
				],
				'sk_icu_analyzer' => [
					'type' => 'custom',
					'tokenizer' => 'standard',
					'filter' => ['stopwords_SK', 'icu_folding', 'remove_duplicities'],
				],
			],
			'filter' => [
				'stopwords_CZ' => [
					'type' => 'stop',
					'stopwords' => ['právě', 'že', '_czech_'],
					'ignore_case' => true,
				],
				'stopwords_SK' => [
					'type' => 'stop',
					'stopwords' => ['a', 'aby', 'aj', 'ako', 'ale', 'alebo', 'ani', 'áno', 'asi', 'až', 'bez', 'buď', 'by', 'cez', 'či', 'čo', 'ešte', 'ho', 'i', 'iba', 'ich', 'ja', 'je', 'jeho', 'jej', 'ju', 'k', 'kam', 'kde', 'keď', 'kto', 'ku', 'menej', 'mi', 'moja', 'moje', 'môj', 'my', 'nad', 'nám', 'než', 'nič', 'nie', 'o', 'od', 'on', 'on', 'ona', 'ona', 'oni', 'ono', 'po', 'pod', 'podľa', 'pokiaľ', 'potom', 'práve', 'prečo', 'pred', 'preto', 'pretože', 'pri', 's', 'sa', 'si', 'sme', 'so', 'som', 'späť', 'ste', 'sú', 'sú', 'ta', 'tá', 'tak', 'tak', 'takže', 'tam', 'tam', 'táto', 'teda', 'ten', 'tento', 'tieto', 'tiež', 'to', 'to', 'toho', 'tom', 'tomto', 'toto', 'tu', 'túto', 'ty', 'tým', 'týmto', 'už', 'v', 'vám', 'viac', 'vo', 'však', 'vy', 'z', 'za', 'zo'],
					'ignore_case' => true,
				],
				'cs_CZ' => [
					'type' => 'hunspell',
					'locale' => 'cs_CZ',
					'dedup' => true,
					'recursion_level' => 0,
				],
				'cs_icu_collation' => [
					'type' => 'icu_collation',
					'language' => 'cs',
				],
				'sk_SK' => [
					'type' => 'hunspell',
					'locale' => 'sk_SK',
					'dedup' => true,
					'recursion_level' => 0,
				],
				'remove_duplicities' => [
					'type' => 'unique',
					'only_on_same_position' => true,
				],
			],
			'tokenizer' => [
				'pathIndexTokenizer' => [
					'type' => 'path_hierarchy',
					'delimiter' => '/',
				],
			],
		];
	}

	/**
	 * Updates elastic search mappings and data by building a new index, switching
	 * the main alias to it and removing the old one.
	 *
	 * @param mixed $fulltext         true|false to control indexing of everything, or class names
	 *                                (array or comma separated string) of the indexed objects
	 * @param mixed $objects          true|false to control indexing of everything, or class names
	 *                                (array or comma separated string) of the indexed objects
	 * @param bool  $copyNotReindexed copy types which are not reindexed
	 */
	public static function updateMappingsAndReindex($fulltext = false, $objects = false, $copyNotReindexed = true)
	{
		self::write('➤ Creating new index');
		$elasticClient = self::getClient();

		$alias = self::getMainIndexName();

		try {
			//first check if main index exists and if not create a dummy one which will then be deleted...
			//otherwise a non-existing index would cause problems when indexing some documents (? -> research)
			if (!$elasticClient->indices()->existsAlias(['name' => $alias])) {
				$dummyIndexName = $alias.time().'dummy';
				Cache\Runtime::set(self::MAIN_INDEX_KEY, $dummyIndexName);
				$elasticClient->indices()->create(['index' => $dummyIndexName]);
				$elasticClient->indices()->putAlias(['index' => $dummyIndexName, 'name' => $alias]);
			}

			//create the new index
			$newIndex = $alias.'_'.time();
			$elasticClient->indices()->create([
				'index' => $newIndex,
				'body' => ['analysis' => self::getAnalysisSettings()],
			]);
			//we use Cache\Runtime for temporary main index key storage instead of passing it through params
			//the main index key is used across several methods which handle the reindexing process
			Cache\Runtime::set(self::MAIN_INDEX_KEY, $newIndex);
			Cache\Runtime::set(self::UPDATE_REQUEST_KEY, true);
			self::writeln(sprintf(' <comment>[%s]</comment> <info>✔</info>', $newIndex));

			//fulltext and object mappings
			self::write('➤ Creating mappings');
			self::createFulltexttMappings();
			self::createObjectMappings();
			// close the "Creating mappings" line no matter which data gets indexed afterwards
			self::writeln(' <info>✔</info>');

			//objects data
			if ($objects) {
				self::writeln('➤ Indexing objects');
				self::indexObjects(self::normalizeClassFilter($objects), $copyNotReindexed);
			}
			//fulltext data
			if ($fulltext) {
				self::writeln('➤ Indexing fulltext');
				self::indexFulltext(self::normalizeClassFilter($fulltext), $copyNotReindexed);
			}

			//switch aliases and remove old index
			self::write('➤ Switching indexes');
			if ($elasticClient->indices()->existsAlias(['name' => $alias])) {
				$aliasedIndexes = $elasticClient->indices()->getAlias(['name' => $alias]);
				$oldIndex = key($aliasedIndexes);
				$elasticClient->indices()->putAlias(['index' => $newIndex, 'name' => $alias]);
				$elasticClient->indices()->delete(['index' => $oldIndex]);
			}

			//refresh index
			$elasticClient->indices()->refresh(['index' => $newIndex]);
		} finally {
			// always leave "update request" mode, even when a step above threw
			Cache\Runtime::set(self::MAIN_INDEX_KEY, null);
			Cache\Runtime::set(self::UPDATE_REQUEST_KEY, null);
		}

		self::writeln(' <info>✔</info>');
	}
}
