import collections
import copy
import hashlib
import os
import pickle
import re
import shutil
import threading
import time
import traceback

import sublime

from ...vendor.frozendict import frozendict
from .logging import logger
from .settings import get_setting
from .utils import ThreadPool

# name of the folder inside the global cache path that holds all local
# caches; every tex root gets its own sub-folder in there, named after the
# hash of the tex root path (see LocalCache._get_cache_path)
LOCAL_CACHE_FOLDER = "local_cache"
# global cache folder for ST2, this folder will be created inside the User
# folder to store the global and the local cache
ST2_GLOBAL_CACHE_FOLDER = ".lt_cache"

# regular expression for parsing the cache.life_span setting when it is
# written in "natural" language, i.e. up to four optional parts in the fixed
# order days, hours, minutes, seconds:
#   100 d(ay(s)) 100 h(our(s)) 100 m(in(ute(s))) 100 s(ec(ond(s)))
# every part is optional, so the pattern also matches the empty string;
# callers have to check the captured groups to find out what was given
TIME_RE = re.compile(
    r"\s*(?:(?P<day>\d+)\s*d(?:ays?)?)?"
    r"\s*(?:(?P<hour>\d+)\s*h(?:ours?)?)?"
    r"\s*(?:(?P<minute>\d+)\s*m(?:in(?:utes?)?)?)?"
    r"\s*(?:(?P<second>\d+)\s*s(?:ec(?:onds?)?)?)?\s*"
)


class CacheMiss(Exception):
    """
    raised when a requested cache entry is not available, e.g. because the
    cache file is missing, unreadable or the entry has been invalidated
    """


def hash_digest(text):
    """
    Create the hash digest for a text. The digest can be used to
    create a unique filename from the path to the root file.
    The hash function used is md5.

    Arguments:
    text -- the text for which the digest should be created

    Returns the hexadecimal digest of the UTF-8 encoded text.
    """
    md5 = hashlib.md5()
    md5.update(text.encode("utf-8"))
    return md5.hexdigest()


def cache_local(tex_root, key, func):
    """
    shortcut for calling cache() on the LocalCache belonging to tex_root:

    returns the cached value for key if there is one; otherwise func is
    invoked to generate the value, which is then stored under key and
    returned

    :param tex_root:
        the tex_root the data should be associated with

    :param key:
        the key to retrieve or set

    :param func:
        a callable that takes no arguments and when invoked will return
        the proper value
    """
    local_cache = LocalCache(tex_root)
    return local_cache.cache(key, func)


def write_local(tex_root, key, obj):
    """
    shortcut for calling set() on the LocalCache belonging to tex_root:

    stores obj in the cache under the given key

    :param tex_root:
        the tex_root the data should be associated with

    :param key:
        the key to set

    :param obj:
        the value to store; note that obj *must* be picklable
    """
    local_cache = LocalCache(tex_root)
    return local_cache.set(key, obj)


def read_local(tex_root, key):
    """
    shortcut for calling get() on the LocalCache belonging to tex_root:

    returns the value cached under the given key

    raises CacheMiss if value has not been cached

    :param tex_root:
        the tex_root the data should be associated with

    :param key:
        the key to retrieve
    """
    local_cache = LocalCache(tex_root)
    return local_cache.get(key)


def cache_global(key, func):
    """
    shortcut for calling cache() on the GlobalCache:

    returns the cached value for key if there is one; otherwise func is
    invoked to generate the value, which is then stored under key and
    returned

    :param key:
        the key to retrieve or set

    :param func:
        a callable that takes no arguments and when invoked will return
        the proper value
    """
    global_cache = GlobalCache()
    return global_cache.cache(key, func)


def write_global(key, obj):
    """
    shortcut for calling set() on the GlobalCache:

    stores obj in the cache under the given key

    :param key:
        the key to set

    :param obj:
        the value to store; note that obj *must* be picklable
    """
    global_cache = GlobalCache()
    return global_cache.set(key, obj)


def read_global(key):
    """
    shortcut for calling get() on the GlobalCache:

    returns the value cached under the given key

    raises CacheMiss if value has not been cached

    :param key:
        the key to retrieve
    """
    global_cache = GlobalCache()
    return global_cache.get(key)


# aliases: the unqualified names refer to the local (per tex_root) cache
# functions defined above
cache = cache_local
write = write_local
read = read_local


def _global_cache_path():
    """
    returns the normalized path of the folder used for the cache files,
    i.e. "LaTeXTools/internal" inside of Sublime Text's cache path
    """
    internal_dir = os.path.join(sublime.cache_path(), "LaTeXTools", "internal")
    return os.path.normpath(internal_dir)


# marker class for invalidated result
class InvalidObject:
    """
    marker used for cache entries that have been invalidated

    equality and hashing of instances are based solely on the hash of a
    fixed string instead of on object identity, so every instance compares
    equal to every other instance
    """

    __slots__ = []
    __hash = hash("_LaTeXTools_InvalidObject")

    @classmethod
    def __hash__(cls):
        return cls.__hash

    @classmethod
    def __eq__(cls, other):
        # in general, this is a bad pattern, since it will treat the
        # literal string "_LaTeXTools_InvalidObject" as being an invalid
        # object; nevertheless, we need an object identity that persists
        # across reloads, and this seems to be the only way to guarantee
        # that
        try:
            return cls.__hash == hash(other)
        except TypeError:
            # unhashable objects can never be the marker
            return False

    @classmethod
    def __ne__(cls, other):
        # negate __eq__ directly; `cls == other` would compare the class
        # object itself, which bypasses the hash based comparison above and
        # made two markers both "equal" and "not equal" at the same time
        return not cls.__eq__(other)


class Cache:
    """
    default cache object and definition

    implements the shared functionality between the various caches: entries
    are kept in an in-memory dictionary and are persisted as one pickle file
    per key inside of the folder ``cache_path``

    this class is abstract, i.e. only subclasses can be instantiated
    """

    def __new__(cls, *args, **kwargs):
        # don't allow this class to be instantiated directly
        if cls is Cache:
            # NotImplemented is the sentinel for binary operators and not an
            # exception; raising it only results in an unrelated TypeError
            raise NotImplementedError("Cache cannot be instantiated directly")

        return super(Cache, cls).__new__(cls)

    def __init__(self):
        # initialize state but ONLY if it hasn't already been initialized;
        # subclasses may share the instance __dict__ between several
        # instances, in which case the existing state must be kept
        if not hasattr(self, "_disk_lock"):
            # guards reading from and writing to the cache files
            self._disk_lock = threading.Lock()
        if not hasattr(self, "_write_lock"):
            # guards modifications of the in-memory entries
            self._write_lock = threading.Lock()
        if not hasattr(self, "_objects"):
            # the in-memory entries: key -> cached value or InvalidObject
            self._objects = {}
        if not hasattr(self, "_dirty"):
            # whether there are changes which have not been saved yet
            self._dirty = False
        if not hasattr(self, "_save_queue"):
            # number of pending debounce callbacks, see _schedule_save
            self._save_queue = 0
        if not hasattr(self, "_pool"):
            # executes the async load and save operations
            self._pool = ThreadPool(2)

        self.cache_path = self._get_cache_path()

    def get(self, key):
        """
        retrieve the cached value for the corresponding key; if the key is
        not in memory, it is loaded from disk

        raises CacheMiss if value has not been cached or has been invalidated
        raises ValueError if key is None

        :param key:
            the key that the value has been stored under
        """
        if key is None:
            raise ValueError("key cannot be None")

        try:
            result = self._objects[key]
        except KeyError:
            # note: will raise CacheMiss if can't be found
            result = self.load(key)

        if result == InvalidObject:
            raise CacheMiss("{0} is invalid".format(key))

        # return a (shallow) copy of any objects, so that modifications of
        # the returned object's attributes don't affect the cached object
        try:
            if hasattr(result, "__dict__") or hasattr(result, "__slots__"):
                result = copy.copy(result)
        except Exception:
            # best effort: hand out the cached object if it can't be copied
            pass

        return result

    def has(self, key):
        """
        check if cache has a value for the corresponding key; only the
        in-memory entries are considered, nothing is loaded from disk

        raises ValueError if key is None

        :param key:
            the key that the value has been stored under
        """
        if key is None:
            raise ValueError("key cannot be None")

        return key in self._objects and self._objects[key] != InvalidObject

    def set(self, key, obj):
        """
        set the cache value for the given key and schedule a save to disk

        lists, dicts and sets are converted to their immutable counterparts
        (tuple, frozendict, frozenset) before they are stored

        raises ValueError if key is None

        :param key:
            the key to store the value under

        :param obj:
            the value to store; note that obj *must* be picklable
        """
        if key is None:
            raise ValueError("key cannot be None")

        if isinstance(obj, list):
            obj = tuple(obj)
        elif isinstance(obj, dict):
            obj = frozendict(obj)
        elif isinstance(obj, set):
            obj = frozenset(obj)

        with self._write_lock:
            self._objects[key] = obj
            self._dirty = True
        self._schedule_save()

    def cache(self, key, func):
        """
        convenience method to attempt to get the value from the cache and
        generate the value if it hasn't been cached yet or the entry has
        otherwise been invalidated

        raises ValueError if key is None

        :param key:
            the key to retrieve or set

        :param func:
            a callable that takes no arguments and when invoked will return
            the proper value
        """
        if key is None:
            raise ValueError("key cannot be None")

        try:
            return self.get(key)
        except Exception:
            # any failure to read the entry is treated like a cache miss,
            # so that the value gets regenerated and stored again
            result = func()
            self.set(key, result)
            return result

    def invalidate(self, key=None):
        """
        invalidates either this whole cache, a single entry or a list of
        entries in this cache and schedules a save

        :param key:
            the key of the entry to invalidate or an iterable of such keys;
            if None, the entire cache will be invalidated
        """

        def _invalidate(key):
            try:
                self._objects[key] = InvalidObject
            except Exception:
                logger.error("error occurred while invalidating %s", key)
                traceback.print_exc()

        # NOTE(review): _dirty is not set here while save() returns early
        # unless _dirty is set, so an invalidation seems to reach the disk
        # only together with a later set() - confirm whether that is intended
        with self._write_lock:
            if key is None:
                # iterate over a snapshot of the keys, because the entries
                # are replaced while looping
                for k in list(self._objects):
                    _invalidate(k)
            elif isinstance(key, str):
                _invalidate(key)
            else:
                for k in key:
                    _invalidate(k)

        self._schedule_save()

    def _get_cache_path(self):
        """
        returns the folder in which the entries of this cache are stored
        """
        return _global_cache_path()

    def load(self, key=None):
        """
        loads the value specified from the disk and stores it in the in-memory
        cache

        returns the loaded value if a key is given, otherwise None

        raises CacheMiss if a key is given, but its file can't be read; if
        all entries are loaded, unreadable files are logged and skipped

        :param key:
            the key to load from disk; if None, all entries in the cache
            will be read from disk
        """
        with self._write_lock:
            if key is None:
                for entry_name in os.listdir(self.cache_path):
                    # os.listdir only returns the bare names, so they need to
                    # be resolved against the cache folder; otherwise the
                    # check is done relative to the current working directory
                    # and (nearly) no entry will ever be loaded
                    entry_path = os.path.join(self.cache_path, entry_name)
                    if not os.path.isfile(entry_path):
                        continue
                    try:
                        self._objects[entry_name] = self._read(entry_name)
                    except Exception:
                        logger.error("error while loading %s", entry_name)
            else:
                self._objects[key] = self._read(key)

        if key is not None:
            return self._objects[key]

    def load_async(self, key=None):
        """
        an async version of load; hands the loading over to the thread pool
        """
        self._pool.apply_async(self.load, key)

    def _read(self, key):
        """
        unpickles and returns the content of the cache file of the key

        raises CacheMiss if the file can't be opened or unpickled
        """
        file_path = os.path.join(self.cache_path, key)
        with self._disk_lock:
            try:
                with open(file_path, "rb") as f:
                    return pickle.load(f)
            except Exception:
                raise CacheMiss("cannot read cache file {0}".format(key))

    def save(self, key=None):
        """
        saves the cache entry specified to disk; does nothing if there are
        no unsaved changes

        the files of invalidated entries are deleted instead of written

        :param key:
            the entry to flush to disk; if None, all entries in the cache will
            be written to disk
        """
        if not self._dirty:
            return

        # lock is acquired here so that all keys being flushed reflect the
        # same state; note that this blocks disk reads, but not cache reads
        with self._disk_lock:
            # operate on a stable copy of the object
            with self._write_lock:
                _objs = self._objects.copy()
                self._dirty = False

            if key is None:
                # remove all InvalidObjects
                delete_keys = [k for k in _objs if _objs[k] == InvalidObject]

                for k in delete_keys:
                    del _objs[k]
                    file_path = os.path.join(self.cache_path, k)
                    try:
                        os.remove(file_path)
                    except OSError:
                        # the entry might never have been written to disk
                        pass

                if _objs:
                    os.makedirs(self.cache_path, exist_ok=True)
                    for k in _objs.keys():
                        try:
                            self._write(k, _objs)
                        except Exception:
                            # a failing entry must not prevent the remaining
                            # entries from being written
                            traceback.print_exc()
                else:
                    # cache has been emptied, so remove it
                    try:
                        shutil.rmtree(self.cache_path)
                    except OSError as e:
                        logger.error("error while deleting %s: %s", self.cache_path, e)

            elif key in _objs:
                if _objs[key] == InvalidObject:
                    file_path = os.path.join(self.cache_path, key)
                    try:
                        os.remove(file_path)
                    except OSError as e:
                        logger.error("error while deleting %s: %s", file_path, e)
                else:
                    os.makedirs(self.cache_path, exist_ok=True)
                    self._write(key, _objs)

    def save_async(self, key=None):
        """
        an async version of save; hands the saving over to the thread pool
        """
        try:
            self._pool.apply_async(self.save, key)
        except ValueError:
            # presumably raised if the pool has already been terminated
            # (see __del__) - TODO confirm against ThreadPool
            pass

    def _write(self, key, obj):
        """
        pickles a single entry to its cache file

        raises CacheMiss if the key is missing or the file can't be written

        :param key:
            the key of the entry to write

        :param obj:
            the mapping of keys to values, from which the entry is taken
        """
        try:
            _obj = obj[key]
        except KeyError:
            raise CacheMiss()

        try:
            with open(os.path.join(self.cache_path, key), "wb") as f:
                # protocol -1 selects the highest protocol available
                pickle.dump(_obj, f, protocol=-1)
        except OSError as e:
            logger.error("error while writing to %s: %s", key, e)
            raise CacheMiss()

    def _schedule_save(self):
        """
        schedules an async save of the whole cache; the save is debounced,
        i.e. several requests in quick succession result in a single save
        """

        def _debounce():
            self._save_queue -= 1
            if self._save_queue > 0:
                # there are more recent requests, so check again later
                sublime.set_timeout(_debounce, 1000)
            else:
                self._save_queue = 0
                self.save_async()

        self._save_queue += 1
        sublime.set_timeout(_debounce, 1000)

    # ensure cache is saved to disk when removed from memory
    def __del__(self):
        self.save_async()
        self._pool.terminate()


class GlobalCache(Cache):
    """
    the global cache

    keeps its data in the global cache folder and is meant for everything
    that is NOT tied to a particular tex document

    every instance of this class is backed by the very same state
    dictionary, so all instances behave like one single object
    """

    __STATE = {}

    def __new__(cls, *args, **kwargs):
        # almost-singleton: create a fresh object, but let it operate on the
        # state dictionary shared by all instances
        shared_inst = super().__new__(cls, *args, **kwargs)
        shared_inst.__dict__ = cls.__STATE
        return shared_inst

    def invalidate(self, key):
        """
        invalidates the given entry or entries; in contrast to the base
        class the whole cache can't be invalidated at once

        raises ValueError if key is None
        """
        if key is None:
            raise ValueError("key must not be None")
        super().invalidate(key)


class ValidatingCache(Cache):
    """
    an abstract class for a cache which implements validation either when an
    entry is retrieved or changed

    implementing subclasses SHOULD override validate_on_get or validate_on_set
    as appropriate
    """

    def __new__(cls, *args, **kwargs):
        # don't allow this class to be instantiated directly
        if cls is ValidatingCache:
            # NotImplemented is the sentinel for binary operators and not an
            # exception; raising it only results in an unrelated TypeError
            raise NotImplementedError(
                "ValidatingCache cannot be instantiated directly"
            )

        return super(ValidatingCache, cls).__new__(cls, *args, **kwargs)

    def validate_on_get(self, key):
        """
        subclasses should override this to run validation when an object is
        retrieved from the cache

        subclasses should raise a ValueError if the validation shouldn't
        succeed

        the default implementation accepts everything
        """

    def validate_on_set(self, key, obj):
        """
        subclasses should override this to run validation when an object is
        added or modified in the cache

        subclasses should raise a ValueError if the validation shouldn't
        succeed

        the default implementation accepts everything
        """

    def get(self, key):
        """
        validates the cache and retrieves the value for the given key

        if the validation fails, the WHOLE cache is invalidated and a
        CacheMiss carrying the message of the validation error is raised

        :param key:
            the key that the value has been stored under
        """
        try:
            self.validate_on_get(key)
        except ValueError as e:
            # invalidate() without a key discards every entry
            self.invalidate()
            raise CacheMiss(str(e))

        return super(ValidatingCache, self).get(key)

    def set(self, key, obj):
        """
        validates and stores the value under the given key

        raises ValueError if key is None or if validate_on_set rejects the
        value

        :param key:
            the key to store the value under

        :param obj:
            the value to store; note that obj *must* be picklable
        """
        if key is None:
            raise ValueError("key cannot be None")

        self.validate_on_set(key, obj)

        return super(ValidatingCache, self).set(key, obj)


class InstanceTrackingCache(Cache):
    """
    an abstract class for caches that share state between different instances
    that point to the same underlying data; in addition, when all instances
    of a given cache have been removed from memory, the cache is written to
    disk

    this is used, for example, by the local cache to ensure that all documents
    with the same tex_root share a local cache instance; this helps minimize
    memory usage and ensure data consistency across multiple cache instances,
    e.g., caches instantiated in different functions or multiple ST views of
    the "same" document

    subclasses MUST implement the _get_inst_key method
    """

    # all subclasses that have been instantiated at least once
    _CLASSES = set()

    def __new__(cls, *args, **kwargs):
        if cls is InstanceTrackingCache:
            # NotImplemented is the sentinel for binary operators and not an
            # exception; raising it only results in an unrelated TypeError
            raise NotImplementedError(
                "InstanceTrackingCache cannot be instantiated directly"
            )

        InstanceTrackingCache._CLASSES.add(cls)

        # lazily create the bookkeeping of the class on first instantiation
        if not hasattr(cls, "_INSTANCES"):
            # instance key -> shared instance __dict__
            cls._INSTANCES = collections.defaultdict(dict)
            # instance key -> number of living instances
            cls._REF_COUNTS = collections.defaultdict(int)
            # instance key -> lock guarding the two mappings above
            cls._LOCKS = collections.defaultdict(threading.Lock)

        inst = super(InstanceTrackingCache, cls).__new__(cls, *args, **kwargs)
        inst_key = inst._get_inst_key(*args, **kwargs)

        with cls._LOCKS[inst_key]:
            # all instances with the same key operate on the same state
            inst.__dict__ = cls._INSTANCES[inst_key]
            cls._REF_COUNTS[inst_key] += 1

        return inst

    def _get_inst_key(self, *args, **kwargs):
        """
        subclasses MUST override this method to return a key which identifies
        this instance; this key MUST be able to be used as a dictionary key

        the key is intended to be shared by multiple instances of the cache,
        but only those which represent the same underlying data; for example,
        the LocalCache uses the tex_root value as its key, so that all
        documents with the same tex_root share the same cache instance

        NB this method is called in TWO DISTINCT ways and subclass
        implementations MUST be able to generate the same response for both or
        else the behavior of the instance-tracking cannot be guaranteed.

            1)  This method is called from __new__ with the args and kwargs
                passed to the constructor; subclasses SHOULD derive the key
                from those args
            2)  This method is called from __del__ without the args and kwargs
                passed to the constructor; subclasses MUST ensure that the same
                key derived in #1 can be derived in this case from information
                stored in the object
        """
        raise NotImplementedError("subclasses must implement _get_inst_key")

    # ensure the cache is written to disk when LAST copy of this instance is
    # removed
    def __del__(self):
        inst_key = self._get_inst_key()
        if inst_key is None:
            return

        # .get() is used on purpose, so that no new lock is created for keys
        # which aren't tracked (anymore)
        lock = self._LOCKS.get(inst_key)
        if lock is None:
            return

        with lock:
            ref_count = self._REF_COUNTS[inst_key]
            ref_count -= 1
            self._REF_COUNTS[inst_key] = ref_count

            if ref_count <= 0:
                # the last instance is gone: flush the cache and drop the
                # bookkeeping for this key
                self.save_async()
                self._pool.terminate()
                del self._REF_COUNTS[inst_key]
                del self._INSTANCES[inst_key]
                del self._LOCKS[inst_key]


class LocalCache(ValidatingCache, InstanceTrackingCache):
    """
    the local cache

    stores data related to a particular tex document (identified by the
    tex_root) to a uniquely named folder in the cache directory

    all data in this cache SHOULD relate directly to the tex_root

    the entries expire as a whole once the creation timestamp stored in the
    cache is older than the life span from the cache.life_span setting
    """

    # key of the entry holding the creation time of the cache
    _CACHE_TIMESTAMP = "created_time_stamp"
    # guards the memoized life span, see _get_cache_life_span
    _LIFE_SPAN_LOCK = threading.Lock()

    def __init__(self, tex_root):
        """
        :param tex_root:
            the path of the tex root this cache belongs to
        """
        # needs to be set first, because the cache path is derived from it
        self.tex_root = tex_root
        super(LocalCache, self).__init__()

    def validate_on_get(self, key):
        """
        raises ValueError if the creation timestamp can't be retrieved or
        the cache has outlived its life span
        """
        try:
            # Cache.get is called directly to bypass the validation
            cache_time = Cache.get(self, self._CACHE_TIMESTAMP)
        except Exception:
            raise ValueError("cannot load created timestamp")
        else:
            if not self.is_up_to_date(key, cache_time):
                raise ValueError("value outdated")

    def validate_on_set(self, key, obj):
        """
        stores the creation timestamp, if the cache doesn't have one in
        memory yet
        """
        if not self.has(self._CACHE_TIMESTAMP):
            # Cache.set is called directly to bypass the validation
            Cache.set(self, self._CACHE_TIMESTAMP, int(time.time()))

    def _get_inst_key(self, *args, **kwargs):
        """
        returns the tex_root as instance key; it is taken from the object if
        already set, otherwise from the constructor arguments; returns None
        if it can't be determined
        """
        if hasattr(self, "tex_root"):
            return self.tex_root
        if args:
            return args[0]
        # the tex_root might have been passed as keyword argument
        return kwargs.get("tex_root")

    def _get_cache_path(self):
        """
        returns the folder of this cache, which is named after the hash
        digest of the tex_root
        """
        cache_path = super(LocalCache, self)._get_cache_path()
        root_hash = hash_digest(self.tex_root)
        return os.path.join(cache_path, LOCAL_CACHE_FOLDER, root_hash)

    def is_up_to_date(self, key, timestamp):
        """
        checks whether the life span has not yet passed since timestamp

        :param key:
            the key of the entry; not used by this implementation

        :param timestamp:
            the creation time in seconds since the epoch or None, which is
            always considered to be outdated
        """
        if timestamp is None:
            return False

        cache_life_span = LocalCache._get_cache_life_span()
        current_time = int(time.time())
        return timestamp + cache_life_span >= current_time

    @classmethod
    def _get_cache_life_span(cls):
        """
        gets the length of time in seconds an item should remain in the
        local cache before being evicted

        note that previous values are calculated and stored since this method
        is used on every cache read
        """

        def _parse_life_span_string(life_span_str):
            # a plain number is interpreted as seconds; a TypeError is raised
            # e.g. for None, in which case the default applies below
            try:
                return int(life_span_str)
            except (ValueError, TypeError):
                pass

            try:
                (d, h, m, s) = TIME_RE.match(life_span_str).groups()
                # every part of TIME_RE is optional, so it matches arbitrary
                # text; if not a single unit was found, the value is invalid
                # and must not be treated as a life span of 0 seconds
                if d is None and h is None and m is None and s is None:
                    raise ValueError("no time specification found")
                # time conversions in seconds
                times = [(s, 1), (m, 60), (h, 3600), (d, 86400)]
                # sum the converted times
                # if not specified (None) use 0
                return sum(int(t[0] or 0) * t[1] for t in times)
            except Exception:
                logger.error("error parsing cache.life_span: %s", life_span_str)
                # default 30 minutes in seconds
                return 1800

        with cls._LIFE_SPAN_LOCK:
            life_span_str = get_setting("cache.life_span")
            try:
                # reuse the previous result, if the setting is unchanged
                if cls._PREV_LIFE_SPAN_STR == life_span_str:
                    return cls._PREV_LIFE_SPAN
            except AttributeError:
                # nothing has been parsed yet
                pass

            cls._PREV_LIFE_SPAN_STR = life_span_str
            cls._PREV_LIFE_SPAN = life_span = _parse_life_span_string(life_span_str)
            return life_span
