import os
import time
import io
import logging
import datetime
import re
import glob
import simpleplugin
import simpleplugin.inputs as inputs
import encoding

import threading
# Plugin name: passed to inputs.add() at the bottom of this file and returned
# by LogSink.name().
name = "logsink"
# Spacing between incremental log reads (LogSink.getIncremental) in the
# LogSink.serveForever loop.
CPU_OVERLOAD_PROTECT = datetime.timedelta(seconds=5)
# Spacing between LogSink.reportStats() calls in the serveForever loop.
LOGSINK_STATS_REPORT = datetime.timedelta(minutes=1)
# Spacing between re-scans of the configured paths (LogSink.refreshTracersImpl).
REFRESH_TRACERS = datetime.timedelta(minutes=1)
# Initial value of LogSink.StatsCategory.
STATS_CATEGORY = "logsink_stats"
# Keywords merged into every tracer's no-wrap keyword list in addition to the
# per-file 'nowrap_keywords' setting.
defaultNoWrapKeywords = ["Caused by:"]

class LogFile(object):
    """Settings of a single ``file`` entry of the logsink configuration.

    Attributes start with neutral defaults and are overwritten by
    :meth:`load`; keys missing from the config mapping keep the current
    value.
    """

    def __init__(self):
        self.Path = ""            # 'path' key
        self.Encoding = ""        # 'encoding' key
        self.Disabled = False     # 'disabled' key
        self.NoWrapKeywords = []  # 'nowrap_keywords' key

    def load(self, doc):
        """Copy the recognised keys of the mapping ``doc`` onto this object."""
        self.Path = doc.get('path', self.Path)
        self.Encoding = doc.get('encoding', self.Encoding)
        self.Disabled = doc.get('disabled', self.Disabled)
        self.NoWrapKeywords = doc.get('nowrap_keywords', self.NoWrapKeywords)

    @classmethod
    def parseAll(cls, docs=None):
        """Build one instance per mapping in ``docs``.

        ``docs`` may be ``None`` or empty (a configuration without any
        ``file`` entry); both yield an empty list instead of raising.

        Returns:
            list of ``cls`` instances, in the order of ``docs``.
        """
        parsed = []
        # ``or ()`` covers None without resorting to a mutable default.
        for doc in docs or ():
            entry = cls()
            entry.load(doc)
            parsed.append(entry)
        return parsed

def systemNowMillis():
    """Return the current ``time.time()`` value as whole milliseconds."""
    seconds = time.time()
    return int(seconds * 1000)

def wildCardToRegexp(src):
    """Compile a ``*`` wildcard expression into a regular expression.

    Every ``*`` becomes ``.*``; the text between the stars is escaped so it
    is matched literally. The resulting pattern carries no anchors.
    """
    escaped_parts = (re.escape(part) for part in src.split("*"))
    return re.compile(".*".join(escaped_parts))

# Example configuration for this plugin (TOML-style tables).
# NOTE(review): LogSink.load looks up the camelCase keys 'statsEnabled' and
# 'statsCategory'; confirm the snake_case spellings shown below are honoured.
sampleConfig = '''
[[inputs.logsink]]
    category = "mylog"
    stats_category = "logsink_stats"
    stats_enabled = true
    excludeNames = [ "*.gz","*.zip" ]
    [[inputs.logsink.file]]
        path = "/some/path/to/*.log"
        disabled = false
        nowrap_keywords = ["Caused by:", "Test"]
    [[inputs.logsink.file]]
        path = "/some/other/path/*.log"
        disabled = false
'''

class LogSink(simpleplugin.ServiceInput):
    """Service input that follows log files and forwards appended entries.

    A background thread (see :meth:`start`) periodically expands the
    configured path patterns, keeps one ``SimpleTracer`` per matching file,
    pushes new log entries to the accumulator with ``addLog`` and, when
    enabled, reports per-file statistics.
    """

    def __init__(self):
        self.Files = []              # LogFile entries from the configuration
        self.ExcludeNames = []       # wildcard patterns of file names to skip
        self.excludeNamesRegex = []  # ExcludeNames compiled by initExcludes()
        self.tracers = []            # SimpleTracer objects currently followed
        self.acc = None              # accumulator handed to start()
        self.Category = "serverlog"
        self.cancel = 0              # bumped by cleanup() to stop the worker
        self.done = None             # False while serveForever runs, then True
        self.worker = None           # thread started by start()
        self.StatsEnabled = False
        self.StatsCategory = STATS_CATEGORY

    def load(self, doc):
        """Read the plugin settings from the config mapping ``doc``.

        The statistics options are accepted both in camelCase
        (``statsEnabled`` / ``statsCategory``) and in the snake_case spelling
        used by ``sampleConfig`` (``stats_enabled`` / ``stats_category``);
        the camelCase key wins when both are present.
        """
        # A config without any 'file' table yields None here.
        self.Files = LogFile.parseAll(doc.get('file') or [])
        self.ExcludeNames = doc.get('excludeNames', self.ExcludeNames)
        self.Category = doc.get('category', self.Category)
        self.StatsEnabled = doc.get(
            'statsEnabled', doc.get('stats_enabled', self.StatsEnabled))
        self.StatsCategory = doc.get(
            'statsCategory', doc.get('stats_category', self.StatsCategory))

    def initExcludes(self):
        """Compile ``ExcludeNames`` into ``excludeNamesRegex``."""
        self.excludeNamesRegex = [
            wildCardToRegexp(excludeName) for excludeName in self.ExcludeNames
        ]

    def serveForever(self):
        """Worker loop: poll files until ``cleanup`` bumps ``self.cancel``.

        Wakes up once per second and runs each periodic task when its
        timeout has elapsed. Any exception escaping the loop is logged and
        ends the loop; ``self.done`` is True afterwards in every case.
        """
        cancelThisTime = self.cancel
        startedAt = datetime.datetime.now()
        self.done = False
        try:
            self.initExcludes()
            self.refreshTracersImpl(traceFromStartSince=startedAt)
            self.getIncremental()
            now = datetime.datetime.now()
            statsTimeout = now + LOGSINK_STATS_REPORT
            incrementalTimeout = now + CPU_OVERLOAD_PROTECT
            refreshTracersTimeout = now + REFRESH_TRACERS
            # cleanup() increments self.cancel, which ends this loop.
            while cancelThisTime == self.cancel:
                now = datetime.datetime.now()
                if now > statsTimeout:
                    self.reportStats()
                    statsTimeout = datetime.datetime.now() + LOGSINK_STATS_REPORT
                if now > incrementalTimeout:
                    self.getIncremental()
                    incrementalTimeout = now + CPU_OVERLOAD_PROTECT
                if now > refreshTracersTimeout:
                    self.refreshTracersImpl(traceFromStartSince=startedAt)
                    refreshTracersTimeout = now + REFRESH_TRACERS
                time.sleep(1)
        except Exception as e:
            logging.error("logsink.serveForever {}".format(e))
        finally:
            self.done = True

    def isExcludeMatch(self, logfile):
        """Return True when the file name of ``logfile`` hits an exclude pattern.

        Only the final path component is tested. NOTE: ``pattern.match``
        anchors at the start of the name only, so ``*.gz`` also matches
        ``a.gz.old``.
        """
        # basename copes with the platform's separators, not just "/".
        logfilename = os.path.basename(logfile)
        return any(
            pattern is not None and pattern.match(logfilename)
            for pattern in self.excludeNamesRegex
        )

    def filterExcludedFiles(self, logfiles):
        """Return the entries of ``logfiles`` that no exclude pattern matches."""
        return [
            logfile for logfile in logfiles if not self.isExcludeMatch(logfile)
        ]

    def isNewFile(self, fi, traceFromStartSince):
        """Tell whether the file's ``st_ctime`` is later than ``traceFromStartSince``.

        Args:
            fi: ``os.stat`` result of the file.
            traceFromStartSince: naive local ``datetime`` or ``None``. With
                ``None`` no file counts as new (avoids comparing ``None``
                with a ``datetime``, which raises on Python 3).
        """
        if traceFromStartSince is None:
            return False
        return traceFromStartSince < datetime.datetime.fromtimestamp(fi.st_ctime)

    def refreshTracersImpl(self, traceFromStartSince=None):
        """Re-expand the configured paths and rebuild ``self.tracers``.

        Each enabled path is first passed through ``strftime`` with the
        current time and then globbed. Files already followed keep their
        tracer (and thus their read position); files seen for the first time
        get a new tracer that starts at offset 0 when :meth:`isNewFile` says
        so, otherwise at the current end of file. Files that no longer match
        are dropped. A path matched by several file entries is followed only
        once, using the settings of the first matching entry.
        """
        # First tracer per path wins, like the former linear search did.
        existing = {}
        for tracer in self.tracers:
            existing.setdefault(tracer.filenamepath, tracer)

        tracersThisTime = []
        seen = set()
        for fileinfo in self.Files:
            if fileinfo.Disabled:
                continue
            dateEvaluatedPath = datetime.datetime.now().strftime(fileinfo.Path)
            try:
                globFiles = glob.glob(dateEvaluatedPath)
            except Exception as e:
                logging.error("logsink.refreshTracersImpl glob error:{}".format(e))
                continue
            for fileFound in self.filterExcludedFiles(globFiles):
                if fileFound in seen:
                    # Overlapping patterns: do not follow the same file twice.
                    continue
                seen.add(fileFound)
                try:
                    fi = os.stat(fileFound)
                except Exception as e:
                    logging.error("logsink.refreshTracersImpl stat error:{}".format(e))
                    continue

                tracer = existing.get(fileFound)
                if tracer is None:
                    tracer = SimpleTracer()
                    tracer.filenamepath = fileFound
                    if self.isNewFile(fi, traceFromStartSince):
                        tracer.filepos = 0
                    else:
                        tracer.filepos = fi.st_size
                    tracer.encoding = fileinfo.Encoding
                    tracer.created = systemNowMillis()
                    tracer.noWrapKeywords = appendNoWrapKeywords(
                        fileinfo.NoWrapKeywords, defaultNoWrapKeywords)
                tracersThisTime.append(tracer)

        self.tracers = tracersThisTime

    def getIncremental(self):
        """Forward the entries appended to every followed file since the last call.

        A failure on one file (returned or raised by ``checkNewLogs``) is
        logged and does not prevent the remaining files from being read.
        """
        acc = self.acc

        def forward(logfilepath, line, content, pos):
            # Fresh dicts per entry so the accumulator never sees a mapping
            # that a later entry mutates.
            acc.addLog(self.Category, {}, {"file": logfilepath}, content, line)

        for tracer in self.tracers:
            try:
                newlogerr = tracer.checkNewLogs(forward)
            except Exception as e:
                # e.g. the file vanished between two refreshes
                newlogerr = e
            if newlogerr is not None:
                logging.error("logsink.getIncremental {}".format(newlogerr))

    def reportStats(self):
        """Send one statistics record per followed file when stats are enabled."""
        if not self.StatsEnabled:
            return
        acc = self.acc
        for tracer in self.tracers:
            tags = {
                "file": tracer.filenamepath,
                "checkInterval": str(tracer.checkInterval),
                "encoding": tracer.encoding,
            }
            fields = {
                "filepos": tracer.filepos,
                "checkedLocalTime": tracer.lastTimestamp,
            }
            try:
                fields["lastupdatedLocalTime"], fields["fileSize"], err = getFileSize(tracer.filenamepath)
                if err is not None:
                    fields["error"] = str(err)
            except Exception as e:
                fields["error"] = str(e)
            fields["firstCheck"] = tracer.created
            fields["transferBytes"] = tracer.transferBytes
            # NOTE(review): getIncremental calls acc.addLog (lower camel case)
            # while this calls acc.AddCounter - confirm both exist on acc.
            acc.AddCounter(self.StatsCategory, fields, tags)

    def start(self, acc):
        """Remember the accumulator and launch the worker thread."""
        self.acc = acc
        self.worker = self.doAsync(self.serveForever)

    def doAsync(self, callback, *args, **kwargs):
        """Run ``callback(*args, **kwargs)`` on a new daemon thread and return it."""
        t = threading.Thread(target=callback, args=args, kwargs=kwargs)
        # Attribute form; Thread.setDaemon() is deprecated since Python 3.10.
        t.daemon = True
        t.start()
        return t

    def cleanup(self):
        """Signal the worker loop to stop and wait for the thread to end."""
        self.cancel += 1
        if self.worker:
            self.worker.join()

    def restart(self):
        """Stop the worker, pause one second, and start again with the same accumulator."""
        self.cleanup()
        time.sleep(1)
        self.start(self.acc)

    def stop(self):
        """Stop the worker thread."""
        self.cleanup()

    def gather(self, acc):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("not implemented")

    def interval(self):
        """Return -1."""
        return -1

    def name(self):
        """Return the module-level plugin name."""
        return name

def find(callback, sources):
    """Return True if ``callback`` is truthy for at least one item of ``sources``.

    Items are tried in order and evaluation stops at the first hit; an empty
    ``sources`` gives False.
    """
    return any(callback(item) for item in sources)

class SimpleTracer(object):
    """Follows one log file and hands newly appended entries to a callback.

    Consecutive lines are grouped into one entry: a line that is empty,
    starts with a tab or space, or contains one of ``noWrapKeywords`` is
    appended to the entry in progress; any other line starts a new entry.
    """

    _SEND_THRESHOLD = 0                  # flush only buffers longer than this
    _INDENTATION_CHARS = (u"\t", u" ")   # leading chars of continuation lines
    _NEWLINE = u"\n"                     # separator between joined lines

    def __init__(self):
        self.filenamepath = ""
        self.filepos = 0         # offset at which the next read starts
        self.lineno = 0          # running offset passed to the callback
        self.checkInterval = 0   # minimum ms between two checks; 0 = no limit
        self.lastTimestamp = 0   # ms timestamp of the last performed check
        self.encoding = ""
        self.transferBytes = 0   # total length of all content handed over
        self.created = 0
        self.noWrapKeywords = []

    def checkNewLogs(self, h4):
        """Read what was appended since the last call and emit it entry by entry.

        Args:
            h4: callable ``h4(filenamepath, lineno, content, pos)`` invoked
                once per completed entry.

        Returns:
            ``None`` when the check ran or was skipped; the ``OSError`` /
            ``IOError`` instance when the file could not be stat'ed or
            opened, so the caller can log it instead of being interrupted.
        """
        now = systemNowMillis()
        if (self.checkInterval != 0 and self.lastTimestamp != 0
                and (now - self.lastTimestamp) < self.checkInterval):
            return None
        self.lastTimestamp = now

        try:
            currentSize = os.stat(self.filenamepath).st_size
        except (IOError, OSError) as e:
            return e
        if currentSize == self.filepos:
            return None
        if currentSize < self.filepos:
            # File shrank (truncated or replaced): start over from the top.
            self.filepos = 0

        try:
            f = open(self.filenamepath, "r")
        except (IOError, OSError) as e:
            return e
        with f:
            f.seek(self.filepos, os.SEEK_SET)

            scanner = encoding.getScanner(self.encoding, f)
            if not scanner:
                logging.error('SimpleTracer.checkNewLog {}'.format(f))
                return None

            self.lineno = self.filepos
            multilinebuffer = io.StringIO()
            for line in scanner:
                startsNewEntry = (
                    len(line) > 0
                    and line[0] not in self._INDENTATION_CHARS
                    and not self.isNoWrap(line)
                )
                if startsNewEntry:
                    # The previous entry is complete; send it first.
                    self._flush(multilinebuffer, f, h4)
                if multilinebuffer.tell() > 0:
                    multilinebuffer.write(self._NEWLINE)
                multilinebuffer.write(line)
            # Whatever is left at end of file is sent as the last entry.
            self._flush(multilinebuffer, f, h4)

            self.filepos = f.tell()
        return None

    def _flush(self, buf, f, h4):
        """Send the buffered entry (if any) to ``h4`` and rewind the buffer."""
        length = buf.tell()
        if length <= self._SEND_THRESHOLD:
            return
        self.lineno += length
        # NOTE(review): f is opened in text mode; whether tell() is usable
        # while the scanner iterates depends on encoding.getScanner - confirm.
        pos = f.tell()
        buf.seek(0)
        # The buffer is rewound, not truncated, so read exactly ``length``.
        content2send = buf.read(length)
        h4(self.filenamepath, self.lineno, content2send, pos)
        self.transferBytes += length
        buf.seek(0)

    def isNoWrap(self, line):
        """Return True when ``line`` contains any of ``noWrapKeywords``."""
        return any(kw in line for kw in self.noWrapKeywords)

def getFileSize(filename):
    """Stat ``filename`` and return ``(st_ctime, st_size, None)``.

    The third element is always ``None``; a failing ``os.stat`` raises.
    """
    info = os.stat(filename)
    changed_at, size = info.st_ctime, info.st_size
    return changed_at, size, None

def appendNoWrapKeywords(src, appendArr):
    """Return the items of ``src`` followed by those of ``appendArr`` not yet present.

    A new list is built, so neither argument is modified; callers that pass
    a configuration list no longer have it extended behind their back.

    Args:
        src: base keyword list; ``None`` is treated as empty.
        appendArr: keywords to add when missing.

    Returns:
        The merged list, order preserved, without duplicates from
        ``appendArr``.
    """
    merged = list(src or [])
    for keyword in appendArr:
        if keyword not in merged:
            merged.append(keyword)
    return merged



# Register a zero-argument LogSink factory under the plugin name.
inputs.add(name, lambda: LogSink())
